From: Tejun Heo <tj@kernel.org>
Date: Tue, 2 Apr 2013 00:08:13 +0000 (-0700)
Subject: Merge tag 'v3.9-rc5' into wq/for-3.10
X-Git-Url: http://git.mmlx.us/?a=commitdiff_plain;h=229641a6f1f09e27a1f12fba38980f33f4c92975;p=linux-edison.git

Merge tag 'v3.9-rc5' into wq/for-3.10

Writeback conversion to workqueue will be based on top of wq/for-3.10
branch to take advantage of custom attrs and NUMA support for unbound
workqueues.  Mainline currently contains two commits which result in
non-trivial merge conflicts with wq/for-3.10 and because
block/for-3.10/core is based on v3.9-rc3 which contains one of the
conflicting commits, we need a pre-merge-window merge anyway.  Let's
pull v3.9-rc5 into wq/for-3.10 so that the block tree doesn't suffer
from workqueue merge conflicts.

The two conflicts and their resolutions:

* e68035fb65 ("workqueue: convert to idr_alloc()") in mainline changes
  worker_pool_assign_id() to use idr_alloc() instead of the old idr
  interface.  worker_pool_assign_id() goes through multiple locking
  changes in wq/for-3.10 causing the following conflict.

  static int worker_pool_assign_id(struct worker_pool *pool)
  {
	  int ret;

  <<<<<<< HEAD
	  lockdep_assert_held(&wq_pool_mutex);

	  do {
		  if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL))
			  return -ENOMEM;
		  ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
	  } while (ret == -EAGAIN);
  =======
	  mutex_lock(&worker_pool_idr_mutex);
	  ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
	  if (ret >= 0)
		  pool->id = ret;
	  mutex_unlock(&worker_pool_idr_mutex);
  >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89

	  return ret < 0 ? ret : 0;
  }

  We want locking from the former and idr_alloc() usage from the
  latter, which can be combined to the following.

  static int worker_pool_assign_id(struct worker_pool *pool)
  {
	  int ret;

	  lockdep_assert_held(&wq_pool_mutex);

	  ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
	  if (ret >= 0) {
		  pool->id = ret;
		  return 0;
	  }
	  return ret;
   }

* eb2834285c ("workqueue: fix possible pool stall bug in
  wq_unbind_fn()") updated wq_unbind_fn() such that it has single
  larger for_each_std_worker_pool() loop instead of two separate loops
  with a schedule() call inbetween.  wq/for-3.10 renamed
  pool->assoc_mutex to pool->manager_mutex causing the following
  conflict (earlier function body and comments omitted for brevity).

  static void wq_unbind_fn(struct work_struct *work)
  {
  ...
		  spin_unlock_irq(&pool->lock);
  <<<<<<< HEAD
		  mutex_unlock(&pool->manager_mutex);
	  }
  =======
		  mutex_unlock(&pool->assoc_mutex);
  >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89

		  schedule();

  <<<<<<< HEAD
	  for_each_cpu_worker_pool(pool, cpu)
  =======
  >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89
		  atomic_set(&pool->nr_running, 0);

		  spin_lock_irq(&pool->lock);
		  wake_up_worker(pool);
		  spin_unlock_irq(&pool->lock);
	  }
  }

  The resolution is mostly trivial.  We want the control flow of the
  latter with the rename of the former.

  static void wq_unbind_fn(struct work_struct *work)
  {
  ...
		  spin_unlock_irq(&pool->lock);
		  mutex_unlock(&pool->manager_mutex);

		  schedule();

		  atomic_set(&pool->nr_running, 0);

		  spin_lock_irq(&pool->lock);
		  wake_up_worker(pool);
		  spin_unlock_irq(&pool->lock);
	  }
  }

Signed-off-by: Tejun Heo <tj@kernel.org>
---

229641a6f1f09e27a1f12fba38980f33f4c92975
diff --cc kernel/workqueue.c
index 729ac6a4486,b48cd597145..dd2a4c49a39
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@@ -508,31 -456,40 +508,30 @@@ static int worker_pool_assign_id(struc
  {
  	int ret;
  
 -	mutex_lock(&worker_pool_idr_mutex);
 +	lockdep_assert_held(&wq_pool_mutex);
 +
- 	do {
- 		if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL))
- 			return -ENOMEM;
- 		ret = idr_get_new(&worker_pool_idr, pool, &pool->id);
- 	} while (ret == -EAGAIN);
- 
+ 	ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL);
 -	if (ret >= 0)
++	if (ret >= 0) {
+ 		pool->id = ret;
 -	mutex_unlock(&worker_pool_idr_mutex);
 -
 -	return ret < 0 ? ret : 0;
++		return 0;
++	}
 +	return ret;
  }
  
 -/*
 - * Lookup worker_pool by id.  The idr currently is built during boot and
 - * never modified.  Don't worry about locking for now.
 +/**
 + * unbound_pwq_by_node - return the unbound pool_workqueue for the given node
 + * @wq: the target workqueue
 + * @node: the node ID
 + *
 + * This must be called either with pwq_lock held or sched RCU read locked.
 + * If the pwq needs to be used beyond the locking in effect, the caller is
 + * responsible for guaranteeing that the pwq stays online.
   */
 -static struct worker_pool *worker_pool_by_id(int pool_id)
 -{
 -	return idr_find(&worker_pool_idr, pool_id);
 -}
 -
 -static struct worker_pool *get_std_worker_pool(int cpu, bool highpri)
 -{
 -	struct worker_pool *pools = std_worker_pools(cpu);
 -
 -	return &pools[highpri];
 -}
 -
 -static struct pool_workqueue *get_pwq(unsigned int cpu,
 -				      struct workqueue_struct *wq)
 +static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq,
 +						  int node)
  {
 -	if (!(wq->flags & WQ_UNBOUND)) {
 -		if (likely(cpu < nr_cpu_ids))
 -			return per_cpu_ptr(wq->pool_wq.pcpu, cpu);
 -	} else if (likely(cpu == WORK_CPU_UNBOUND))
 -		return wq->pool_wq.single;
 -	return NULL;
 +	assert_rcu_or_wq_mutex(wq);
 +	return rcu_dereference_raw(wq->numa_pwq_tbl[node]);
  }
  
  static unsigned int work_color_to_flags(int color)
@@@ -4407,128 -3446,37 +4406,134 @@@ static void wq_unbind_fn(struct work_st
  		pool->flags |= POOL_DISASSOCIATED;
  
  		spin_unlock_irq(&pool->lock);
 -		mutex_unlock(&pool->assoc_mutex);
 +		mutex_unlock(&pool->manager_mutex);
- 	}
  
- 	/*
- 	 * Call schedule() so that we cross rq->lock and thus can guarantee
- 	 * sched callbacks see the %WORKER_UNBOUND flag.  This is necessary
- 	 * as scheduler callbacks may be invoked from other cpus.
- 	 */
- 	schedule();
+ 		/*
+ 		 * Call schedule() so that we cross rq->lock and thus can
+ 		 * guarantee sched callbacks see the %WORKER_UNBOUND flag.
+ 		 * This is necessary as scheduler callbacks may be invoked
+ 		 * from other cpus.
+ 		 */
+ 		schedule();
  
- 	/*
- 	 * Sched callbacks are disabled now.  Zap nr_running.  After this,
- 	 * nr_running stays zero and need_more_worker() and keep_working()
- 	 * are always true as long as the worklist is not empty.  Pools on
- 	 * @cpu now behave as unbound (in terms of concurrency management)
- 	 * pools which are served by workers tied to the CPU.
- 	 *
- 	 * On return from this function, the current worker would trigger
- 	 * unbound chain execution of pending work items if other workers
- 	 * didn't already.
- 	 */
- 	for_each_cpu_worker_pool(pool, cpu)
+ 		/*
+ 		 * Sched callbacks are disabled now.  Zap nr_running.
+ 		 * After this, nr_running stays zero and need_more_worker()
+ 		 * and keep_working() are always true as long as the
+ 		 * worklist is not empty.  This pool now behaves as an
+ 		 * unbound (in terms of concurrency management) pool which
+ 		 * are served by workers tied to the pool.
+ 		 */
  		atomic_set(&pool->nr_running, 0);
+ 
+ 		/*
+ 		 * With concurrency management just turned off, a busy
+ 		 * worker blocking could lead to lengthy stalls.  Kick off
+ 		 * unbound chain execution of currently pending work items.
+ 		 */
+ 		spin_lock_irq(&pool->lock);
+ 		wake_up_worker(pool);
+ 		spin_unlock_irq(&pool->lock);
+ 	}
  }
  
 +/**
 + * rebind_workers - rebind all workers of a pool to the associated CPU
 + * @pool: pool of interest
 + *
 + * @pool->cpu is coming online.  Rebind all workers to the CPU.
 + */
 +static void rebind_workers(struct worker_pool *pool)
 +{
 +	struct worker *worker;
 +	int wi;
 +
 +	lockdep_assert_held(&pool->manager_mutex);
 +
 +	/*
 +	 * Restore CPU affinity of all workers.  As all idle workers should
 +	 * be on the run-queue of the associated CPU before any local
 +	 * wake-ups for concurrency management happen, restore CPU affinty
 +	 * of all workers first and then clear UNBOUND.  As we're called
 +	 * from CPU_ONLINE, the following shouldn't fail.
 +	 */
 +	for_each_pool_worker(worker, wi, pool)
 +		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
 +						  pool->attrs->cpumask) < 0);
 +
 +	spin_lock_irq(&pool->lock);
 +
 +	for_each_pool_worker(worker, wi, pool) {
 +		unsigned int worker_flags = worker->flags;
 +
 +		/*
 +		 * A bound idle worker should actually be on the runqueue
 +		 * of the associated CPU for local wake-ups targeting it to
 +		 * work.  Kick all idle workers so that they migrate to the
 +		 * associated CPU.  Doing this in the same loop as
 +		 * replacing UNBOUND with REBOUND is safe as no worker will
 +		 * be bound before @pool->lock is released.
 +		 */
 +		if (worker_flags & WORKER_IDLE)
 +			wake_up_process(worker->task);
 +
 +		/*
 +		 * We want to clear UNBOUND but can't directly call
 +		 * worker_clr_flags() or adjust nr_running.  Atomically
 +		 * replace UNBOUND with another NOT_RUNNING flag REBOUND.
 +		 * @worker will clear REBOUND using worker_clr_flags() when
 +		 * it initiates the next execution cycle thus restoring
 +		 * concurrency management.  Note that when or whether
 +		 * @worker clears REBOUND doesn't affect correctness.
 +		 *
 +		 * ACCESS_ONCE() is necessary because @worker->flags may be
 +		 * tested without holding any lock in
 +		 * wq_worker_waking_up().  Without it, NOT_RUNNING test may
 +		 * fail incorrectly leading to premature concurrency
 +		 * management operations.
 +		 */
 +		WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND));
 +		worker_flags |= WORKER_REBOUND;
 +		worker_flags &= ~WORKER_UNBOUND;
 +		ACCESS_ONCE(worker->flags) = worker_flags;
 +	}
 +
 +	spin_unlock_irq(&pool->lock);
 +}
 +
 +/**
 + * restore_unbound_workers_cpumask - restore cpumask of unbound workers
 + * @pool: unbound pool of interest
 + * @cpu: the CPU which is coming up
 + *
 + * An unbound pool may end up with a cpumask which doesn't have any online
 + * CPUs.  When a worker of such pool get scheduled, the scheduler resets
 + * its cpus_allowed.  If @cpu is in @pool's cpumask which didn't have any
 + * online CPU before, cpus_allowed of all its workers should be restored.
 + */
 +static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu)
 +{
 +	static cpumask_t cpumask;
 +	struct worker *worker;
 +	int wi;
 +
 +	lockdep_assert_held(&pool->manager_mutex);
 +
 +	/* is @cpu allowed for @pool? */
 +	if (!cpumask_test_cpu(cpu, pool->attrs->cpumask))
 +		return;
 +
 +	/* is @cpu the only online CPU? */
 +	cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask);
 +	if (cpumask_weight(&cpumask) != 1)
 +		return;
 +
 +	/* as we're called from CPU_ONLINE, the following shouldn't fail */
 +	for_each_pool_worker(worker, wi, pool)
 +		WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task,
 +						  pool->attrs->cpumask) < 0);
 +}
 +
  /*
   * Workqueues should be brought up before normal priority CPU notifiers.
   * This will be registered high priority CPU notifier.