From: Tejun Heo Date: Tue, 2 Apr 2013 00:08:13 +0000 (-0700) Subject: Merge tag 'v3.9-rc5' into wq/for-3.10 X-Git-Url: http://git.mmlx.us/?a=commitdiff_plain;h=229641a6f1f09e27a1f12fba38980f33f4c92975;p=linux-edison.git Merge tag 'v3.9-rc5' into wq/for-3.10 Writeback conversion to workqueue will be based on top of wq/for-3.10 branch to take advantage of custom attrs and NUMA support for unbound workqueues. Mainline currently contains two commits which result in non-trivial merge conflicts with wq/for-3.10 and because block/for-3.10/core is based on v3.9-rc3 which contains one of the conflicting commits, we need a pre-merge-window merge anyway. Let's pull v3.9-rc5 into wq/for-3.10 so that the block tree doesn't suffer from workqueue merge conflicts. The two conflicts and their resolutions: * e68035fb65 ("workqueue: convert to idr_alloc()") in mainline changes worker_pool_assign_id() to use idr_alloc() instead of the old idr interface. worker_pool_assign_id() goes through multiple locking changes in wq/for-3.10 causing the following conflict. static int worker_pool_assign_id(struct worker_pool *pool) { int ret; <<<<<<< HEAD lockdep_assert_held(&wq_pool_mutex); do { if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL)) return -ENOMEM; ret = idr_get_new(&worker_pool_idr, pool, &pool->id); } while (ret == -EAGAIN); ======= mutex_lock(&worker_pool_idr_mutex); ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); if (ret >= 0) pool->id = ret; mutex_unlock(&worker_pool_idr_mutex); >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89 return ret < 0 ? ret : 0; } We want locking from the former and idr_alloc() usage from the latter, which can be combined to the following. static int worker_pool_assign_id(struct worker_pool *pool) { int ret; lockdep_assert_held(&wq_pool_mutex); ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); if (ret >= 0) { pool->id = ret; return 0; } return ret; } * eb2834285c ("workqueue: fix possible pool stall bug in wq_unbind_fn()") updated wq_unbind_fn() such that it has single larger for_each_std_worker_pool() loop instead of two separate loops with a schedule() call inbetween. wq/for-3.10 renamed pool->assoc_mutex to pool->manager_mutex causing the following conflict (earlier function body and comments omitted for brevity). static void wq_unbind_fn(struct work_struct *work) { ... spin_unlock_irq(&pool->lock); <<<<<<< HEAD mutex_unlock(&pool->manager_mutex); } ======= mutex_unlock(&pool->assoc_mutex); >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89 schedule(); <<<<<<< HEAD for_each_cpu_worker_pool(pool, cpu) ======= >>>>>>> c67bf5361e7e66a0ff1f4caf95f89347d55dfb89 atomic_set(&pool->nr_running, 0); spin_lock_irq(&pool->lock); wake_up_worker(pool); spin_unlock_irq(&pool->lock); } } The resolution is mostly trivial. We want the control flow of the latter with the rename of the former. static void wq_unbind_fn(struct work_struct *work) { ... spin_unlock_irq(&pool->lock); mutex_unlock(&pool->manager_mutex); schedule(); atomic_set(&pool->nr_running, 0); spin_lock_irq(&pool->lock); wake_up_worker(pool); spin_unlock_irq(&pool->lock); } } Signed-off-by: Tejun Heo --- 229641a6f1f09e27a1f12fba38980f33f4c92975 diff --cc kernel/workqueue.c index 729ac6a4486,b48cd597145..dd2a4c49a39 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@@ -508,31 -456,40 +508,30 @@@ static int worker_pool_assign_id(struc { int ret; - mutex_lock(&worker_pool_idr_mutex); + lockdep_assert_held(&wq_pool_mutex); + - do { - if (!idr_pre_get(&worker_pool_idr, GFP_KERNEL)) - return -ENOMEM; - ret = idr_get_new(&worker_pool_idr, pool, &pool->id); - } while (ret == -EAGAIN); - + ret = idr_alloc(&worker_pool_idr, pool, 0, 0, GFP_KERNEL); - if (ret >= 0) ++ if (ret >= 0) { + pool->id = ret; - mutex_unlock(&worker_pool_idr_mutex); - - return ret < 0 ? ret : 0; ++ return 0; ++ } + return ret; } -/* - * Lookup worker_pool by id. The idr currently is built during boot and - * never modified. Don't worry about locking for now. +/** + * unbound_pwq_by_node - return the unbound pool_workqueue for the given node + * @wq: the target workqueue + * @node: the node ID + * + * This must be called either with pwq_lock held or sched RCU read locked. + * If the pwq needs to be used beyond the locking in effect, the caller is + * responsible for guaranteeing that the pwq stays online. */ -static struct worker_pool *worker_pool_by_id(int pool_id) -{ - return idr_find(&worker_pool_idr, pool_id); -} - -static struct worker_pool *get_std_worker_pool(int cpu, bool highpri) -{ - struct worker_pool *pools = std_worker_pools(cpu); - - return &pools[highpri]; -} - -static struct pool_workqueue *get_pwq(unsigned int cpu, - struct workqueue_struct *wq) +static struct pool_workqueue *unbound_pwq_by_node(struct workqueue_struct *wq, + int node) { - if (!(wq->flags & WQ_UNBOUND)) { - if (likely(cpu < nr_cpu_ids)) - return per_cpu_ptr(wq->pool_wq.pcpu, cpu); - } else if (likely(cpu == WORK_CPU_UNBOUND)) - return wq->pool_wq.single; - return NULL; + assert_rcu_or_wq_mutex(wq); + return rcu_dereference_raw(wq->numa_pwq_tbl[node]); } static unsigned int work_color_to_flags(int color) @@@ -4407,128 -3446,37 +4406,134 @@@ static void wq_unbind_fn(struct work_st pool->flags |= POOL_DISASSOCIATED; spin_unlock_irq(&pool->lock); - mutex_unlock(&pool->assoc_mutex); + mutex_unlock(&pool->manager_mutex); - } - /* - * Call schedule() so that we cross rq->lock and thus can guarantee - * sched callbacks see the %WORKER_UNBOUND flag. This is necessary - * as scheduler callbacks may be invoked from other cpus. - */ - schedule(); + /* + * Call schedule() so that we cross rq->lock and thus can + * guarantee sched callbacks see the %WORKER_UNBOUND flag. + * This is necessary as scheduler callbacks may be invoked + * from other cpus. + */ + schedule(); - /* - * Sched callbacks are disabled now. Zap nr_running. After this, - * nr_running stays zero and need_more_worker() and keep_working() - * are always true as long as the worklist is not empty. Pools on - * @cpu now behave as unbound (in terms of concurrency management) - * pools which are served by workers tied to the CPU. - * - * On return from this function, the current worker would trigger - * unbound chain execution of pending work items if other workers - * didn't already. - */ - for_each_cpu_worker_pool(pool, cpu) + /* + * Sched callbacks are disabled now. Zap nr_running. + * After this, nr_running stays zero and need_more_worker() + * and keep_working() are always true as long as the + * worklist is not empty. This pool now behaves as an + * unbound (in terms of concurrency management) pool which + * are served by workers tied to the pool. + */ atomic_set(&pool->nr_running, 0); + + /* + * With concurrency management just turned off, a busy + * worker blocking could lead to lengthy stalls. Kick off + * unbound chain execution of currently pending work items. + */ + spin_lock_irq(&pool->lock); + wake_up_worker(pool); + spin_unlock_irq(&pool->lock); + } } +/** + * rebind_workers - rebind all workers of a pool to the associated CPU + * @pool: pool of interest + * + * @pool->cpu is coming online. Rebind all workers to the CPU. + */ +static void rebind_workers(struct worker_pool *pool) +{ + struct worker *worker; + int wi; + + lockdep_assert_held(&pool->manager_mutex); + + /* + * Restore CPU affinity of all workers. As all idle workers should + * be on the run-queue of the associated CPU before any local + * wake-ups for concurrency management happen, restore CPU affinty + * of all workers first and then clear UNBOUND. As we're called + * from CPU_ONLINE, the following shouldn't fail. + */ + for_each_pool_worker(worker, wi, pool) + WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, + pool->attrs->cpumask) < 0); + + spin_lock_irq(&pool->lock); + + for_each_pool_worker(worker, wi, pool) { + unsigned int worker_flags = worker->flags; + + /* + * A bound idle worker should actually be on the runqueue + * of the associated CPU for local wake-ups targeting it to + * work. Kick all idle workers so that they migrate to the + * associated CPU. Doing this in the same loop as + * replacing UNBOUND with REBOUND is safe as no worker will + * be bound before @pool->lock is released. + */ + if (worker_flags & WORKER_IDLE) + wake_up_process(worker->task); + + /* + * We want to clear UNBOUND but can't directly call + * worker_clr_flags() or adjust nr_running. Atomically + * replace UNBOUND with another NOT_RUNNING flag REBOUND. + * @worker will clear REBOUND using worker_clr_flags() when + * it initiates the next execution cycle thus restoring + * concurrency management. Note that when or whether + * @worker clears REBOUND doesn't affect correctness. + * + * ACCESS_ONCE() is necessary because @worker->flags may be + * tested without holding any lock in + * wq_worker_waking_up(). Without it, NOT_RUNNING test may + * fail incorrectly leading to premature concurrency + * management operations. + */ + WARN_ON_ONCE(!(worker_flags & WORKER_UNBOUND)); + worker_flags |= WORKER_REBOUND; + worker_flags &= ~WORKER_UNBOUND; + ACCESS_ONCE(worker->flags) = worker_flags; + } + + spin_unlock_irq(&pool->lock); +} + +/** + * restore_unbound_workers_cpumask - restore cpumask of unbound workers + * @pool: unbound pool of interest + * @cpu: the CPU which is coming up + * + * An unbound pool may end up with a cpumask which doesn't have any online + * CPUs. When a worker of such pool get scheduled, the scheduler resets + * its cpus_allowed. If @cpu is in @pool's cpumask which didn't have any + * online CPU before, cpus_allowed of all its workers should be restored. + */ +static void restore_unbound_workers_cpumask(struct worker_pool *pool, int cpu) +{ + static cpumask_t cpumask; + struct worker *worker; + int wi; + + lockdep_assert_held(&pool->manager_mutex); + + /* is @cpu allowed for @pool? */ + if (!cpumask_test_cpu(cpu, pool->attrs->cpumask)) + return; + + /* is @cpu the only online CPU? */ + cpumask_and(&cpumask, pool->attrs->cpumask, cpu_online_mask); + if (cpumask_weight(&cpumask) != 1) + return; + + /* as we're called from CPU_ONLINE, the following shouldn't fail */ + for_each_pool_worker(worker, wi, pool) + WARN_ON_ONCE(set_cpus_allowed_ptr(worker->task, + pool->attrs->cpumask) < 0); +} + /* * Workqueues should be brought up before normal priority CPU notifiers. * This will be registered high priority CPU notifier.