Diffstat (limited to 'kernel/workqueue.c')
-rw-r--r--	kernel/workqueue.c	220
1 file changed, 210 insertions, 10 deletions
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c579dbab2e36..61a0264e28f9 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -148,6 +148,8 @@ struct worker_pool {
 	int			id;		/* I: pool ID */
 	unsigned int		flags;		/* X: flags */
 
+	unsigned long		watchdog_ts;	/* L: watchdog timestamp */
+
 	struct list_head	worklist;	/* L: list of pending works */
 	int			nr_workers;	/* L: total number of workers */
 
@@ -1083,6 +1085,8 @@ static void pwq_activate_delayed_work(struct work_struct *work)
 	struct pool_workqueue *pwq = get_work_pwq(work);
 
 	trace_workqueue_activate_work(work);
+	if (list_empty(&pwq->pool->worklist))
+		pwq->pool->watchdog_ts = jiffies;
 	move_linked_works(work, &pwq->pool->worklist, NULL);
 	__clear_bit(WORK_STRUCT_DELAYED_BIT, work_data_bits(work));
 	pwq->nr_active++;
@@ -1385,6 +1389,8 @@ retry:
 		trace_workqueue_activate_work(work);
 		pwq->nr_active++;
 		worklist = &pwq->pool->worklist;
+		if (list_empty(worklist))
+			pwq->pool->watchdog_ts = jiffies;
 	} else {
 		work_flags |= WORK_STRUCT_DELAYED;
 		worklist = &pwq->delayed_works;
@@ -2157,6 +2163,8 @@ recheck:
 			list_first_entry(&pool->worklist,
 					 struct work_struct, entry);
 
+		pool->watchdog_ts = jiffies;
+
 		if (likely(!(*work_data_bits(work) & WORK_STRUCT_LINKED))) {
 			/* optimization path, not strictly necessary */
 			process_one_work(worker, work);
@@ -2240,6 +2248,7 @@ repeat:
 					struct pool_workqueue, mayday_node);
 		struct worker_pool *pool = pwq->pool;
 		struct work_struct *work, *n;
+		bool first = true;
 
 		__set_current_state(TASK_RUNNING);
 		list_del_init(&pwq->mayday_node);
@@ -2256,9 +2265,14 @@ repeat:
 		 * process'em.
 		 */
 		WARN_ON_ONCE(!list_empty(scheduled));
-		list_for_each_entry_safe(work, n, &pool->worklist, entry)
-			if (get_work_pwq(work) == pwq)
+		list_for_each_entry_safe(work, n, &pool->worklist, entry) {
+			if (get_work_pwq(work) == pwq) {
+				if (first)
+					pool->watchdog_ts = jiffies;
 				move_linked_works(work, scheduled, &n);
+			}
+			first = false;
+		}
 
 		if (!list_empty(scheduled)) {
 			process_scheduled_works(rescuer);
@@ -2316,6 +2330,37 @@ repeat:
 	goto repeat;
 }
 
+/**
+ * check_flush_dependency - check for flush dependency sanity
+ * @target_wq: workqueue being flushed
+ * @target_work: work item being flushed (NULL for workqueue flushes)
+ *
+ * %current is trying to flush the whole @target_wq or @target_work on it.
+ * If @target_wq doesn't have %WQ_MEM_RECLAIM, verify that %current is not
+ * reclaiming memory or running on a workqueue which doesn't have
+ * %WQ_MEM_RECLAIM as that can break forward-progress guarantee leading to
+ * a deadlock.
+ */
+static void check_flush_dependency(struct workqueue_struct *target_wq,
+				   struct work_struct *target_work)
+{
+	work_func_t target_func = target_work ? target_work->func : NULL;
+	struct worker *worker;
+
+	if (target_wq->flags & WQ_MEM_RECLAIM)
+		return;
+
+	worker = current_wq_worker();
+
+	WARN_ONCE(current->flags & PF_MEMALLOC,
+		  "workqueue: PF_MEMALLOC task %d(%s) is flushing !WQ_MEM_RECLAIM %s:%pf",
+		  current->pid, current->comm, target_wq->name, target_func);
+	WARN_ONCE(worker && (worker->current_pwq->wq->flags & WQ_MEM_RECLAIM),
+		  "workqueue: WQ_MEM_RECLAIM %s:%pf is flushing !WQ_MEM_RECLAIM %s:%pf",
+		  worker->current_pwq->wq->name, worker->current_func,
+		  target_wq->name, target_func);
+}
+
 struct wq_barrier {
 	struct work_struct	work;
 	struct completion	done;
@@ -2525,6 +2570,8 @@ void flush_workqueue(struct workqueue_struct *wq)
 		list_add_tail(&this_flusher.list, &wq->flusher_overflow);
 	}
 
+	check_flush_dependency(wq, NULL);
+
 	mutex_unlock(&wq->mutex);
 
 	wait_for_completion(&this_flusher.done);
@@ -2697,6 +2744,8 @@ static bool start_flush_work(struct work_struct *work, struct wq_barrier *barr)
 		pwq = worker->current_pwq;
 	}
 
+	check_flush_dependency(pwq->wq, work);
+
 	insert_wq_barrier(pwq, barr, work, worker);
 	spin_unlock_irq(&pool->lock);
 
@@ -3069,6 +3118,7 @@ static int init_worker_pool(struct worker_pool *pool)
 	pool->cpu = -1;
 	pool->node = NUMA_NO_NODE;
 	pool->flags |= POOL_DISASSOCIATED;
+	pool->watchdog_ts = jiffies;
 	INIT_LIST_HEAD(&pool->worklist);
 	INIT_LIST_HEAD(&pool->idle_list);
 	hash_init(pool->busy_hash);
@@ -3601,7 +3651,6 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 					const struct workqueue_attrs *attrs)
 {
 	struct apply_wqattrs_ctx *ctx;
-	int ret = -ENOMEM;
 
 	/* only unbound workqueues can change attributes */
 	if (WARN_ON(!(wq->flags & WQ_UNBOUND)))
@@ -3612,16 +3661,14 @@ static int apply_workqueue_attrs_locked(struct workqueue_struct *wq,
 		return -EINVAL;
 
 	ctx = apply_wqattrs_prepare(wq, attrs);
+	if (!ctx)
+		return -ENOMEM;
 
 	/* the ctx has been prepared successfully, let's commit it */
-	if (ctx) {
-		apply_wqattrs_commit(ctx);
-		ret = 0;
-	}
-
+	apply_wqattrs_commit(ctx);
 	apply_wqattrs_cleanup(ctx);
 
-	return ret;
+	return 0;
 }
 
 /**
@@ -4308,7 +4355,9 @@ void show_workqueue_state(void)
 
 		pr_info("pool %d:", pool->id);
 		pr_cont_pool_info(pool);
-		pr_cont(" workers=%d", pool->nr_workers);
+		pr_cont(" hung=%us workers=%d",
+			jiffies_to_msecs(jiffies - pool->watchdog_ts) / 1000,
+			pool->nr_workers);
 		if (pool->manager)
 			pr_cont(" manager: %d",
 				task_pid_nr(pool->manager->task));
@@ -5167,6 +5216,154 @@ static void workqueue_sysfs_unregister(struct workqueue_struct *wq)
 static void workqueue_sysfs_unregister(struct workqueue_struct *wq)	{ }
 #endif	/* CONFIG_SYSFS */
 
+/*
+ * Workqueue watchdog.
+ *
+ * Stall may be caused by various bugs - missing WQ_MEM_RECLAIM, illegal
+ * flush dependency, a concurrency managed work item which stays RUNNING
+ * indefinitely.  Workqueue stalls can be very difficult to debug as the
+ * usual warning mechanisms don't trigger and internal workqueue state is
+ * largely opaque.
+ *
+ * Workqueue watchdog monitors all worker pools periodically and dumps
+ * state if some pools failed to make forward progress for a while where
+ * forward progress is defined as the first item on ->worklist changing.
+ *
+ * This mechanism is controlled through the kernel parameter
+ * "workqueue.watchdog_thresh" which can be updated at runtime through the
+ * corresponding sysfs parameter file.
+ */
+#ifdef CONFIG_WQ_WATCHDOG
+
+static void wq_watchdog_timer_fn(unsigned long data);
+
+static unsigned long wq_watchdog_thresh = 30;
+static struct timer_list wq_watchdog_timer =
+	TIMER_DEFERRED_INITIALIZER(wq_watchdog_timer_fn, 0, 0);
+
+static unsigned long wq_watchdog_touched = INITIAL_JIFFIES;
+static DEFINE_PER_CPU(unsigned long, wq_watchdog_touched_cpu) = INITIAL_JIFFIES;
+
+static void wq_watchdog_reset_touched(void)
+{
+	int cpu;
+
+	wq_watchdog_touched = jiffies;
+	for_each_possible_cpu(cpu)
+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+}
+
+static void wq_watchdog_timer_fn(unsigned long data)
+{
+	unsigned long thresh = READ_ONCE(wq_watchdog_thresh) * HZ;
+	bool lockup_detected = false;
+	struct worker_pool *pool;
+	int pi;
+
+	if (!thresh)
+		return;
+
+	rcu_read_lock();
+
+	for_each_pool(pool, pi) {
+		unsigned long pool_ts, touched, ts;
+
+		if (list_empty(&pool->worklist))
+			continue;
+
+		/* get the latest of pool and touched timestamps */
+		pool_ts = READ_ONCE(pool->watchdog_ts);
+		touched = READ_ONCE(wq_watchdog_touched);
+
+		if (time_after(pool_ts, touched))
+			ts = pool_ts;
+		else
+			ts = touched;
+
+		if (pool->cpu >= 0) {
+			unsigned long cpu_touched =
+				READ_ONCE(per_cpu(wq_watchdog_touched_cpu,
+						  pool->cpu));
+			if (time_after(cpu_touched, ts))
+				ts = cpu_touched;
+		}
+
+		/* did we stall? */
+		if (time_after(jiffies, ts + thresh)) {
+			lockup_detected = true;
+			pr_emerg("BUG: workqueue lockup - pool");
+			pr_cont_pool_info(pool);
+			pr_cont(" stuck for %us!\n",
+				jiffies_to_msecs(jiffies - pool_ts) / 1000);
+		}
+	}
+
+	rcu_read_unlock();
+
+	if (lockup_detected)
+		show_workqueue_state();
+
+	wq_watchdog_reset_touched();
+	mod_timer(&wq_watchdog_timer, jiffies + thresh);
+}
+
+void wq_watchdog_touch(int cpu)
+{
+	if (cpu >= 0)
+		per_cpu(wq_watchdog_touched_cpu, cpu) = jiffies;
+	else
+		wq_watchdog_touched = jiffies;
+}
+
+static void wq_watchdog_set_thresh(unsigned long thresh)
+{
+	wq_watchdog_thresh = 0;
+	del_timer_sync(&wq_watchdog_timer);
+
+	if (thresh) {
+		wq_watchdog_thresh = thresh;
+		wq_watchdog_reset_touched();
+		mod_timer(&wq_watchdog_timer, jiffies + thresh * HZ);
+	}
+}
+
+static int wq_watchdog_param_set_thresh(const char *val,
+					const struct kernel_param *kp)
+{
+	unsigned long thresh;
+	int ret;
+
+	ret = kstrtoul(val, 0, &thresh);
+	if (ret)
+		return ret;
+
+	if (system_wq)
+		wq_watchdog_set_thresh(thresh);
+	else
+		wq_watchdog_thresh = thresh;
+
+	return 0;
+}
+
+static const struct kernel_param_ops wq_watchdog_thresh_ops = {
+	.set	= wq_watchdog_param_set_thresh,
+	.get	= param_get_ulong,
+};
+
+module_param_cb(watchdog_thresh, &wq_watchdog_thresh_ops, &wq_watchdog_thresh,
+		0644);
+
+static void wq_watchdog_init(void)
+{
+	wq_watchdog_set_thresh(wq_watchdog_thresh);
+}
+
+#else	/* CONFIG_WQ_WATCHDOG */
+
+static inline void wq_watchdog_init(void) { }
+
+#endif	/* CONFIG_WQ_WATCHDOG */
+
 static void __init wq_numa_init(void)
 {
 	cpumask_var_t *tbl;
@@ -5290,6 +5487,9 @@ static int __init init_workqueues(void)
 	       !system_unbound_wq || !system_freezable_wq ||
 	       !system_power_efficient_wq ||
 	       !system_freezable_power_efficient_wq);
+
+	wq_watchdog_init();
+
 	return 0;
 }
 early_initcall(init_workqueues);
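Note on the check_flush_dependency() hunk: the warning fires for the kind of dependency sketched below. This is a minimal, hypothetical illustration only (the workqueue names, work items, and module boilerplate are not part of the patch): a work item running on a WQ_MEM_RECLAIM workqueue flushes a work item on a workqueue created without WQ_MEM_RECLAIM, which can deadlock under memory pressure because the flushed workqueue has no rescuer and may need a new worker, while worker creation may itself be waiting on reclaim.

#include <linux/module.h>
#include <linux/workqueue.h>

/* Hypothetical names for illustration; not part of the patch. */
static struct workqueue_struct *reclaim_wq;	/* allocated with WQ_MEM_RECLAIM */
static struct workqueue_struct *plain_wq;	/* allocated without WQ_MEM_RECLAIM */

static void plain_work_fn(struct work_struct *work) { }
static DECLARE_WORK(plain_work, plain_work_fn);

static void reclaim_work_fn(struct work_struct *work)
{
	/*
	 * Flushing !WQ_MEM_RECLAIM work from a WQ_MEM_RECLAIM work item can
	 * break the forward-progress guarantee.  With the patch applied,
	 * start_flush_work() -> check_flush_dependency() emits a WARN_ONCE
	 * along the lines of:
	 *   workqueue: WQ_MEM_RECLAIM example_reclaim:reclaim_work_fn is
	 *   flushing !WQ_MEM_RECLAIM example_plain:plain_work_fn
	 */
	queue_work(plain_wq, &plain_work);
	flush_work(&plain_work);
}
static DECLARE_WORK(reclaim_work, reclaim_work_fn);

static int __init flush_dep_example_init(void)
{
	reclaim_wq = alloc_workqueue("example_reclaim", WQ_MEM_RECLAIM, 0);
	plain_wq = alloc_workqueue("example_plain", 0, 0);
	if (!reclaim_wq || !plain_wq)
		return -ENOMEM;

	queue_work(reclaim_wq, &reclaim_work);
	return 0;
}
module_init(flush_dep_example_init);
MODULE_LICENSE("GPL");

Flushing the whole workqueue instead (flush_workqueue(plain_wq)) hits the same check via the new call in flush_workqueue(), with a NULL target work item.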
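Note on the watchdog hunk: a pool is reported only when its ->worklist is non-empty and neither its watchdog_ts nor a touch timestamp has advanced for workqueue.watchdog_thresh seconds (default 30; settable on the kernel command line and, since the parameter is registered with module_param_cb() at mode 0644, presumably via /sys/module/workqueue/parameters/watchdog_thresh at runtime). The sketch below is a hypothetical reproducer, not part of the patch: a concurrency-managed work item that stays RUNNING without ever sleeping, so other items queued on the same per-cpu pool cannot be dispatched and the first ->worklist entry stops changing.

#include <linux/delay.h>
#include <linux/module.h>
#include <linux/workqueue.h>

/* Hypothetical work item for illustration; not part of the patch. */
static void cpu_hog_fn(struct work_struct *work)
{
	/*
	 * Busy-wait forever without sleeping.  Concurrency management only
	 * wakes another worker when the running one sleeps, so work items
	 * queued later on this per-cpu pool pile up on ->worklist and the
	 * pool's watchdog timestamp stops advancing.  After watchdog_thresh
	 * seconds the watchdog is expected to print
	 * "BUG: workqueue lockup - pool ... stuck for Ns!" and dump state
	 * via show_workqueue_state(), which now also shows "hung=Ns".
	 */
	for (;;)
		mdelay(100);
}
static DECLARE_WORK(cpu_hog_work, cpu_hog_fn);

static int __init wq_stall_example_init(void)
{
	/* system_wq is per-cpu/bound, so this pins one pool's worker. */
	schedule_work(&cpu_hog_work);
	return 0;
}
module_init(wq_stall_example_init);
MODULE_LICENSE("GPL");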
