| Field | Value | Date |
|---|---|---|
| author | Takashi Iwai <tiwai@suse.de> | 2012-01-31 15:13:14 +0100 |
| committer | Takashi Iwai <tiwai@suse.de> | 2012-01-31 15:13:14 +0100 |
| commit | ea51e5040e24eefe44d70bc654a237ca1f0225b0 | |
| tree | df2e5922dcdfafae62a10d8cd97f98121064fc23 /kernel/rcutree.c | |
| parent | 3422a47041b8cb8f14ac1e3926bcf711121df6dc | |
| parent | 8dbd52daee38adaae4d5a674bcca837e694a4f4c | |

Merge branch 'fix/asoc' into for-linus
Diffstat (limited to 'kernel/rcutree.c')
| Mode | Path | Lines changed |
|---|---|---|
| -rw-r--r-- | kernel/rcutree.c | 290 |

1 file changed, 205 insertions, 85 deletions
diff --git a/kernel/rcutree.c b/kernel/rcutree.c
index 6b76d812740c..6c4a6722abfd 100644
--- a/kernel/rcutree.c
+++ b/kernel/rcutree.c
@@ -69,7 +69,7 @@ static struct lock_class_key rcu_node_class[NUM_RCU_LVLS];
 		NUM_RCU_LVL_3, \
 		NUM_RCU_LVL_4, /* == MAX_RCU_LVLS */ \
 	}, \
-	.signaled = RCU_GP_IDLE, \
+	.fqs_state = RCU_GP_IDLE, \
 	.gpnum = -300, \
 	.completed = -300, \
 	.onofflock = __RAW_SPIN_LOCK_UNLOCKED(&structname##_state.onofflock), \
@@ -195,12 +195,10 @@ void rcu_note_context_switch(int cpu)
 }
 EXPORT_SYMBOL_GPL(rcu_note_context_switch);
 
-#ifdef CONFIG_NO_HZ
 DEFINE_PER_CPU(struct rcu_dynticks, rcu_dynticks) = {
-	.dynticks_nesting = 1,
+	.dynticks_nesting = DYNTICK_TASK_NESTING,
 	.dynticks = ATOMIC_INIT(1),
 };
-#endif /* #ifdef CONFIG_NO_HZ */
 
 static int blimit = 10;		/* Maximum callbacks per rcu_do_batch. */
 static int qhimark = 10000;	/* If this many pending, ignore blimit. */
@@ -328,11 +326,11 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 		return 1;
 	}
 
-	/* If preemptible RCU, no point in sending reschedule IPI. */
-	if (rdp->preemptible)
-		return 0;
-
-	/* The CPU is online, so send it a reschedule IPI. */
+	/*
+	 * The CPU is online, so send it a reschedule IPI.  This forces
+	 * it through the scheduler, and (inefficiently) also handles cases
+	 * where idle loops fail to inform RCU about the CPU being idle.
+	 */
 	if (rdp->cpu != smp_processor_id())
 		smp_send_reschedule(rdp->cpu);
 	else
@@ -343,59 +341,181 @@ static int rcu_implicit_offline_qs(struct rcu_data *rdp)
 
 #endif /* #ifdef CONFIG_SMP */
 
-#ifdef CONFIG_NO_HZ
+/*
+ * rcu_idle_enter_common - inform RCU that current CPU is moving towards idle
+ *
+ * If the new value of the ->dynticks_nesting counter now is zero,
+ * we really have entered idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_enter_common(struct rcu_dynticks *rdtp, long long oldval)
+{
+	trace_rcu_dyntick("Start", oldval, 0);
+	if (!is_idle_task(current)) {
+		struct task_struct *idle = idle_task(smp_processor_id());
+
+		trace_rcu_dyntick("Error on entry: not idle task", oldval, 0);
+		ftrace_dump(DUMP_ALL);
+		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+			  current->pid, current->comm,
+			  idle->pid, idle->comm); /* must be idle task! */
+	}
+	rcu_prepare_for_idle(smp_processor_id());
+	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
+	smp_mb__before_atomic_inc();  /* See above. */
+	atomic_inc(&rdtp->dynticks);
+	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
+	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+}
 
 /**
- * rcu_enter_nohz - inform RCU that current CPU is entering nohz
+ * rcu_idle_enter - inform RCU that current CPU is entering idle
  *
- * Enter nohz mode, in other words, -leave- the mode in which RCU
+ * Enter idle mode, in other words, -leave- the mode in which RCU
  * read-side critical sections can occur.  (Though RCU read-side
- * critical sections can occur in irq handlers in nohz mode, a possibility
- * handled by rcu_irq_enter() and rcu_irq_exit()).
+ * critical sections can occur in irq handlers in idle, a possibility
+ * handled by irq_enter() and irq_exit().)
+ *
+ * We crowbar the ->dynticks_nesting field to zero to allow for
+ * the possibility of usermode upcalls having messed up our count
+ * of interrupt nesting level during the prior busy period.
  */
-void rcu_enter_nohz(void)
+void rcu_idle_enter(void)
 {
 	unsigned long flags;
+	long long oldval;
 	struct rcu_dynticks *rdtp;
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (--rdtp->dynticks_nesting) {
-		local_irq_restore(flags);
-		return;
-	}
-	trace_rcu_dyntick("Start");
-	/* CPUs seeing atomic_inc() must see prior RCU read-side crit sects */
-	smp_mb__before_atomic_inc();  /* See above. */
-	atomic_inc(&rdtp->dynticks);
-	smp_mb__after_atomic_inc();  /* Force ordering with next sojourn. */
-	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
+	oldval = rdtp->dynticks_nesting;
+	rdtp->dynticks_nesting = 0;
+	rcu_idle_enter_common(rdtp, oldval);
 	local_irq_restore(flags);
 }
 
-/*
- * rcu_exit_nohz - inform RCU that current CPU is leaving nohz
+/**
+ * rcu_irq_exit - inform RCU that current CPU is exiting irq towards idle
+ *
+ * Exit from an interrupt handler, which might possibly result in entering
+ * idle mode, in other words, leaving the mode in which read-side critical
+ * sections can occur.
+ *
+ * This code assumes that the idle loop never does anything that might
+ * result in unbalanced calls to irq_enter() and irq_exit().  If your
+ * architecture violates this assumption, RCU will give you what you
+ * deserve, good and hard.  But very infrequently and irreproducibly.
  *
- * Exit nohz mode, in other words, -enter- the mode in which RCU
- * read-side critical sections normally occur.
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
  */
-void rcu_exit_nohz(void)
+void rcu_irq_exit(void)
 {
 	unsigned long flags;
+	long long oldval;
 	struct rcu_dynticks *rdtp;
 
 	local_irq_save(flags);
 	rdtp = &__get_cpu_var(rcu_dynticks);
-	if (rdtp->dynticks_nesting++) {
-		local_irq_restore(flags);
-		return;
-	}
+	oldval = rdtp->dynticks_nesting;
+	rdtp->dynticks_nesting--;
+	WARN_ON_ONCE(rdtp->dynticks_nesting < 0);
+	if (rdtp->dynticks_nesting)
+		trace_rcu_dyntick("--=", oldval, rdtp->dynticks_nesting);
+	else
+		rcu_idle_enter_common(rdtp, oldval);
+	local_irq_restore(flags);
+}
+
+/*
+ * rcu_idle_exit_common - inform RCU that current CPU is moving away from idle
+ *
+ * If the new value of the ->dynticks_nesting counter was previously zero,
+ * we really have exited idle, and must do the appropriate accounting.
+ * The caller must have disabled interrupts.
+ */
+static void rcu_idle_exit_common(struct rcu_dynticks *rdtp, long long oldval)
+{
 	smp_mb__before_atomic_inc();  /* Force ordering w/previous sojourn. */
 	atomic_inc(&rdtp->dynticks);
 	/* CPUs seeing atomic_inc() must see later RCU read-side crit sects */
 	smp_mb__after_atomic_inc();  /* See above. */
 	WARN_ON_ONCE(!(atomic_read(&rdtp->dynticks) & 0x1));
-	trace_rcu_dyntick("End");
+	rcu_cleanup_after_idle(smp_processor_id());
+	trace_rcu_dyntick("End", oldval, rdtp->dynticks_nesting);
+	if (!is_idle_task(current)) {
+		struct task_struct *idle = idle_task(smp_processor_id());
+
+		trace_rcu_dyntick("Error on exit: not idle task",
+				  oldval, rdtp->dynticks_nesting);
+		ftrace_dump(DUMP_ALL);
+		WARN_ONCE(1, "Current pid: %d comm: %s / Idle pid: %d comm: %s",
+			  current->pid, current->comm,
+			  idle->pid, idle->comm); /* must be idle task! */
+	}
+}
+
+/**
+ * rcu_idle_exit - inform RCU that current CPU is leaving idle
+ *
+ * Exit idle mode, in other words, -enter- the mode in which RCU
+ * read-side critical sections can occur.
+ *
+ * We crowbar the ->dynticks_nesting field to DYNTICK_TASK_NESTING to
+ * allow for the possibility of usermode upcalls messing up our count
+ * of interrupt nesting level during the busy period that is just
+ * now starting.
+ */
+void rcu_idle_exit(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+	long long oldval;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	oldval = rdtp->dynticks_nesting;
+	WARN_ON_ONCE(oldval != 0);
+	rdtp->dynticks_nesting = DYNTICK_TASK_NESTING;
+	rcu_idle_exit_common(rdtp, oldval);
+	local_irq_restore(flags);
+}
+
+/**
+ * rcu_irq_enter - inform RCU that current CPU is entering irq away from idle
+ *
+ * Enter an interrupt handler, which might possibly result in exiting
+ * idle mode, in other words, entering the mode in which read-side critical
+ * sections can occur.
+ *
+ * Note that the Linux kernel is fully capable of entering an interrupt
+ * handler that it never exits, for example when doing upcalls to
+ * user mode!  This code assumes that the idle loop never does upcalls to
+ * user mode.  If your architecture does do upcalls from the idle loop (or
+ * does anything else that results in unbalanced calls to the irq_enter()
+ * and irq_exit() functions), RCU will give you what you deserve, good
+ * and hard.  But very infrequently and irreproducibly.
+ *
+ * Use things like work queues to work around this limitation.
+ *
+ * You have been warned.
+ */
+void rcu_irq_enter(void)
+{
+	unsigned long flags;
+	struct rcu_dynticks *rdtp;
+	long long oldval;
+
+	local_irq_save(flags);
+	rdtp = &__get_cpu_var(rcu_dynticks);
+	oldval = rdtp->dynticks_nesting;
+	rdtp->dynticks_nesting++;
+	WARN_ON_ONCE(rdtp->dynticks_nesting == 0);
+	if (oldval)
+		trace_rcu_dyntick("++=", oldval, rdtp->dynticks_nesting);
+	else
+		rcu_idle_exit_common(rdtp, oldval);
 	local_irq_restore(flags);
 }
 
@@ -442,27 +562,37 @@ void rcu_nmi_exit(void)
 	WARN_ON_ONCE(atomic_read(&rdtp->dynticks) & 0x1);
 }
 
+#ifdef CONFIG_PROVE_RCU
+
 /**
- * rcu_irq_enter - inform RCU of entry to hard irq context
+ * rcu_is_cpu_idle - see if RCU thinks that the current CPU is idle
  *
- * If the CPU was idle with dynamic ticks active, this updates the
- * rdtp->dynticks to let the RCU handling know that the CPU is active.
+ * If the current CPU is in its idle loop and is neither in an interrupt
+ * or NMI handler, return true.
  */
-void rcu_irq_enter(void)
+int rcu_is_cpu_idle(void)
 {
-	rcu_exit_nohz();
+	int ret;
+
+	preempt_disable();
+	ret = (atomic_read(&__get_cpu_var(rcu_dynticks).dynticks) & 0x1) == 0;
+	preempt_enable();
+	return ret;
 }
+EXPORT_SYMBOL(rcu_is_cpu_idle);
+
+#endif /* #ifdef CONFIG_PROVE_RCU */
 
 /**
- * rcu_irq_exit - inform RCU of exit from hard irq context
+ * rcu_is_cpu_rrupt_from_idle - see if idle or immediately interrupted from idle
  *
- * If the CPU was idle with dynamic ticks active, update the rdp->dynticks
- * to put let the RCU handling be aware that the CPU is going back to idle
- * with no ticks.
+ * If the current CPU is idle or running at a first-level (not nested)
+ * interrupt from idle, return true.  The caller must have at least
+ * disabled preemption.
  */
-void rcu_irq_exit(void)
+int rcu_is_cpu_rrupt_from_idle(void)
 {
-	rcu_enter_nohz();
+	return __get_cpu_var(rcu_dynticks).dynticks_nesting <= 1;
 }
 
 #ifdef CONFIG_SMP
@@ -475,7 +605,7 @@ void rcu_irq_exit(void)
 static int dyntick_save_progress_counter(struct rcu_data *rdp)
 {
 	rdp->dynticks_snap = atomic_add_return(0, &rdp->dynticks->dynticks);
-	return 0;
+	return (rdp->dynticks_snap & 0x1) == 0;
 }
 
 /*
@@ -512,26 +642,6 @@ static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
 
 #endif /* #ifdef CONFIG_SMP */
 
-#else /* #ifdef CONFIG_NO_HZ */
-
-#ifdef CONFIG_SMP
-
-static int dyntick_save_progress_counter(struct rcu_data *rdp)
-{
-	return 0;
-}
-
-static int rcu_implicit_dynticks_qs(struct rcu_data *rdp)
-{
-	return rcu_implicit_offline_qs(rdp);
-}
-
-#endif /* #ifdef CONFIG_SMP */
-
-#endif /* #else #ifdef CONFIG_NO_HZ */
-
-int rcu_cpu_stall_suppress __read_mostly;
-
 static void record_gp_stall_check_time(struct rcu_state *rsp)
 {
 	rsp->gp_start = jiffies;
@@ -866,8 +976,8 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 	/* Advance to a new grace period and initialize state. */
 	rsp->gpnum++;
 	trace_rcu_grace_period(rsp->name, rsp->gpnum, "start");
-	WARN_ON_ONCE(rsp->signaled == RCU_GP_INIT);
-	rsp->signaled = RCU_GP_INIT; /* Hold off force_quiescent_state. */
+	WARN_ON_ONCE(rsp->fqs_state == RCU_GP_INIT);
+	rsp->fqs_state = RCU_GP_INIT; /* Hold off force_quiescent_state. */
 	rsp->jiffies_force_qs = jiffies + RCU_JIFFIES_TILL_FORCE_QS;
 	record_gp_stall_check_time(rsp);
 
@@ -877,7 +987,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 		rnp->qsmask = rnp->qsmaskinit;
 		rnp->gpnum = rsp->gpnum;
 		rnp->completed = rsp->completed;
-		rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state OK. */
+		rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state OK */
 		rcu_start_gp_per_cpu(rsp, rnp, rdp);
 		rcu_preempt_boost_start_gp(rnp);
 		trace_rcu_grace_period_init(rsp->name, rnp->gpnum,
@@ -927,7 +1037,7 @@ rcu_start_gp(struct rcu_state *rsp, unsigned long flags)
 
 	rnp = rcu_get_root(rsp);
 	raw_spin_lock(&rnp->lock);		/* irqs already disabled. */
-	rsp->signaled = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
+	rsp->fqs_state = RCU_SIGNAL_INIT; /* force_quiescent_state now OK. */
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 	raw_spin_unlock_irqrestore(&rsp->onofflock, flags);
 }
@@ -991,7 +1101,7 @@ static void rcu_report_qs_rsp(struct rcu_state *rsp, unsigned long flags)
 	rsp->completed = rsp->gpnum;  /* Declare the grace period complete. */
 	trace_rcu_grace_period(rsp->name, rsp->completed, "end");
-	rsp->signaled = RCU_GP_IDLE;
+	rsp->fqs_state = RCU_GP_IDLE;
 	rcu_start_gp(rsp, flags);  /* releases root node's rnp->lock. */
 }
 
@@ -1221,7 +1331,7 @@ static void __rcu_offline_cpu(int cpu, struct rcu_state *rsp)
 	else
 		raw_spin_unlock_irqrestore(&rnp->lock, flags);
 	if (need_report & RCU_OFL_TASKS_EXP_GP)
-		rcu_report_exp_rnp(rsp, rnp);
+		rcu_report_exp_rnp(rsp, rnp, true);
 	rcu_node_kthread_setaffinity(rnp, -1);
 }
 
@@ -1263,7 +1373,9 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 	/* If no callbacks are ready, just return.*/
 	if (!cpu_has_callbacks_ready_to_invoke(rdp)) {
 		trace_rcu_batch_start(rsp->name, 0, 0);
-		trace_rcu_batch_end(rsp->name, 0);
+		trace_rcu_batch_end(rsp->name, 0, !!ACCESS_ONCE(rdp->nxtlist),
+				    need_resched(), is_idle_task(current),
+				    rcu_is_callbacks_kthread());
		return;
 	}
 
@@ -1291,12 +1403,17 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
 		debug_rcu_head_unqueue(list);
 		__rcu_reclaim(rsp->name, list);
 		list = next;
-		if (++count >= bl)
+		/* Stop only if limit reached and CPU has something to do. */
+		if (++count >= bl &&
+		    (need_resched() ||
+		     (!is_idle_task(current) && !rcu_is_callbacks_kthread())))
 			break;
 	}
 
 	local_irq_save(flags);
-	trace_rcu_batch_end(rsp->name, count);
+	trace_rcu_batch_end(rsp->name, count, !!list, need_resched(),
+			    is_idle_task(current),
+			    rcu_is_callbacks_kthread());
 
 	/* Update count, and requeue any remaining callbacks. */
 	rdp->qlen -= count;
@@ -1334,16 +1451,14 @@ static void rcu_do_batch(struct rcu_state *rsp, struct rcu_data *rdp)
  * (user mode or idle loop for rcu, non-softirq execution for rcu_bh).
  * Also schedule RCU core processing.
  *
- * This function must be called with hardirqs disabled.  It is normally
+ * This function must be called from hardirq context.  It is normally
 * invoked from the scheduling-clock interrupt.  If rcu_pending returns
 * false, there is no point in invoking rcu_check_callbacks().
 */
 void rcu_check_callbacks(int cpu, int user)
 {
 	trace_rcu_utilization("Start scheduler-tick");
-	if (user ||
-	    (idle_cpu(cpu) && rcu_scheduler_active &&
-	     !in_softirq() && hardirq_count() <= (1 << HARDIRQ_SHIFT))) {
+	if (user || rcu_is_cpu_rrupt_from_idle()) {
 
 		/*
 		 * Get here if this CPU took its interrupt from user
@@ -1457,7 +1572,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
 		goto unlock_fqs_ret;  /* no GP in progress, time updated. */
 	}
 	rsp->fqs_active = 1;
-	switch (rsp->signaled) {
+	switch (rsp->fqs_state) {
 	case RCU_GP_IDLE:
 	case RCU_GP_INIT:
 
@@ -1473,7 +1588,7 @@ static void force_quiescent_state(struct rcu_state *rsp, int relaxed)
 		force_qs_rnp(rsp, dyntick_save_progress_counter);
 		raw_spin_lock(&rnp->lock);  /* irqs already disabled */
 		if (rcu_gp_in_progress(rsp))
-			rsp->signaled = RCU_FORCE_QS;
+			rsp->fqs_state = RCU_FORCE_QS;
 		break;
 
 	case RCU_FORCE_QS:
@@ -1812,7 +1927,7 @@ static int rcu_pending(int cpu)
 * by the current CPU, even if none need be done immediately, returning
 * 1 if so.
 */
-static int rcu_needs_cpu_quick_check(int cpu)
+static int rcu_cpu_has_callbacks(int cpu)
 {
 	/* RCU callbacks either ready or pending? */
 	return per_cpu(rcu_sched_data, cpu).nxtlist ||
@@ -1913,9 +2028,9 @@ rcu_boot_init_percpu_data(int cpu, struct rcu_state *rsp)
 	for (i = 0; i < RCU_NEXT_SIZE; i++)
 		rdp->nxttail[i] = &rdp->nxtlist;
 	rdp->qlen = 0;
-#ifdef CONFIG_NO_HZ
 	rdp->dynticks = &per_cpu(rcu_dynticks, cpu);
-#endif /* #ifdef CONFIG_NO_HZ */
+	WARN_ON_ONCE(rdp->dynticks->dynticks_nesting != DYNTICK_TASK_NESTING);
+	WARN_ON_ONCE(atomic_read(&rdp->dynticks->dynticks) != 1);
 	rdp->cpu = cpu;
 	rdp->rsp = rsp;
 	raw_spin_unlock_irqrestore(&rnp->lock, flags);
@@ -1942,6 +2057,10 @@ rcu_init_percpu_data(int cpu, struct rcu_state *rsp, int preemptible)
 	rdp->qlen_last_fqs_check = 0;
 	rdp->n_force_qs_snap = rsp->n_force_qs;
 	rdp->blimit = blimit;
+	rdp->dynticks->dynticks_nesting = DYNTICK_TASK_NESTING;
+	atomic_set(&rdp->dynticks->dynticks,
+		   (atomic_read(&rdp->dynticks->dynticks) & ~0x1) + 1);
+	rcu_prepare_for_idle_init(cpu);
 	raw_spin_unlock(&rnp->lock);		/* irqs remain disabled. */
 
 	/*
@@ -2023,6 +2142,7 @@ static int __cpuinit rcu_cpu_notify(struct notifier_block *self,
 		rcu_send_cbs_to_online(&rcu_bh_state);
 		rcu_send_cbs_to_online(&rcu_sched_state);
 		rcu_preempt_send_cbs_to_online();
+		rcu_cleanup_after_idle(cpu);
 		break;
 	case CPU_DEAD:
 	case CPU_DEAD_FROZEN:
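The core of this pull, as far as kernel/rcutree.c is concerned, is the dyntick-idle rework: `->dynticks_nesting` now counts the reasons the CPU is non-idle (crowbarred to 0 by rcu_idle_enter() and to DYNTICK_TASK_NESTING by rcu_idle_exit()), while the atomic `->dynticks` counter stays even while the CPU is idle and odd otherwise. The following user-space sketch models that bookkeeping under those assumptions; every name in it (`my_dynticks`, `model_idle_enter()`, `TASK_NESTING`, ...) is invented for illustration, and none of it is the kernel implementation.

```c
/* Minimal user-space model of the dyntick-idle counters; illustrative only. */
#include <assert.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

#define TASK_NESTING 1000000	/* stands in for DYNTICK_TASK_NESTING */

struct my_dynticks {
	long long nesting;	/* reasons this CPU is not idle */
	atomic_int dynticks;	/* even while idle, odd otherwise */
};

static struct my_dynticks dt = { .nesting = TASK_NESTING,
				 .dynticks = 1 };	/* boot: busy, odd */

static void mark_idle(void)		/* models rcu_idle_enter_common() */
{
	atomic_fetch_add(&dt.dynticks, 1);		/* odd -> even */
	assert((atomic_load(&dt.dynticks) & 0x1) == 0);
}

static void mark_nonidle(void)		/* models rcu_idle_exit_common() */
{
	atomic_fetch_add(&dt.dynticks, 1);		/* even -> odd */
	assert(atomic_load(&dt.dynticks) & 0x1);
}

static void model_idle_enter(void)	/* models rcu_idle_enter() */
{
	dt.nesting = 0;			/* crowbar: forget stale nesting */
	mark_idle();
}

static void model_idle_exit(void)	/* models rcu_idle_exit() */
{
	assert(dt.nesting == 0);
	dt.nesting = TASK_NESTING;	/* crowbar back to "in a task" */
	mark_nonidle();
}

static void model_irq_enter(void)	/* models rcu_irq_enter() */
{
	if (dt.nesting++ == 0)		/* first irq from idle: wake up */
		mark_nonidle();
}

static void model_irq_exit(void)	/* models rcu_irq_exit() */
{
	assert(dt.nesting > 0);
	if (--dt.nesting == 0)		/* last irq over idle: idle again */
		mark_idle();
}

static bool model_cpu_is_idle(void)	/* models rcu_is_cpu_idle() */
{
	return (atomic_load(&dt.dynticks) & 0x1) == 0;
}

int main(void)
{
	model_idle_enter();
	printf("idle? %d\n", model_cpu_is_idle());	/* 1 */
	model_irq_enter();		/* interrupt arrives while idle */
	printf("idle? %d\n", model_cpu_is_idle());	/* 0 */
	model_irq_exit();
	printf("idle? %d\n", model_cpu_is_idle());	/* 1 */
	model_idle_exit();
	printf("idle? %d\n", model_cpu_is_idle());	/* 0 */
	return 0;
}
```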
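The one-line change to dyntick_save_progress_counter() (returning `(snap & 0x1) == 0` instead of 0) is what lets force_quiescent_state() credit an idle CPU with a quiescent state without disturbing it. Below is a hedged model of that snapshot-and-compare idea; `snap_cpu()` and `cpu_passed_qs()` are invented names, and the comparison is deliberately simplified relative to the kernel's rcu_implicit_dynticks_qs().

```c
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Per-CPU dyntick counter: even while idle, odd otherwise. */
struct cpu_state {
	atomic_int dynticks;
	int snap;		/* value captured at grace-period start */
};

/* First scan: snapshot the counter.  If it is even, the CPU is idle
 * right now and is therefore already in a quiescent state. */
static bool snap_cpu(struct cpu_state *cs)
{
	cs->snap = atomic_load(&cs->dynticks);
	return (cs->snap & 0x1) == 0;
}

/* Later scan: if the counter is even now, or moved at all since the
 * snapshot, the CPU has passed through idle (or an interrupt from idle)
 * and hence through a quiescent state. */
static bool cpu_passed_qs(struct cpu_state *cs)
{
	int curr = atomic_load(&cs->dynticks);

	return (curr & 0x1) == 0 || curr != cs->snap;
}

int main(void)
{
	static struct cpu_state cs = { .dynticks = 1 };	/* odd: CPU busy */

	bool qs_at_start = snap_cpu(&cs);		/* false: still busy */
	atomic_fetch_add(&cs.dynticks, 1);		/* CPU enters idle */
	printf("qs at start: %d, qs later: %d\n",
	       qs_at_start, cpu_passed_qs(&cs));	/* 0, 1 */
	return 0;
}
```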
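The rcu_do_batch() hunks change the stopping rule for callback invocation: the `blimit` batch limit is now enforced only when the CPU has something better to do (a reschedule is pending, or the current task is neither the idle task nor the RCU callback kthread). A rough stand-alone model of that loop condition, with invented types and names rather than the kernel's rcu_head machinery:

```c
#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

struct cb {
	struct cb *next;
	void (*func)(struct cb *);
};

/* Invoke callbacks from 'list' with batch limit 'bl'.  Mirrors the new
 * stopping rule: keep going past the limit while nobody else needs the
 * CPU (idle task or the dedicated callback kthread). */
static int run_callbacks(struct cb *list, int bl, bool need_resched,
			 bool is_idle_task, bool is_cb_kthread)
{
	int count = 0;

	while (list) {
		struct cb *next = list->next;

		list->func(list);
		list = next;
		if (++count >= bl &&
		    (need_resched || (!is_idle_task && !is_cb_kthread)))
			break;		/* yield the CPU */
	}
	return count;
}

static void hello(struct cb *unused)
{
	(void)unused;
	printf("callback invoked\n");
}

int main(void)
{
	struct cb c2 = { .next = NULL, .func = hello };
	struct cb c1 = { .next = &c2, .func = hello };

	/* Idle CPU, no resched pending: both callbacks run despite bl == 1. */
	printf("ran %d callbacks\n",
	       run_callbacks(&c1, 1, false, true, false));
	return 0;
}
```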
