From 7c6c16f354cde4a48bd305b2587fc78257bcb936 Mon Sep 17 00:00:00 2001 From: Bruce Ashfield Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: CONFIG_SCHED_GROUP_FAIR=y fixlet when I built with CONFIG_FAIR_GROUP_SCHED=y, I need the following change to make things right. [ From: mingo@elte.hu ] this config option is not upstream-configurable right now but lets fix this for completeness. Signed-off-by: Bruce Ashfield Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index fedbb51bba96..b5270dc98bef 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1057,7 +1057,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) */ static void set_curr_task_fair(struct rq *rq) { - struct sched_entity *se = &rq->curr.se; + struct sched_entity *se = &rq->curr->se; for_each_sched_entity(se) set_next_entity(cfs_rq_of(se), se); -- cgit v1.2.3-70-g09d2 From 71fd37146385c8255bfd370f33ca81fe8c81e5a5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: remove HZ dependency from the granularity default remove HZ dependency from the granularity default. Use 10 msec for the base granularity, 1 msec for wakeup granularity and 25 msec for batch wakeup granularity. (These defaults are close to the values that the default HZ=250 setting got previously, and thus it's the most common setting.) Signed-off-by: Ingo Molnar --- kernel/sched.c | 2 +- kernel/sched_fair.c | 13 ++++++------- 2 files changed, 7 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index 96e9b82246d2..e95ff22ed174 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -4923,7 +4923,7 @@ static inline void sched_init_granularity(void) if (sysctl_sched_granularity > gran_limit) sysctl_sched_granularity = gran_limit; - sysctl_sched_runtime_limit = sysctl_sched_granularity * 8; + sysctl_sched_runtime_limit = sysctl_sched_granularity * 5; sysctl_sched_wakeup_granularity = sysctl_sched_granularity / 2; } diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index b5270dc98bef..6b0974c3fb67 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -19,7 +19,7 @@ /* * Preemption granularity: - * (default: 2 msec, units: nanoseconds) + * (default: 10 msec, units: nanoseconds) * * NOTE: this granularity value is not the same as the concept of * 'timeslice length' - timeslices in CFS will typically be somewhat @@ -31,18 +31,17 @@ * number of CPUs. (i.e. factor 2x on 2-way systems, 3x on 4-way * systems, 4x on 8-way systems, 5x on 16-way systems, etc.) */ -unsigned int sysctl_sched_granularity __read_mostly = 2000000000ULL/HZ; +unsigned int sysctl_sched_granularity __read_mostly = 10000000UL; /* * SCHED_BATCH wake-up granularity. - * (default: 10 msec, units: nanoseconds) + * (default: 25 msec, units: nanoseconds) * * This option delays the preemption effects of decoupled workloads * and reduces their over-scheduling. Synchronous workloads will still * have immediate wakeup/sleep latencies. */ -unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = - 10000000000ULL/HZ; +unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = 25000000UL; /* * SCHED_OTHER wake-up granularity. @@ -52,12 +51,12 @@ unsigned int sysctl_sched_batch_wakeup_granularity __read_mostly = * and reduces their over-scheduling. Synchronous workloads will still * have immediate wakeup/sleep latencies. */ -unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000000ULL/HZ; +unsigned int sysctl_sched_wakeup_granularity __read_mostly = 1000000UL; unsigned int sysctl_sched_stat_granularity __read_mostly; /* - * Initialized in sched_init_granularity(): + * Initialized in sched_init_granularity() [to 5 times the base granularity]: */ unsigned int sysctl_sched_runtime_limit __read_mostly; -- cgit v1.2.3-70-g09d2 From deac4ee65af4befb66b542e4a782e63da93b51a0 Mon Sep 17 00:00:00 2001 From: Sven-Thorsten Dietrich Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: simplify can_migrate_task() Remove trivial conditional branch in Linux scheduler's can_migrate_task() function. text data bss dec hex filename 34770 2998 24 37792 93a0 sched.o.before 34757 2998 24 37779 9393 sched.o.after Signed-off-by: Sven-Thorsten Dietrich Signed-off-by: Ingo Molnar --- kernel/sched.c | 6 ------ 1 file changed, 6 deletions(-) (limited to 'kernel') diff --git a/kernel/sched.c b/kernel/sched.c index e95ff22ed174..6798328a2e0e 100644 --- a/kernel/sched.c +++ b/kernel/sched.c @@ -2180,12 +2180,6 @@ int can_migrate_task(struct task_struct *p, struct rq *rq, int this_cpu, if (task_running(rq, p)) return 0; - /* - * Aggressive migration if too many balance attempts have failed: - */ - if (sd->nr_balance_failed > sd->cache_nice_tries) - return 1; - return 1; } -- cgit v1.2.3-70-g09d2 From 98fbc798533339be802c6dcd48c2293c712e87db Mon Sep 17 00:00:00 2001 From: Dmitry Adamushko Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: optimize task_tick_rt() a bit Mitchell Erblich suggested a quality-of-implementation change to not requeue SCHED_RR tasks if there's only a single task on the runqueue, by checking for rq->nr_running == 1. provide a more efficient implementation of that, to check that particular RT priority-queue only. [ From: mingo@elte.hu ] Also first requeue the task then set need_resched - results in slightly better machine-instruction ordering. Also clean up the code a bit. Signed-off-by: Dmitry Adamushko Signed-off-by: Ingo Molnar --- kernel/sched_rt.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_rt.c b/kernel/sched_rt.c index dcdcad632fd9..4b87476a02d0 100644 --- a/kernel/sched_rt.c +++ b/kernel/sched_rt.c @@ -207,10 +207,15 @@ static void task_tick_rt(struct rq *rq, struct task_struct *p) return; p->time_slice = static_prio_timeslice(p->static_prio); - set_tsk_need_resched(p); - /* put it at the end of the queue: */ - requeue_task_rt(rq, p); + /* + * Requeue to the end of queue if we are not the only element + * on the queue: + */ + if (p->run_list.prev != p->run_list.next) { + requeue_task_rt(rq, p); + set_tsk_need_resched(p); + } } static struct sched_class rt_sched_class __read_mostly = { -- cgit v1.2.3-70-g09d2 From b2133c8b1e270b4a7c36f70e29be8738d09e850b Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: tidy up and simplify the bonus balance make the bonus balance more consistent: do not hand out a bonus if there's too much in flight already, and only deduct as much from a runner as it has the capacity. This makes the bonus engine a zero-sum game (as intended). this also simplifies the code: text data bss dec hex filename 34770 2998 24 37792 93a0 sched.o.before 34749 2998 24 37771 938b sched.o.after and it also avoids overscheduling in sleep-happy workloads like hackbench.c. Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 6b0974c3fb67..c578370cd693 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -306,6 +306,8 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr) delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec); delta = calc_delta_mine(delta, curr->load.weight, lw); delta = min((u64)delta, cfs_rq->sleeper_bonus); + delta = min(delta, (unsigned long)( + (long)sysctl_sched_runtime_limit - curr->wait_runtime)); cfs_rq->sleeper_bonus -= delta; delta_mine -= delta; } @@ -493,6 +495,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) unsigned long load = cfs_rq->load.weight, delta_fair; long prev_runtime; + /* + * Do not boost sleepers if there's too much bonus 'in flight' + * already: + */ + if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit)) + return; + if (sysctl_sched_features & SCHED_FEAT_SLEEPER_LOAD_AVG) load = rq_of(cfs_rq)->cpu_load[2]; @@ -512,16 +521,13 @@ static void __enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) prev_runtime = se->wait_runtime; __add_wait_runtime(cfs_rq, se, delta_fair); + schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); delta_fair = se->wait_runtime - prev_runtime; /* * Track the amount of bonus we've given to sleepers: */ cfs_rq->sleeper_bonus += delta_fair; - if (unlikely(cfs_rq->sleeper_bonus > sysctl_sched_runtime_limit)) - cfs_rq->sleeper_bonus = sysctl_sched_runtime_limit; - - schedstat_add(cfs_rq, wait_runtime, se->wait_runtime); } static void enqueue_sleeper(struct cfs_rq *cfs_rq, struct sched_entity *se) -- cgit v1.2.3-70-g09d2 From a6f2994042cc2db9e507dc702ed0b5e2cc5890fe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: simplify bonus calculation #1 current code: delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec); delta = calc_delta_mine(delta, curr->load.weight, lw); delta = min((u64)delta, cfs_rq->sleeper_bonus); drop the first min(), because we clip against sleeper_bonus in the 3rd line again. That gives: delta = calc_delta_mine(delta_exec, curr->load.weight, lw); delta = min((u64)delta, cfs_rq->sleeper_bonus); Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c578370cd693..5b2d97fcd80c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -303,8 +303,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr) delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) { - delta = min(cfs_rq->sleeper_bonus, (u64)delta_exec); - delta = calc_delta_mine(delta, curr->load.weight, lw); + delta = calc_delta_mine(delta_exec, curr->load.weight, lw); delta = min((u64)delta, cfs_rq->sleeper_bonus); delta = min(delta, (unsigned long)( (long)sysctl_sched_runtime_limit - curr->wait_runtime)); -- cgit v1.2.3-70-g09d2 From ea0aa3b23a193d1fc5c982286edecd071af67d94 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: simplify bonus calculation #2 current code: delta = calc_delta_mine(delta_exec, curr->load.weight, lw); delta = min((u64)delta, cfs_rq->sleeper_bonus); Notice that this calc_delta_mine() line is exactly delta_mine, which gives: delta = min((u64)delta_mine, cfs_rq->sleeper_bonus); Signed-off-by: Peter Zijlstra Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index 5b2d97fcd80c..c078f1af721c 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -303,8 +303,7 @@ __update_curr(struct cfs_rq *cfs_rq, struct sched_entity *curr) delta_mine = calc_delta_mine(delta_exec, curr->load.weight, lw); if (cfs_rq->sleeper_bonus > sysctl_sched_granularity) { - delta = calc_delta_mine(delta_exec, curr->load.weight, lw); - delta = min((u64)delta, cfs_rq->sleeper_bonus); + delta = min((u64)delta_mine, cfs_rq->sleeper_bonus); delta = min(delta, (unsigned long)( (long)sysctl_sched_runtime_limit - curr->wait_runtime)); cfs_rq->sleeper_bonus -= delta; -- cgit v1.2.3-70-g09d2 From 095e56c7036fe97bc3ebcd80ed6e121be0847656 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 24 Aug 2007 20:39:10 +0200 Subject: sched: fix startup penalty calculation fix task startup penalty miscalculation: sysctl_sched_granularity is unsigned int and wait_runtime is long so we first have to convert it to long before turning it negative ... Signed-off-by: Ingo Molnar --- kernel/sched_fair.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/sched_fair.c b/kernel/sched_fair.c index c078f1af721c..4d6b7e2df2aa 100644 --- a/kernel/sched_fair.c +++ b/kernel/sched_fair.c @@ -1047,7 +1047,7 @@ static void task_new_fair(struct rq *rq, struct task_struct *p) * -granularity/2, so initialize the task with that: */ if (sysctl_sched_features & SCHED_FEAT_START_DEBIT) - p->se.wait_runtime = -(sysctl_sched_granularity / 2); + p->se.wait_runtime = -((long)sysctl_sched_granularity / 2); __enqueue_entity(cfs_rq, se); } -- cgit v1.2.3-70-g09d2