author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 17:01:51 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-03-30 17:01:51 -0700
commit | 642e53ead6aea8740a219ede509a5d138fd4f780 (patch)
tree | 5c4680d0c07315dab24fe7333c62f56bc19ec4e4 /kernel/sched/sched.h
parent | 9b82f05f869a823d43ea4186f5f732f2924d3693 (diff)
parent | 313f16e2e35abb833eab5bdebc6ae30699adca18 (diff)
Merge branch 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
"The main changes in this cycle are:
- Various NUMA scheduling updates: harmonize the load-balancer and
NUMA placement logic to not work against each other. The intended
result is better locality, better utilization and fewer migrations.
- Introduce Thermal Pressure tracking and optimizations, to improve
task placement on thermally overloaded systems.
- Implement frequency invariant scheduler accounting on (some) x86
CPUs. This is done by observing and sampling the 'recent' CPU
frequency average at ~tick boundaries. The CPU provides this data
via the APERF/MPERF MSRs. This hopefully makes our capacity
estimates more precise and keeps tasks on the same CPU better even
if it might seem overloaded at a lower momentary frequency. (As
usual, turbo mode is a complication that we resolve by observing
the maximum frequency and renormalizing to it; see the sketch
after this list.)
- Add asymmetric CPU capacity wakeup scan to improve capacity
utilization on asymmetric topologies. (big.LITTLE systems)
- PSI fixes and optimizations.
- RT scheduling capacity awareness fixes & improvements.
- Optimize the CONFIG_RT_GROUP_SCHED constraints code.
- Misc fixes, cleanups and optimizations - see the changelog for
details"
* 'sched-core-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (62 commits)
threads: Update PID limit comment according to futex UAPI change
sched/fair: Fix condition of avg_load calculation
sched/rt: cpupri_find: Trigger a full search as fallback
kthread: Do not preempt current task if it is going to call schedule()
sched/fair: Improve spreading of utilization
sched: Avoid scale real weight down to zero
psi: Move PF_MEMSTALL out of task->flags
MAINTAINERS: Add maintenance information for psi
psi: Optimize switching tasks inside shared cgroups
psi: Fix cpu.pressure for cpu.max and competing cgroups
sched/core: Distribute tasks within affinity masks
sched/fair: Fix enqueue_task_fair warning
thermal/cpu-cooling, sched/core: Move the arch_set_thermal_pressure() API to generic scheduler code
sched/rt: Remove unnecessary push for unfit tasks
sched/rt: Allow pulling unfitting task
sched/rt: Optimize cpupri_find() on non-heterogenous systems
sched/rt: Re-instate old behavior in select_task_rq_rt()
sched/rt: cpupri_find: Implement fallback mechanism for !fit case
sched/fair: Fix reordering of enqueue/dequeue_task_fair()
sched/fair: Fix runnable_avg for throttled cfs
...
Diffstat (limited to 'kernel/sched/sched.h')
-rw-r--r-- | kernel/sched/sched.h | 69 |
1 file changed, 58 insertions(+), 11 deletions(-)
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index fdc77e796324..464742874be3 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -118,7 +118,13 @@ extern long calc_load_fold_active(struct rq *this_rq, long adjust);
 #ifdef CONFIG_64BIT
 # define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT + SCHED_FIXEDPOINT_SHIFT)
 # define scale_load(w)		((w) << SCHED_FIXEDPOINT_SHIFT)
-# define scale_load_down(w)	((w) >> SCHED_FIXEDPOINT_SHIFT)
+# define scale_load_down(w) \
+({ \
+	unsigned long __w = (w); \
+	if (__w) \
+		__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT); \
+	__w; \
+})
 #else
 # define NICE_0_LOAD_SHIFT	(SCHED_FIXEDPOINT_SHIFT)
 # define scale_load(w)		(w)
@@ -305,7 +311,6 @@ bool __dl_overflow(struct dl_bw *dl_b, int cpus, u64 old_bw, u64 new_bw)
 	       dl_b->bw * cpus < dl_b->total_bw - old_bw + new_bw;
 }
 
-extern void dl_change_utilization(struct task_struct *p, u64 new_bw);
 extern void init_dl_bw(struct dl_bw *dl_b);
 extern int sched_dl_global_validate(void);
 extern void sched_dl_do_global(void);
@@ -489,7 +494,6 @@ struct cfs_bandwidth { };
 /* CFS-related fields in a runqueue */
 struct cfs_rq {
 	struct load_weight	load;
-	unsigned long		runnable_weight;
 	unsigned int		nr_running;
 	unsigned int		h_nr_running;      /* SCHED_{NORMAL,BATCH,IDLE} */
 	unsigned int		idle_h_nr_running; /* SCHED_IDLE */
@@ -528,7 +532,7 @@ struct cfs_rq {
 		int		nr;
 		unsigned long	load_avg;
 		unsigned long	util_avg;
-		unsigned long	runnable_sum;
+		unsigned long	runnable_avg;
 	} removed;
 
 #ifdef CONFIG_FAIR_GROUP_SCHED
@@ -688,8 +692,30 @@ struct dl_rq {
 #ifdef CONFIG_FAIR_GROUP_SCHED
 /* An entity is a task if it doesn't "own" a runqueue */
 #define entity_is_task(se)	(!se->my_q)
+
+static inline void se_update_runnable(struct sched_entity *se)
+{
+	if (!entity_is_task(se))
+		se->runnable_weight = se->my_q->h_nr_running;
+}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+	if (entity_is_task(se))
+		return !!se->on_rq;
+	else
+		return se->runnable_weight;
+}
+
 #else
 #define entity_is_task(se)	1
+
+static inline void se_update_runnable(struct sched_entity *se) {}
+
+static inline long se_runnable(struct sched_entity *se)
+{
+	return !!se->on_rq;
+}
 #endif
 
 #ifdef CONFIG_SMP
@@ -701,10 +727,6 @@ static inline long se_weight(struct sched_entity *se)
 	return scale_load_down(se->load.weight);
 }
 
-static inline long se_runnable(struct sched_entity *se)
-{
-	return scale_load_down(se->runnable_weight);
-}
 
 static inline bool sched_asym_prefer(int a, int b)
 {
@@ -944,6 +966,9 @@ struct rq {
 #ifdef CONFIG_HAVE_SCHED_AVG_IRQ
 	struct sched_avg	avg_irq;
 #endif
+#ifdef CONFIG_SCHED_THERMAL_PRESSURE
+	struct sched_avg	avg_thermal;
+#endif
 	u64			idle_stamp;
 	u64			avg_idle;
 
@@ -1107,6 +1132,24 @@ static inline u64 rq_clock_task(struct rq *rq)
 	return rq->clock_task;
 }
 
+/**
+ * By default the decay is the default pelt decay period.
+ * The decay shift can change the decay period in
+ * multiples of 32.
+ *  Decay shift		Decay period(ms)
+ *	0			32
+ *	1			64
+ *	2			128
+ *	3			256
+ *	4			512
+ */
+extern int sched_thermal_decay_shift;
+
+static inline u64 rq_clock_thermal(struct rq *rq)
+{
+	return rq_clock_task(rq) >> sched_thermal_decay_shift;
+}
+
 static inline void rq_clock_skip_update(struct rq *rq)
 {
 	lockdep_assert_held(&rq->lock);
@@ -1337,8 +1380,6 @@ extern void sched_ttwu_pending(void);
 	for (__sd = rcu_dereference_check_sched_domain(cpu_rq(cpu)->sd); \
 			__sd; __sd = __sd->parent)
 
-#define for_each_lower_domain(sd) for (; sd; sd = sd->child)
-
 /**
  * highest_flag_domain - Return highest sched_domain containing flag.
  * @cpu:	The CPU whose highest level of sched domain is to
@@ -1869,7 +1910,6 @@ extern struct dl_bandwidth def_dl_bandwidth;
 extern void init_dl_bandwidth(struct dl_bandwidth *dl_b, u64 period, u64 runtime);
 extern void init_dl_task_timer(struct sched_dl_entity *dl_se);
 extern void init_dl_inactive_task_timer(struct sched_dl_entity *dl_se);
-extern void init_dl_rq_bw_ratio(struct dl_rq *dl_rq);
 
 #define BW_SHIFT		20
 #define BW_UNIT			(1 << BW_SHIFT)
@@ -1968,6 +2008,13 @@ static inline int hrtick_enabled(struct rq *rq)
 
 #endif /* CONFIG_SCHED_HRTICK */
 
+#ifndef arch_scale_freq_tick
+static __always_inline
+void arch_scale_freq_tick(void)
+{
+}
+#endif
+
 #ifndef arch_scale_freq_capacity
 static __always_inline
 unsigned long arch_scale_freq_capacity(int cpu)
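The scale_load_down() hunk at the top of the diff corresponds to the "sched: Avoid scale real weight down to zero" entry in the shortlog: any non-zero weight is now clamped to at least 2 after the fixed-point shift, so a very small cgroup share can no longer collapse to a zero load weight. A standalone sketch of the new behaviour (user-space GNU C; the local max() macro merely stands in for the kernel helper):

#include <stdio.h>

#define SCHED_FIXEDPOINT_SHIFT	10

/* Local stand-in for the kernel's max() helper. */
#define max(a, b)	((a) > (b) ? (a) : (b))

/* Same semantics as the new kernel macro: non-zero weights never scale below 2. */
#define scale_load_down(w)					\
({								\
	unsigned long __w = (w);				\
	if (__w)						\
		__w = max(2UL, __w >> SCHED_FIXEDPOINT_SHIFT);	\
	__w;							\
})

int main(void)
{
	/* nice-0 task weight on 64-bit: 1024 << SCHED_FIXEDPOINT_SHIFT */
	printf("%lu\n", scale_load_down(1024UL << SCHED_FIXEDPOINT_SHIFT));	/* 1024 */

	/* a tiny group weight that previously scaled down to 0 */
	printf("%lu\n", scale_load_down(15UL));					/* 2 */

	/* zero stays zero */
	printf("%lu\n", scale_load_down(0UL));					/* 0 */
	return 0;
}

The floor of 2 lines up with MIN_SHARES, the smallest share a task group can be given, so the scaled-down weight stays consistent with the smallest configurable group weight.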