diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-28 13:33:57 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-04-28 13:33:57 -0700 |
commit | 16b3d0cf5bad844daaf436ad2e9061de0fe36e5c (patch) | |
tree | d553a51e6d95fb166df7fa62264e9a27e4c438a4 /include | |
parent | 42dec9a936e7696bea1f27d3c5a0068cd9aa95fd (diff) | |
parent | 2ea46c6fc9452ac100ad907b051d797225847e33 (diff) |
Merge tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull scheduler updates from Ingo Molnar:
- Clean up SCHED_DEBUG: move the decades old mess of sysctl, procfs and
debugfs interfaces to a unified debugfs interface.
- Signals: Allow caching one sigqueue object per task, to improve
performance & latencies.
- Improve newidle_balance() irq-off latencies on systems with a large
number of CPU cgroups.
- Improve energy-aware scheduling
- Improve the PELT metrics for certain workloads
- Reintroduce select_idle_smt() to improve load-balancing locality -
but without the previous regressions
- Add 'scheduler latency debugging': warn after long periods of pending
need_resched. This is an opt-in feature that requires the enabling of
the LATENCY_WARN scheduler feature, or the use of the
resched_latency_warn_ms=xx boot parameter.
- CPU hotplug fixes for HP-rollback, and for the 'fail' interface. Fix
remaining balance_push() vs. hotplug holes/races
- PSI fixes, plus allow /proc/pressure/ files to be written by
CAP_SYS_RESOURCE tasks as well
- Fix/improve various load-balancing corner cases vs. capacity margins
- Fix sched topology on systems with NUMA diameter of 3 or above
- Fix PF_KTHREAD vs to_kthread() race
- Minor rseq optimizations
- Misc cleanups, optimizations, fixes and smaller updates
* tag 'sched-core-2021-04-28' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (61 commits)
cpumask/hotplug: Fix cpu_dying() state tracking
kthread: Fix PF_KTHREAD vs to_kthread() race
sched/debug: Fix cgroup_path[] serialization
sched,psi: Handle potential task count underflow bugs more gracefully
sched: Warn on long periods of pending need_resched
sched/fair: Move update_nohz_stats() to the CONFIG_NO_HZ_COMMON block to simplify the code & fix an unused function warning
sched/debug: Rename the sched_debug parameter to sched_verbose
sched,fair: Alternative sched_slice()
sched: Move /proc/sched_debug to debugfs
sched,debug: Convert sysctl sched_domains to debugfs
debugfs: Implement debugfs_create_str()
sched,preempt: Move preempt_dynamic to debug.c
sched: Move SCHED_DEBUG sysctl to debugfs
sched: Don't make LATENCYTOP select SCHED_DEBUG
sched: Remove sched_schedstats sysctl out from under SCHED_DEBUG
sched/numa: Allow runtime enabling/disabling of NUMA balance without SCHED_DEBUG
sched: Use cpu_dying() to fix balance_push vs hotplug-rollback
cpumask: Introduce DYING mask
cpumask: Make cpu_{online,possible,present,active}() inline
rseq: Optimise rseq_get_rseq_cs() and clear_rseq_cs()
...
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/cpumask.h | 117 | ||||
-rw-r--r-- | include/linux/debugfs.h | 17 | ||||
-rw-r--r-- | include/linux/kcov.h | 1 | ||||
-rw-r--r-- | include/linux/psi.h | 1 | ||||
-rw-r--r-- | include/linux/psi_types.h | 3 | ||||
-rw-r--r-- | include/linux/sched.h | 4 | ||||
-rw-r--r-- | include/linux/sched/sysctl.h | 9 | ||||
-rw-r--r-- | include/linux/signal.h | 1 | ||||
-rw-r--r-- | include/uapi/linux/ptrace.h | 10 |
9 files changed, 124 insertions, 39 deletions
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 383684e30f12..e6b948a6000d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -91,44 +91,15 @@ extern struct cpumask __cpu_possible_mask; extern struct cpumask __cpu_online_mask; extern struct cpumask __cpu_present_mask; extern struct cpumask __cpu_active_mask; +extern struct cpumask __cpu_dying_mask; #define cpu_possible_mask ((const struct cpumask *)&__cpu_possible_mask) #define cpu_online_mask ((const struct cpumask *)&__cpu_online_mask) #define cpu_present_mask ((const struct cpumask *)&__cpu_present_mask) #define cpu_active_mask ((const struct cpumask *)&__cpu_active_mask) +#define cpu_dying_mask ((const struct cpumask *)&__cpu_dying_mask) extern atomic_t __num_online_cpus; -#if NR_CPUS > 1 -/** - * num_online_cpus() - Read the number of online CPUs - * - * Despite the fact that __num_online_cpus is of type atomic_t, this - * interface gives only a momentary snapshot and is not protected against - * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held - * region. - */ -static inline unsigned int num_online_cpus(void) -{ - return atomic_read(&__num_online_cpus); -} -#define num_possible_cpus() cpumask_weight(cpu_possible_mask) -#define num_present_cpus() cpumask_weight(cpu_present_mask) -#define num_active_cpus() cpumask_weight(cpu_active_mask) -#define cpu_online(cpu) cpumask_test_cpu((cpu), cpu_online_mask) -#define cpu_possible(cpu) cpumask_test_cpu((cpu), cpu_possible_mask) -#define cpu_present(cpu) cpumask_test_cpu((cpu), cpu_present_mask) -#define cpu_active(cpu) cpumask_test_cpu((cpu), cpu_active_mask) -#else -#define num_online_cpus() 1U -#define num_possible_cpus() 1U -#define num_present_cpus() 1U -#define num_active_cpus() 1U -#define cpu_online(cpu) ((cpu) == 0) -#define cpu_possible(cpu) ((cpu) == 0) -#define cpu_present(cpu) ((cpu) == 0) -#define cpu_active(cpu) ((cpu) == 0) -#endif - extern cpumask_t cpus_booted_once_mask; static inline void cpu_max_bits_warn(unsigned int cpu, unsigned int bits) @@ -857,6 +828,14 @@ set_cpu_active(unsigned int cpu, bool active) cpumask_clear_cpu(cpu, &__cpu_active_mask); } +static inline void +set_cpu_dying(unsigned int cpu, bool dying) +{ + if (dying) + cpumask_set_cpu(cpu, &__cpu_dying_mask); + else + cpumask_clear_cpu(cpu, &__cpu_dying_mask); +} /** * to_cpumask - convert an NR_CPUS bitmap to a struct cpumask * @@ -894,6 +873,82 @@ static inline const struct cpumask *get_cpu_mask(unsigned int cpu) return to_cpumask(p); } +#if NR_CPUS > 1 +/** + * num_online_cpus() - Read the number of online CPUs + * + * Despite the fact that __num_online_cpus is of type atomic_t, this + * interface gives only a momentary snapshot and is not protected against + * concurrent CPU hotplug operations unless invoked from a cpuhp_lock held + * region. + */ +static inline unsigned int num_online_cpus(void) +{ + return atomic_read(&__num_online_cpus); +} +#define num_possible_cpus() cpumask_weight(cpu_possible_mask) +#define num_present_cpus() cpumask_weight(cpu_present_mask) +#define num_active_cpus() cpumask_weight(cpu_active_mask) + +static inline bool cpu_online(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_online_mask); +} + +static inline bool cpu_possible(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_possible_mask); +} + +static inline bool cpu_present(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_present_mask); +} + +static inline bool cpu_active(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_active_mask); +} + +static inline bool cpu_dying(unsigned int cpu) +{ + return cpumask_test_cpu(cpu, cpu_dying_mask); +} + +#else + +#define num_online_cpus() 1U +#define num_possible_cpus() 1U +#define num_present_cpus() 1U +#define num_active_cpus() 1U + +static inline bool cpu_online(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_possible(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_present(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_active(unsigned int cpu) +{ + return cpu == 0; +} + +static inline bool cpu_dying(unsigned int cpu) +{ + return false; +} + +#endif /* NR_CPUS > 1 */ + #define cpu_is_offline(cpu) unlikely(!cpu_online(cpu)) #if NR_CPUS <= BITS_PER_LONG diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index d6c4cc9ecc77..1fdb4343af9c 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -128,6 +128,8 @@ void debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value); struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, bool *value); +void debugfs_create_str(const char *name, umode_t mode, + struct dentry *parent, char **value); struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, @@ -156,6 +158,9 @@ ssize_t debugfs_read_file_bool(struct file *file, char __user *user_buf, ssize_t debugfs_write_file_bool(struct file *file, const char __user *user_buf, size_t count, loff_t *ppos); +ssize_t debugfs_read_file_str(struct file *file, char __user *user_buf, + size_t count, loff_t *ppos); + #else #include <linux/err.h> @@ -297,6 +302,11 @@ static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, return ERR_PTR(-ENODEV); } +static inline void debugfs_create_str(const char *name, umode_t mode, + struct dentry *parent, + char **value) +{ } + static inline struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, struct debugfs_blob_wrapper *blob) @@ -348,6 +358,13 @@ static inline ssize_t debugfs_write_file_bool(struct file *file, return -ENODEV; } +static inline ssize_t debugfs_read_file_str(struct file *file, + char __user *user_buf, + size_t count, loff_t *ppos) +{ + return -ENODEV; +} + #endif /** diff --git a/include/linux/kcov.h b/include/linux/kcov.h index 4e3037dc1204..55dc338f6bcd 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -2,6 +2,7 @@ #ifndef _LINUX_KCOV_H #define _LINUX_KCOV_H +#include <linux/sched.h> #include <uapi/linux/kcov.h> struct task_struct; diff --git a/include/linux/psi.h b/include/linux/psi.h index 7361023f3fdd..65eb1476ac70 100644 --- a/include/linux/psi.h +++ b/include/linux/psi.h @@ -20,7 +20,6 @@ void psi_task_change(struct task_struct *task, int clear, int set); void psi_task_switch(struct task_struct *prev, struct task_struct *next, bool sleep); -void psi_memstall_tick(struct task_struct *task, int cpu); void psi_memstall_enter(unsigned long *flags); void psi_memstall_leave(unsigned long *flags); diff --git a/include/linux/psi_types.h b/include/linux/psi_types.h index b95f3211566a..0a23300d49af 100644 --- a/include/linux/psi_types.h +++ b/include/linux/psi_types.h @@ -50,9 +50,10 @@ enum psi_states { PSI_MEM_SOME, PSI_MEM_FULL, PSI_CPU_SOME, + PSI_CPU_FULL, /* Only per-CPU, to weigh the CPU in the global average: */ PSI_NONIDLE, - NR_PSI_STATES = 6, + NR_PSI_STATES = 7, }; enum psi_aggregators { diff --git a/include/linux/sched.h b/include/linux/sched.h index 743a613c9cf3..f652a8ccad73 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -14,7 +14,6 @@ #include <linux/pid.h> #include <linux/sem.h> #include <linux/shm.h> -#include <linux/kcov.h> #include <linux/mutex.h> #include <linux/plist.h> #include <linux/hrtimer.h> @@ -985,6 +984,7 @@ struct task_struct { /* Signal handlers: */ struct signal_struct *signal; struct sighand_struct __rcu *sighand; + struct sigqueue *sigqueue_cache; sigset_t blocked; sigset_t real_blocked; /* Restored if set_restore_sigmask() was used: */ @@ -1101,7 +1101,7 @@ struct task_struct { #ifdef CONFIG_CPUSETS /* Protected by ->alloc_lock: */ nodemask_t mems_allowed; - /* Seqence number to catch updates: */ + /* Sequence number to catch updates: */ seqcount_spinlock_t mems_allowed_seq; int cpuset_mem_spread_rotor; int cpuset_slab_spread_rotor; diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 3c31ba88aca5..db2c0f34aaaf 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -26,10 +26,11 @@ int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, enum { sysctl_hung_task_timeout_secs = 0 }; #endif +extern unsigned int sysctl_sched_child_runs_first; + extern unsigned int sysctl_sched_latency; extern unsigned int sysctl_sched_min_granularity; extern unsigned int sysctl_sched_wakeup_granularity; -extern unsigned int sysctl_sched_child_runs_first; enum sched_tunable_scaling { SCHED_TUNABLESCALING_NONE, @@ -37,7 +38,7 @@ enum sched_tunable_scaling { SCHED_TUNABLESCALING_LINEAR, SCHED_TUNABLESCALING_END, }; -extern enum sched_tunable_scaling sysctl_sched_tunable_scaling; +extern unsigned int sysctl_sched_tunable_scaling; extern unsigned int sysctl_numa_balancing_scan_delay; extern unsigned int sysctl_numa_balancing_scan_period_min; @@ -48,8 +49,8 @@ extern unsigned int sysctl_numa_balancing_scan_size; extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; -int sched_proc_update_handler(struct ctl_table *table, int write, - void *buffer, size_t *length, loff_t *ppos); +extern int sysctl_resched_latency_warn_ms; +extern int sysctl_resched_latency_warn_once; #endif /* diff --git a/include/linux/signal.h b/include/linux/signal.h index 1e98548d7cf6..0dbfda8d99d0 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -266,6 +266,7 @@ static inline void init_sigpending(struct sigpending *sig) } extern void flush_sigqueue(struct sigpending *queue); +extern void exit_task_sigqueue_cache(struct task_struct *tsk); /* Test if 'sig' is valid signal. Use this instead of testing _NSIG directly */ static inline int valid_signal(unsigned long sig) diff --git a/include/uapi/linux/ptrace.h b/include/uapi/linux/ptrace.h index 83ee45fa634b..3747bf816f9a 100644 --- a/include/uapi/linux/ptrace.h +++ b/include/uapi/linux/ptrace.h @@ -102,6 +102,16 @@ struct ptrace_syscall_info { }; }; +#define PTRACE_GET_RSEQ_CONFIGURATION 0x420f + +struct ptrace_rseq_configuration { + __u64 rseq_abi_pointer; + __u32 rseq_abi_size; + __u32 signature; + __u32 flags; + __u32 pad; +}; + /* * These values are stored in task->ptrace_message * by tracehook_report_syscall_* to describe the current syscall-stop. |