summary | refs | log | tree | commit | diff
path: root/kernel
diff options
context:
space:
mode:
Diffstat (limited to 'kernel')
-rw-r--r-- kernel/cgroup/cpuset.c 38
-rw-r--r-- kernel/fork.c 25
-rw-r--r-- kernel/kexec_file.c 2
-rw-r--r-- kernel/locking/rtmutex.c 9
-rw-r--r-- kernel/resource.c 6
-rw-r--r-- kernel/trace/fgraph.c 31
-rw-r--r-- kernel/trace/trace.c 2
-rw-r--r-- kernel/trace/trace_osnoise.c 50
-rw-r--r-- kernel/trace/trace_selftest.c 23
-rw-r--r-- kernel/workqueue.c 50
10 files changed, 131 insertions, 105 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c
index 40ec4abaf440..4bd9e50bcc8e 100644
--- a/kernel/cgroup/cpuset.c
+++ b/kernel/cgroup/cpuset.c
@@ -233,6 +233,13 @@ static cpumask_var_t isolated_cpus;
static struct list_head remote_children;
/*
+ * A flag to force sched domain rebuild at the end of an operation while
+ * inhibiting it in the intermediate stages when set. Currently it is only
+ * set in hotplug code.
+ */
+static bool force_sd_rebuild;
+
+/*
* Partition root states:
*
* 0 - member (not a partition root)
@@ -1475,7 +1482,7 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs)
clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags);
}
- if (rebuild_domains)
+ if (rebuild_domains && !force_sd_rebuild)
rebuild_sched_domains_locked();
}
@@ -1833,7 +1840,7 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask,
remote_partition_disable(child, tmp);
disable_cnt++;
}
- if (disable_cnt)
+ if (disable_cnt && !force_sd_rebuild)
rebuild_sched_domains_locked();
}
@@ -1991,6 +1998,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd,
part_error = PERR_CPUSEMPTY;
goto write_error;
}
+ /* Check newmask again, whether cpus are available for parent/cs */
+ nocpu |= tasks_nocpu_error(parent, cs, newmask);
/*
* partcmd_update with newmask:
@@ -2440,7 +2449,8 @@ get_css:
}
rcu_read_unlock();
- if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD))
+ if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) &&
+ !force_sd_rebuild)
rebuild_sched_domains_locked();
}
@@ -2523,7 +2533,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs,
*/
if (!*buf) {
cpumask_clear(trialcs->cpus_allowed);
- cpumask_clear(trialcs->effective_xcpus);
+ if (cpumask_empty(trialcs->exclusive_cpus))
+ cpumask_clear(trialcs->effective_xcpus);
} else {
retval = cpulist_parse(buf, trialcs->cpus_allowed);
if (retval < 0)
@@ -3101,7 +3112,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs,
cs->flags = trialcs->flags;
spin_unlock_irq(&callback_lock);
- if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed)
+ if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed &&
+ !force_sd_rebuild)
rebuild_sched_domains_locked();
if (spread_flag_changed)
@@ -4498,11 +4510,9 @@ hotplug_update_tasks(struct cpuset *cs,
update_tasks_nodemask(cs);
}
-static bool force_rebuild;
-
void cpuset_force_rebuild(void)
{
- force_rebuild = true;
+ force_sd_rebuild = true;
}
/**
@@ -4650,15 +4660,9 @@ static void cpuset_handle_hotplug(void)
!cpumask_empty(subpartitions_cpus);
mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems);
- /*
- * In the rare case that hotplug removes all the cpus in
- * subpartitions_cpus, we assumed that cpus are updated.
- */
- if (!cpus_updated && !cpumask_empty(subpartitions_cpus))
- cpus_updated = true;
-
/* For v1, synchronize cpus_allowed to cpu_active_mask */
if (cpus_updated) {
+ cpuset_force_rebuild();
spin_lock_irq(&callback_lock);
if (!on_dfl)
cpumask_copy(top_cpuset.cpus_allowed, &new_cpus);
@@ -4714,8 +4718,8 @@ static void cpuset_handle_hotplug(void)
}
/* rebuild sched domains if cpus_allowed has changed */
- if (cpus_updated || force_rebuild) {
- force_rebuild = false;
+ if (force_sd_rebuild) {
+ force_sd_rebuild = false;
rebuild_sched_domains_cpuslocked();
}
diff --git a/kernel/fork.c b/kernel/fork.c
index 18bdc87209d0..cc760491f201 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -2053,23 +2053,10 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re
*/
int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret)
{
- if (!pid)
- return -EINVAL;
-
- scoped_guard(rcu) {
- struct task_struct *tsk;
-
- if (flags & PIDFD_THREAD)
- tsk = pid_task(pid, PIDTYPE_PID);
- else
- tsk = pid_task(pid, PIDTYPE_TGID);
- if (!tsk)
- return -EINVAL;
+ bool thread = flags & PIDFD_THREAD;
- /* Don't create pidfds for kernel threads for now. */
- if (tsk->flags & PF_KTHREAD)
- return -EINVAL;
- }
+ if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID))
+ return -EINVAL;
return __pidfd_prepare(pid, flags, ret);
}
@@ -2416,12 +2403,6 @@ __latent_entropy struct task_struct *copy_process(
if (clone_flags & CLONE_PIDFD) {
int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0;
- /* Don't create pidfds for kernel threads for now. */
- if (args->kthread) {
- retval = -EINVAL;
- goto bad_fork_free_pid;
- }
-
/* Note that no task has been attached to @pid yet. */
retval = __pidfd_prepare(pid, flags, &pidfile);
if (retval < 0)
diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c
index 3d64290d24c9..3eedb8c226ad 100644
--- a/kernel/kexec_file.c
+++ b/kernel/kexec_file.c
@@ -752,7 +752,7 @@ static int kexec_calculate_store_digests(struct kimage *image)
#ifdef CONFIG_CRASH_HOTPLUG
/* Exclude elfcorehdr segment to allow future changes via hotplug */
- if (j == image->elfcorehdr_index)
+ if (i == image->elfcorehdr_index)
continue;
#endif
diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c
index 88d08eeb8bc0..fba1229f1de6 100644
--- a/kernel/locking/rtmutex.c
+++ b/kernel/locking/rtmutex.c
@@ -1644,6 +1644,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock,
}
static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
+ struct rt_mutex_base *lock,
struct rt_mutex_waiter *w)
{
/*
@@ -1656,10 +1657,10 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock,
if (build_ww_mutex() && w->ww_ctx)
return;
- /*
- * Yell loudly and stop the task right here.
- */
+ raw_spin_unlock_irq(&lock->wait_lock);
+
WARN(1, "rtmutex deadlock detected\n");
+
while (1) {
set_current_state(TASK_INTERRUPTIBLE);
rt_mutex_schedule();
@@ -1713,7 +1714,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock,
} else {
__set_current_state(TASK_RUNNING);
remove_waiter(lock, waiter);
- rt_mutex_handle_deadlock(ret, chwalk, waiter);
+ rt_mutex_handle_deadlock(ret, chwalk, lock, waiter);
}
/*
diff --git a/kernel/resource.c b/kernel/resource.c
index 14777afb0a99..a83040fde236 100644
--- a/kernel/resource.c
+++ b/kernel/resource.c
@@ -1826,8 +1826,7 @@ static resource_size_t gfr_start(struct resource *base, resource_size_t size,
if (flags & GFR_DESCENDING) {
resource_size_t end;
- end = min_t(resource_size_t, base->end,
- (1ULL << MAX_PHYSMEM_BITS) - 1);
+ end = min_t(resource_size_t, base->end, PHYSMEM_END);
return end - size + 1;
}
@@ -1844,8 +1843,7 @@ static bool gfr_continue(struct resource *base, resource_size_t addr,
* @size did not wrap 0.
*/
return addr > addr - size &&
- addr <= min_t(resource_size_t, base->end,
- (1ULL << MAX_PHYSMEM_BITS) - 1);
+ addr <= min_t(resource_size_t, base->end, PHYSMEM_END);
}
static resource_size_t gfr_next(resource_size_t addr, resource_size_t size,
diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c
index d1d5ea2d0a1b..d7d4fb403f6f 100644
--- a/kernel/trace/fgraph.c
+++ b/kernel/trace/fgraph.c
@@ -1206,18 +1206,24 @@ static void init_task_vars(int idx)
read_unlock(&tasklist_lock);
}
-static void ftrace_graph_enable_direct(bool enable_branch)
+static void ftrace_graph_enable_direct(bool enable_branch, struct fgraph_ops *gops)
{
trace_func_graph_ent_t func = NULL;
trace_func_graph_ret_t retfunc = NULL;
int i;
- for_each_set_bit(i, &fgraph_array_bitmask,
- sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
- func = fgraph_array[i]->entryfunc;
- retfunc = fgraph_array[i]->retfunc;
- fgraph_direct_gops = fgraph_array[i];
- }
+ if (gops) {
+ func = gops->entryfunc;
+ retfunc = gops->retfunc;
+ fgraph_direct_gops = gops;
+ } else {
+ for_each_set_bit(i, &fgraph_array_bitmask,
+ sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) {
+ func = fgraph_array[i]->entryfunc;
+ retfunc = fgraph_array[i]->retfunc;
+ fgraph_direct_gops = fgraph_array[i];
+ }
+ }
if (WARN_ON_ONCE(!func))
return;
@@ -1256,8 +1262,6 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ret = -ENOSPC;
goto out;
}
-
- fgraph_array[i] = gops;
gops->idx = i;
ftrace_graph_active++;
@@ -1266,7 +1270,7 @@ int register_ftrace_graph(struct fgraph_ops *gops)
ftrace_graph_disable_direct(true);
if (ftrace_graph_active == 1) {
- ftrace_graph_enable_direct(false);
+ ftrace_graph_enable_direct(false, gops);
register_pm_notifier(&ftrace_suspend_notifier);
ret = start_graph_tracing();
if (ret)
@@ -1281,14 +1285,15 @@ int register_ftrace_graph(struct fgraph_ops *gops)
} else {
init_task_vars(gops->idx);
}
-
/* Always save the function, and reset at unregistering */
gops->saved_func = gops->entryfunc;
ret = ftrace_startup_subops(&graph_ops, &gops->ops, command);
+ if (!ret)
+ fgraph_array[i] = gops;
+
error:
if (ret) {
- fgraph_array[i] = &fgraph_stub;
ftrace_graph_active--;
gops->saved_func = NULL;
fgraph_lru_release_index(i);
@@ -1324,7 +1329,7 @@ void unregister_ftrace_graph(struct fgraph_ops *gops)
ftrace_shutdown_subops(&graph_ops, &gops->ops, command);
if (ftrace_graph_active == 1)
- ftrace_graph_enable_direct(true);
+ ftrace_graph_enable_direct(true, NULL);
else if (!ftrace_graph_active)
ftrace_graph_disable_direct(false);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index ebe7ce2f5f4a..edf6bc817aa1 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -3958,6 +3958,8 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu)
break;
entries++;
ring_buffer_iter_advance(buf_iter);
+ /* This could be a big loop */
+ cond_resched();
}
per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c
index 66a871553d4a..bbe47781617e 100644
--- a/kernel/trace/trace_osnoise.c
+++ b/kernel/trace/trace_osnoise.c
@@ -253,20 +253,31 @@ static inline struct timerlat_variables *this_cpu_tmr_var(void)
}
/*
+ * Protect the interface.
+ */
+static struct mutex interface_lock;
+
+/*
* tlat_var_reset - Reset the values of the given timerlat_variables
*/
static inline void tlat_var_reset(void)
{
struct timerlat_variables *tlat_var;
int cpu;
+
+ /* Synchronize with the timerlat interfaces */
+ mutex_lock(&interface_lock);
/*
* So far, all the values are initialized as 0, so
* zeroing the structure is perfect.
*/
for_each_cpu(cpu, cpu_online_mask) {
tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
+ if (tlat_var->kthread)
+ hrtimer_cancel(&tlat_var->timer);
memset(tlat_var, 0, sizeof(*tlat_var));
}
+ mutex_unlock(&interface_lock);
}
#else /* CONFIG_TIMERLAT_TRACER */
#define tlat_var_reset() do {} while (0)
@@ -332,11 +343,6 @@ struct timerlat_sample {
#endif
/*
- * Protect the interface.
- */
-static struct mutex interface_lock;
-
-/*
* Tracer data.
*/
static struct osnoise_data {
@@ -1612,6 +1618,7 @@ out:
static struct cpumask osnoise_cpumask;
static struct cpumask save_cpumask;
+static struct cpumask kthread_cpumask;
/*
* osnoise_sleep - sleep until the next period
@@ -1675,6 +1682,7 @@ static inline int osnoise_migration_pending(void)
*/
mutex_lock(&interface_lock);
this_cpu_osn_var()->kthread = NULL;
+ cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask);
mutex_unlock(&interface_lock);
return 1;
@@ -1945,11 +1953,16 @@ static void stop_kthread(unsigned int cpu)
{
struct task_struct *kthread;
+ mutex_lock(&interface_lock);
kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
if (kthread) {
- if (test_bit(OSN_WORKLOAD, &osnoise_options)) {
+ per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
+ mutex_unlock(&interface_lock);
+
+ if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) &&
+ !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) {
kthread_stop(kthread);
- } else {
+ } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) {
/*
* This is a user thread waiting on the timerlat_fd. We need
* to close all users, and the best way to guarantee this is
@@ -1958,8 +1971,8 @@ static void stop_kthread(unsigned int cpu)
kill_pid(kthread->thread_pid, SIGKILL, 1);
put_task_struct(kthread);
}
- per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
} else {
+ mutex_unlock(&interface_lock);
/* if no workload, just return */
if (!test_bit(OSN_WORKLOAD, &osnoise_options)) {
/*
@@ -1967,7 +1980,6 @@ static void stop_kthread(unsigned int cpu)
*/
per_cpu(per_cpu_osnoise_var, cpu).sampling = false;
barrier();
- return;
}
}
}
@@ -1982,12 +1994,8 @@ static void stop_per_cpu_kthreads(void)
{
int cpu;
- cpus_read_lock();
-
- for_each_online_cpu(cpu)
+ for_each_possible_cpu(cpu)
stop_kthread(cpu);
-
- cpus_read_unlock();
}
/*
@@ -2021,6 +2029,7 @@ static int start_kthread(unsigned int cpu)
}
per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread;
+ cpumask_set_cpu(cpu, &kthread_cpumask);
return 0;
}
@@ -2048,8 +2057,16 @@ static int start_per_cpu_kthreads(void)
*/
cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask);
- for_each_possible_cpu(cpu)
+ for_each_possible_cpu(cpu) {
+ if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) {
+ struct task_struct *kthread;
+
+ kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread;
+ if (!WARN_ON(!kthread))
+ kthread_stop(kthread);
+ }
per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL;
+ }
for_each_cpu(cpu, current_mask) {
retval = start_kthread(cpu);
@@ -2579,7 +2596,8 @@ static int timerlat_fd_release(struct inode *inode, struct file *file)
osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu);
tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu);
- hrtimer_cancel(&tlat_var->timer);
+ if (tlat_var->kthread)
+ hrtimer_cancel(&tlat_var->timer);
memset(tlat_var, 0, sizeof(*tlat_var));
osn_var->sampling = 0;
diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c
index 97f1e4bc47dc..c4ad7cd7e778 100644
--- a/kernel/trace/trace_selftest.c
+++ b/kernel/trace/trace_selftest.c
@@ -942,7 +942,7 @@ static __init int test_graph_storage_multi(void)
{
struct fgraph_fixture *fixture;
bool printed = false;
- int i, ret;
+ int i, j, ret;
pr_cont("PASSED\n");
pr_info("Testing multiple fgraph storage on a function: ");
@@ -953,22 +953,35 @@ static __init int test_graph_storage_multi(void)
if (ret && ret != -ENODEV) {
pr_cont("*Could not set filter* ");
printed = true;
- goto out;
+ goto out2;
}
+ }
+ for (j = 0; j < ARRAY_SIZE(store_bytes); j++) {
+ fixture = &store_bytes[j];
ret = register_ftrace_graph(&fixture->gops);
if (ret) {
pr_warn("Failed to init store_bytes fgraph tracing\n");
printed = true;
- goto out;
+ goto out1;
}
}
DYN_FTRACE_TEST_NAME();
-out:
+out1:
+ while (--j >= 0) {
+ fixture = &store_bytes[j];
+ unregister_ftrace_graph(&fixture->gops);
+
+ if (fixture->error_str && !printed) {
+ pr_cont("*** %s ***", fixture->error_str);
+ printed = true;
+ }
+ }
+out2:
while (--i >= 0) {
fixture = &store_bytes[i];
- unregister_ftrace_graph(&fixture->gops);
+ ftrace_free_filter(&fixture->gops.ops);
if (fixture->error_str && !printed) {
pr_cont("*** %s ***", fixture->error_str);
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index 1745ca788ede..e7b005ff3750 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -377,7 +377,7 @@ struct workqueue_struct {
/* hot fields used during command issue, aligned to cacheline */
unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */
- struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */
+ struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */
struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */
};
@@ -897,7 +897,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work)
static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits)
{
- return (v >> shift) & ((1 << bits) - 1);
+ return (v >> shift) & ((1U << bits) - 1);
}
static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data)
@@ -3351,7 +3351,6 @@ woke_up:
set_pf_worker(false);
ida_free(&pool->worker_ida, worker->id);
- WARN_ON_ONCE(!list_empty(&worker->entry));
return 0;
}
@@ -4167,7 +4166,6 @@ already_gone:
static bool __flush_work(struct work_struct *work, bool from_cancel)
{
struct wq_barrier barr;
- unsigned long data;
if (WARN_ON(!wq_online))
return false;
@@ -4185,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel)
* was queued on a BH workqueue, we also know that it was running in the
* BH context and thus can be busy-waited.
*/
- data = *work_data_bits(work);
- if (from_cancel &&
- !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) {
- /*
- * On RT, prevent a live lock when %current preempted soft
- * interrupt processing or prevents ksoftirqd from running by
- * keeping flipping BH. If the BH work item runs on a different
- * CPU then this has no effect other than doing the BH
- * disable/enable dance for nothing. This is copied from
- * kernel/softirq.c::tasklet_unlock_spin_wait().
- */
- while (!try_wait_for_completion(&barr.done)) {
- if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
- local_bh_disable();
- local_bh_enable();
- } else {
- cpu_relax();
+ if (from_cancel) {
+ unsigned long data = *work_data_bits(work);
+
+ if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) &&
+ (data & WORK_OFFQ_BH)) {
+ /*
+ * On RT, prevent a live lock when %current preempted
+ * soft interrupt processing or prevents ksoftirqd from
+ * running by keeping flipping BH. If the BH work item
+ * runs on a different CPU then this has no effect other
+ * than doing the BH disable/enable dance for nothing.
+ * This is copied from
+ * kernel/softirq.c::tasklet_unlock_spin_wait().
+ */
+ while (!try_wait_for_completion(&barr.done)) {
+ if (IS_ENABLED(CONFIG_PREEMPT_RT)) {
+ local_bh_disable();
+ local_bh_enable();
+ } else {
+ cpu_relax();
+ }
}
+ goto out_destroy;
}
- } else {
- wait_for_completion(&barr.done);
}
+ wait_for_completion(&barr.done);
+
+out_destroy:
destroy_work_on_stack(&barr.work);
return true;
}