diff options
Diffstat (limited to 'kernel')
-rw-r--r-- | kernel/cgroup/cpuset.c | 38 | ||||
-rw-r--r-- | kernel/fork.c | 25 | ||||
-rw-r--r-- | kernel/kexec_file.c | 2 | ||||
-rw-r--r-- | kernel/locking/rtmutex.c | 9 | ||||
-rw-r--r-- | kernel/resource.c | 6 | ||||
-rw-r--r-- | kernel/trace/fgraph.c | 31 | ||||
-rw-r--r-- | kernel/trace/trace.c | 2 | ||||
-rw-r--r-- | kernel/trace/trace_osnoise.c | 50 | ||||
-rw-r--r-- | kernel/trace/trace_selftest.c | 23 | ||||
-rw-r--r-- | kernel/workqueue.c | 50 |
10 files changed, 131 insertions, 105 deletions
diff --git a/kernel/cgroup/cpuset.c b/kernel/cgroup/cpuset.c index 40ec4abaf440..4bd9e50bcc8e 100644 --- a/kernel/cgroup/cpuset.c +++ b/kernel/cgroup/cpuset.c @@ -233,6 +233,13 @@ static cpumask_var_t isolated_cpus; static struct list_head remote_children; /* + * A flag to force sched domain rebuild at the end of an operation while + * inhibiting it in the intermediate stages when set. Currently it is only + * set in hotplug code. + */ +static bool force_sd_rebuild; + +/* * Partition root states: * * 0 - member (not a partition root) @@ -1475,7 +1482,7 @@ static void update_partition_sd_lb(struct cpuset *cs, int old_prs) clear_bit(CS_SCHED_LOAD_BALANCE, &cs->flags); } - if (rebuild_domains) + if (rebuild_domains && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -1833,7 +1840,7 @@ static void remote_partition_check(struct cpuset *cs, struct cpumask *newmask, remote_partition_disable(child, tmp); disable_cnt++; } - if (disable_cnt) + if (disable_cnt && !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -1991,6 +1998,8 @@ static int update_parent_effective_cpumask(struct cpuset *cs, int cmd, part_error = PERR_CPUSEMPTY; goto write_error; } + /* Check newmask again, whether cpus are available for parent/cs */ + nocpu |= tasks_nocpu_error(parent, cs, newmask); /* * partcmd_update with newmask: @@ -2440,7 +2449,8 @@ get_css: } rcu_read_unlock(); - if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD)) + if (need_rebuild_sched_domains && !(flags & HIER_NO_SD_REBUILD) && + !force_sd_rebuild) rebuild_sched_domains_locked(); } @@ -2523,7 +2533,8 @@ static int update_cpumask(struct cpuset *cs, struct cpuset *trialcs, */ if (!*buf) { cpumask_clear(trialcs->cpus_allowed); - cpumask_clear(trialcs->effective_xcpus); + if (cpumask_empty(trialcs->exclusive_cpus)) + cpumask_clear(trialcs->effective_xcpus); } else { retval = cpulist_parse(buf, trialcs->cpus_allowed); if (retval < 0) @@ -3101,7 +3112,8 @@ static int update_flag(cpuset_flagbits_t bit, struct cpuset *cs, cs->flags = trialcs->flags; spin_unlock_irq(&callback_lock); - if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed) + if (!cpumask_empty(trialcs->cpus_allowed) && balance_flag_changed && + !force_sd_rebuild) rebuild_sched_domains_locked(); if (spread_flag_changed) @@ -4498,11 +4510,9 @@ hotplug_update_tasks(struct cpuset *cs, update_tasks_nodemask(cs); } -static bool force_rebuild; - void cpuset_force_rebuild(void) { - force_rebuild = true; + force_sd_rebuild = true; } /** @@ -4650,15 +4660,9 @@ static void cpuset_handle_hotplug(void) !cpumask_empty(subpartitions_cpus); mems_updated = !nodes_equal(top_cpuset.effective_mems, new_mems); - /* - * In the rare case that hotplug removes all the cpus in - * subpartitions_cpus, we assumed that cpus are updated. - */ - if (!cpus_updated && !cpumask_empty(subpartitions_cpus)) - cpus_updated = true; - /* For v1, synchronize cpus_allowed to cpu_active_mask */ if (cpus_updated) { + cpuset_force_rebuild(); spin_lock_irq(&callback_lock); if (!on_dfl) cpumask_copy(top_cpuset.cpus_allowed, &new_cpus); @@ -4714,8 +4718,8 @@ static void cpuset_handle_hotplug(void) } /* rebuild sched domains if cpus_allowed has changed */ - if (cpus_updated || force_rebuild) { - force_rebuild = false; + if (force_sd_rebuild) { + force_sd_rebuild = false; rebuild_sched_domains_cpuslocked(); } diff --git a/kernel/fork.c b/kernel/fork.c index 18bdc87209d0..cc760491f201 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -2053,23 +2053,10 @@ static int __pidfd_prepare(struct pid *pid, unsigned int flags, struct file **re */ int pidfd_prepare(struct pid *pid, unsigned int flags, struct file **ret) { - if (!pid) - return -EINVAL; - - scoped_guard(rcu) { - struct task_struct *tsk; - - if (flags & PIDFD_THREAD) - tsk = pid_task(pid, PIDTYPE_PID); - else - tsk = pid_task(pid, PIDTYPE_TGID); - if (!tsk) - return -EINVAL; + bool thread = flags & PIDFD_THREAD; - /* Don't create pidfds for kernel threads for now. */ - if (tsk->flags & PF_KTHREAD) - return -EINVAL; - } + if (!pid || !pid_has_task(pid, thread ? PIDTYPE_PID : PIDTYPE_TGID)) + return -EINVAL; return __pidfd_prepare(pid, flags, ret); } @@ -2416,12 +2403,6 @@ __latent_entropy struct task_struct *copy_process( if (clone_flags & CLONE_PIDFD) { int flags = (clone_flags & CLONE_THREAD) ? PIDFD_THREAD : 0; - /* Don't create pidfds for kernel threads for now. */ - if (args->kthread) { - retval = -EINVAL; - goto bad_fork_free_pid; - } - /* Note that no task has been attached to @pid yet. */ retval = __pidfd_prepare(pid, flags, &pidfile); if (retval < 0) diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index 3d64290d24c9..3eedb8c226ad 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -752,7 +752,7 @@ static int kexec_calculate_store_digests(struct kimage *image) #ifdef CONFIG_CRASH_HOTPLUG /* Exclude elfcorehdr segment to allow future changes via hotplug */ - if (j == image->elfcorehdr_index) + if (i == image->elfcorehdr_index) continue; #endif diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 88d08eeb8bc0..fba1229f1de6 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -1644,6 +1644,7 @@ static int __sched rt_mutex_slowlock_block(struct rt_mutex_base *lock, } static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, + struct rt_mutex_base *lock, struct rt_mutex_waiter *w) { /* @@ -1656,10 +1657,10 @@ static void __sched rt_mutex_handle_deadlock(int res, int detect_deadlock, if (build_ww_mutex() && w->ww_ctx) return; - /* - * Yell loudly and stop the task right here. - */ + raw_spin_unlock_irq(&lock->wait_lock); + WARN(1, "rtmutex deadlock detected\n"); + while (1) { set_current_state(TASK_INTERRUPTIBLE); rt_mutex_schedule(); @@ -1713,7 +1714,7 @@ static int __sched __rt_mutex_slowlock(struct rt_mutex_base *lock, } else { __set_current_state(TASK_RUNNING); remove_waiter(lock, waiter); - rt_mutex_handle_deadlock(ret, chwalk, waiter); + rt_mutex_handle_deadlock(ret, chwalk, lock, waiter); } /* diff --git a/kernel/resource.c b/kernel/resource.c index 14777afb0a99..a83040fde236 100644 --- a/kernel/resource.c +++ b/kernel/resource.c @@ -1826,8 +1826,7 @@ static resource_size_t gfr_start(struct resource *base, resource_size_t size, if (flags & GFR_DESCENDING) { resource_size_t end; - end = min_t(resource_size_t, base->end, - (1ULL << MAX_PHYSMEM_BITS) - 1); + end = min_t(resource_size_t, base->end, PHYSMEM_END); return end - size + 1; } @@ -1844,8 +1843,7 @@ static bool gfr_continue(struct resource *base, resource_size_t addr, * @size did not wrap 0. */ return addr > addr - size && - addr <= min_t(resource_size_t, base->end, - (1ULL << MAX_PHYSMEM_BITS) - 1); + addr <= min_t(resource_size_t, base->end, PHYSMEM_END); } static resource_size_t gfr_next(resource_size_t addr, resource_size_t size, diff --git a/kernel/trace/fgraph.c b/kernel/trace/fgraph.c index d1d5ea2d0a1b..d7d4fb403f6f 100644 --- a/kernel/trace/fgraph.c +++ b/kernel/trace/fgraph.c @@ -1206,18 +1206,24 @@ static void init_task_vars(int idx) read_unlock(&tasklist_lock); } -static void ftrace_graph_enable_direct(bool enable_branch) +static void ftrace_graph_enable_direct(bool enable_branch, struct fgraph_ops *gops) { trace_func_graph_ent_t func = NULL; trace_func_graph_ret_t retfunc = NULL; int i; - for_each_set_bit(i, &fgraph_array_bitmask, - sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) { - func = fgraph_array[i]->entryfunc; - retfunc = fgraph_array[i]->retfunc; - fgraph_direct_gops = fgraph_array[i]; - } + if (gops) { + func = gops->entryfunc; + retfunc = gops->retfunc; + fgraph_direct_gops = gops; + } else { + for_each_set_bit(i, &fgraph_array_bitmask, + sizeof(fgraph_array_bitmask) * BITS_PER_BYTE) { + func = fgraph_array[i]->entryfunc; + retfunc = fgraph_array[i]->retfunc; + fgraph_direct_gops = fgraph_array[i]; + } + } if (WARN_ON_ONCE(!func)) return; @@ -1256,8 +1262,6 @@ int register_ftrace_graph(struct fgraph_ops *gops) ret = -ENOSPC; goto out; } - - fgraph_array[i] = gops; gops->idx = i; ftrace_graph_active++; @@ -1266,7 +1270,7 @@ int register_ftrace_graph(struct fgraph_ops *gops) ftrace_graph_disable_direct(true); if (ftrace_graph_active == 1) { - ftrace_graph_enable_direct(false); + ftrace_graph_enable_direct(false, gops); register_pm_notifier(&ftrace_suspend_notifier); ret = start_graph_tracing(); if (ret) @@ -1281,14 +1285,15 @@ int register_ftrace_graph(struct fgraph_ops *gops) } else { init_task_vars(gops->idx); } - /* Always save the function, and reset at unregistering */ gops->saved_func = gops->entryfunc; ret = ftrace_startup_subops(&graph_ops, &gops->ops, command); + if (!ret) + fgraph_array[i] = gops; + error: if (ret) { - fgraph_array[i] = &fgraph_stub; ftrace_graph_active--; gops->saved_func = NULL; fgraph_lru_release_index(i); @@ -1324,7 +1329,7 @@ void unregister_ftrace_graph(struct fgraph_ops *gops) ftrace_shutdown_subops(&graph_ops, &gops->ops, command); if (ftrace_graph_active == 1) - ftrace_graph_enable_direct(true); + ftrace_graph_enable_direct(true, NULL); else if (!ftrace_graph_active) ftrace_graph_disable_direct(false); diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index ebe7ce2f5f4a..edf6bc817aa1 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -3958,6 +3958,8 @@ void tracing_iter_reset(struct trace_iterator *iter, int cpu) break; entries++; ring_buffer_iter_advance(buf_iter); + /* This could be a big loop */ + cond_resched(); } per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries; diff --git a/kernel/trace/trace_osnoise.c b/kernel/trace/trace_osnoise.c index 66a871553d4a..bbe47781617e 100644 --- a/kernel/trace/trace_osnoise.c +++ b/kernel/trace/trace_osnoise.c @@ -253,20 +253,31 @@ static inline struct timerlat_variables *this_cpu_tmr_var(void) } /* + * Protect the interface. + */ +static struct mutex interface_lock; + +/* * tlat_var_reset - Reset the values of the given timerlat_variables */ static inline void tlat_var_reset(void) { struct timerlat_variables *tlat_var; int cpu; + + /* Synchronize with the timerlat interfaces */ + mutex_lock(&interface_lock); /* * So far, all the values are initialized as 0, so * zeroing the structure is perfect. */ for_each_cpu(cpu, cpu_online_mask) { tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); + if (tlat_var->kthread) + hrtimer_cancel(&tlat_var->timer); memset(tlat_var, 0, sizeof(*tlat_var)); } + mutex_unlock(&interface_lock); } #else /* CONFIG_TIMERLAT_TRACER */ #define tlat_var_reset() do {} while (0) @@ -332,11 +343,6 @@ struct timerlat_sample { #endif /* - * Protect the interface. - */ -static struct mutex interface_lock; - -/* * Tracer data. */ static struct osnoise_data { @@ -1612,6 +1618,7 @@ out: static struct cpumask osnoise_cpumask; static struct cpumask save_cpumask; +static struct cpumask kthread_cpumask; /* * osnoise_sleep - sleep until the next period @@ -1675,6 +1682,7 @@ static inline int osnoise_migration_pending(void) */ mutex_lock(&interface_lock); this_cpu_osn_var()->kthread = NULL; + cpumask_clear_cpu(smp_processor_id(), &kthread_cpumask); mutex_unlock(&interface_lock); return 1; @@ -1945,11 +1953,16 @@ static void stop_kthread(unsigned int cpu) { struct task_struct *kthread; + mutex_lock(&interface_lock); kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; if (kthread) { - if (test_bit(OSN_WORKLOAD, &osnoise_options)) { + per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + mutex_unlock(&interface_lock); + + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask) && + !WARN_ON(!test_bit(OSN_WORKLOAD, &osnoise_options))) { kthread_stop(kthread); - } else { + } else if (!WARN_ON(test_bit(OSN_WORKLOAD, &osnoise_options))) { /* * This is a user thread waiting on the timerlat_fd. We need * to close all users, and the best way to guarantee this is @@ -1958,8 +1971,8 @@ static void stop_kthread(unsigned int cpu) kill_pid(kthread->thread_pid, SIGKILL, 1); put_task_struct(kthread); } - per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; } else { + mutex_unlock(&interface_lock); /* if no workload, just return */ if (!test_bit(OSN_WORKLOAD, &osnoise_options)) { /* @@ -1967,7 +1980,6 @@ static void stop_kthread(unsigned int cpu) */ per_cpu(per_cpu_osnoise_var, cpu).sampling = false; barrier(); - return; } } } @@ -1982,12 +1994,8 @@ static void stop_per_cpu_kthreads(void) { int cpu; - cpus_read_lock(); - - for_each_online_cpu(cpu) + for_each_possible_cpu(cpu) stop_kthread(cpu); - - cpus_read_unlock(); } /* @@ -2021,6 +2029,7 @@ static int start_kthread(unsigned int cpu) } per_cpu(per_cpu_osnoise_var, cpu).kthread = kthread; + cpumask_set_cpu(cpu, &kthread_cpumask); return 0; } @@ -2048,8 +2057,16 @@ static int start_per_cpu_kthreads(void) */ cpumask_and(current_mask, cpu_online_mask, &osnoise_cpumask); - for_each_possible_cpu(cpu) + for_each_possible_cpu(cpu) { + if (cpumask_test_and_clear_cpu(cpu, &kthread_cpumask)) { + struct task_struct *kthread; + + kthread = per_cpu(per_cpu_osnoise_var, cpu).kthread; + if (!WARN_ON(!kthread)) + kthread_stop(kthread); + } per_cpu(per_cpu_osnoise_var, cpu).kthread = NULL; + } for_each_cpu(cpu, current_mask) { retval = start_kthread(cpu); @@ -2579,7 +2596,8 @@ static int timerlat_fd_release(struct inode *inode, struct file *file) osn_var = per_cpu_ptr(&per_cpu_osnoise_var, cpu); tlat_var = per_cpu_ptr(&per_cpu_timerlat_var, cpu); - hrtimer_cancel(&tlat_var->timer); + if (tlat_var->kthread) + hrtimer_cancel(&tlat_var->timer); memset(tlat_var, 0, sizeof(*tlat_var)); osn_var->sampling = 0; diff --git a/kernel/trace/trace_selftest.c b/kernel/trace/trace_selftest.c index 97f1e4bc47dc..c4ad7cd7e778 100644 --- a/kernel/trace/trace_selftest.c +++ b/kernel/trace/trace_selftest.c @@ -942,7 +942,7 @@ static __init int test_graph_storage_multi(void) { struct fgraph_fixture *fixture; bool printed = false; - int i, ret; + int i, j, ret; pr_cont("PASSED\n"); pr_info("Testing multiple fgraph storage on a function: "); @@ -953,22 +953,35 @@ static __init int test_graph_storage_multi(void) if (ret && ret != -ENODEV) { pr_cont("*Could not set filter* "); printed = true; - goto out; + goto out2; } + } + for (j = 0; j < ARRAY_SIZE(store_bytes); j++) { + fixture = &store_bytes[j]; ret = register_ftrace_graph(&fixture->gops); if (ret) { pr_warn("Failed to init store_bytes fgraph tracing\n"); printed = true; - goto out; + goto out1; } } DYN_FTRACE_TEST_NAME(); -out: +out1: + while (--j >= 0) { + fixture = &store_bytes[j]; + unregister_ftrace_graph(&fixture->gops); + + if (fixture->error_str && !printed) { + pr_cont("*** %s ***", fixture->error_str); + printed = true; + } + } +out2: while (--i >= 0) { fixture = &store_bytes[i]; - unregister_ftrace_graph(&fixture->gops); + ftrace_free_filter(&fixture->gops.ops); if (fixture->error_str && !printed) { pr_cont("*** %s ***", fixture->error_str); diff --git a/kernel/workqueue.c b/kernel/workqueue.c index 1745ca788ede..e7b005ff3750 100644 --- a/kernel/workqueue.c +++ b/kernel/workqueue.c @@ -377,7 +377,7 @@ struct workqueue_struct { /* hot fields used during command issue, aligned to cacheline */ unsigned int flags ____cacheline_aligned; /* WQ: WQ_* flags */ - struct pool_workqueue __percpu __rcu **cpu_pwq; /* I: per-cpu pwqs */ + struct pool_workqueue __rcu * __percpu *cpu_pwq; /* I: per-cpu pwqs */ struct wq_node_nr_active *node_nr_active[]; /* I: per-node nr_active */ }; @@ -897,7 +897,7 @@ static struct worker_pool *get_work_pool(struct work_struct *work) static unsigned long shift_and_mask(unsigned long v, u32 shift, u32 bits) { - return (v >> shift) & ((1 << bits) - 1); + return (v >> shift) & ((1U << bits) - 1); } static void work_offqd_unpack(struct work_offq_data *offqd, unsigned long data) @@ -3351,7 +3351,6 @@ woke_up: set_pf_worker(false); ida_free(&pool->worker_ida, worker->id); - WARN_ON_ONCE(!list_empty(&worker->entry)); return 0; } @@ -4167,7 +4166,6 @@ already_gone: static bool __flush_work(struct work_struct *work, bool from_cancel) { struct wq_barrier barr; - unsigned long data; if (WARN_ON(!wq_online)) return false; @@ -4185,29 +4183,35 @@ static bool __flush_work(struct work_struct *work, bool from_cancel) * was queued on a BH workqueue, we also know that it was running in the * BH context and thus can be busy-waited. */ - data = *work_data_bits(work); - if (from_cancel && - !WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && (data & WORK_OFFQ_BH)) { - /* - * On RT, prevent a live lock when %current preempted soft - * interrupt processing or prevents ksoftirqd from running by - * keeping flipping BH. If the BH work item runs on a different - * CPU then this has no effect other than doing the BH - * disable/enable dance for nothing. This is copied from - * kernel/softirq.c::tasklet_unlock_spin_wait(). - */ - while (!try_wait_for_completion(&barr.done)) { - if (IS_ENABLED(CONFIG_PREEMPT_RT)) { - local_bh_disable(); - local_bh_enable(); - } else { - cpu_relax(); + if (from_cancel) { + unsigned long data = *work_data_bits(work); + + if (!WARN_ON_ONCE(data & WORK_STRUCT_PWQ) && + (data & WORK_OFFQ_BH)) { + /* + * On RT, prevent a live lock when %current preempted + * soft interrupt processing or prevents ksoftirqd from + * running by keeping flipping BH. If the BH work item + * runs on a different CPU then this has no effect other + * than doing the BH disable/enable dance for nothing. + * This is copied from + * kernel/softirq.c::tasklet_unlock_spin_wait(). + */ + while (!try_wait_for_completion(&barr.done)) { + if (IS_ENABLED(CONFIG_PREEMPT_RT)) { + local_bh_disable(); + local_bh_enable(); + } else { + cpu_relax(); + } } + goto out_destroy; } - } else { - wait_for_completion(&barr.done); } + wait_for_completion(&barr.done); + +out_destroy: destroy_work_on_stack(&barr.work); return true; } |