Diffstat (limited to 'kernel')

 kernel/bpf/btf.c                     |   5
 kernel/bpf/dispatcher.c              |   6
 kernel/bpf/memalloc.c                |  18
 kernel/bpf/verifier.c                |  18
 kernel/events/core.c                 | 152
 kernel/events/hw_breakpoint_test.c   |   4
 kernel/events/ring_buffer.c          |   2
 kernel/kprobes.c                     |   5
 kernel/power/hibernate.c             |   2
 kernel/rcu/tree.c                    |  10
 kernel/sched/core.c                  |  24
 kernel/sched/deadline.c              |   4
 kernel/sched/rt.c                    |   4
 kernel/sched/sched.h                 |  32
 kernel/trace/bpf_trace.c             |   2
 kernel/trace/fprobe.c                |   5
 kernel/trace/ftrace.c                |  16
 kernel/trace/kprobe_event_gen_test.c |  18
 kernel/trace/ring_buffer.c           |  11
 kernel/utsname_sysctl.c              |   1
 20 files changed, 232 insertions(+), 107 deletions(-)
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index eba603cec2c5..35c07afac924 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -4436,6 +4436,11 @@ static int btf_func_proto_check(struct btf_verifier_env *env,
 		return -EINVAL;
 	}
 
+	if (btf_type_is_resolve_source_only(ret_type)) {
+		btf_verifier_log_type(env, t, "Invalid return type");
+		return -EINVAL;
+	}
+
 	if (btf_type_needs_resolve(ret_type) &&
 	    !env_type_is_resolved(env, ret_type_id)) {
 		err = btf_resolve(env, ret_type, ret_type_id);
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
index fa64b80b8bca..04f0a045dcaa 100644
--- a/kernel/bpf/dispatcher.c
+++ b/kernel/bpf/dispatcher.c
@@ -4,6 +4,7 @@
 #include <linux/hash.h>
 #include <linux/bpf.h>
 #include <linux/filter.h>
+#include <linux/init.h>
 
 /* The BPF dispatcher is a multiway branch code generator. The
  * dispatcher is a mechanism to avoid the performance penalty of an
@@ -90,6 +91,11 @@ int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int n
 	return -ENOTSUPP;
 }
 
+int __weak __init bpf_arch_init_dispatcher_early(void *ip)
+{
+	return -ENOTSUPP;
+}
+
 static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf)
 {
 	s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0];
diff --git a/kernel/bpf/memalloc.c b/kernel/bpf/memalloc.c
index 5f83be1d2018..4901fa1048cd 100644
--- a/kernel/bpf/memalloc.c
+++ b/kernel/bpf/memalloc.c
@@ -418,14 +418,17 @@ static void drain_mem_cache(struct bpf_mem_cache *c)
 	/* No progs are using this bpf_mem_cache, but htab_map_free() called
 	 * bpf_mem_cache_free() for all remaining elements and they can be in
 	 * free_by_rcu or in waiting_for_gp lists, so drain those lists now.
+	 *
+	 * Except for waiting_for_gp list, there are no concurrent operations
+	 * on these lists, so it is safe to use __llist_del_all().
 	 */
 	llist_for_each_safe(llnode, t, __llist_del_all(&c->free_by_rcu))
 		free_one(c, llnode);
 	llist_for_each_safe(llnode, t, llist_del_all(&c->waiting_for_gp))
 		free_one(c, llnode);
-	llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist))
+	llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist))
 		free_one(c, llnode);
-	llist_for_each_safe(llnode, t, llist_del_all(&c->free_llist_extra))
+	llist_for_each_safe(llnode, t, __llist_del_all(&c->free_llist_extra))
 		free_one(c, llnode);
 }
 
@@ -493,6 +496,16 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
 		rcu_in_progress = 0;
 		for_each_possible_cpu(cpu) {
 			c = per_cpu_ptr(ma->cache, cpu);
+			/*
+			 * refill_work may be unfinished for PREEMPT_RT kernel
+			 * in which irq work is invoked in a per-CPU RT thread.
+			 * It is also possible for kernel with
+			 * arch_irq_work_has_interrupt() being false and irq
+			 * work is invoked in timer interrupt. So waiting for
+			 * the completion of irq work to ease the handling of
+			 * concurrency.
+			 */
+			irq_work_sync(&c->refill_work);
 			drain_mem_cache(c);
 			rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
 		}
@@ -507,6 +520,7 @@ void bpf_mem_alloc_destroy(struct bpf_mem_alloc *ma)
 			cc = per_cpu_ptr(ma->caches, cpu);
 			for (i = 0; i < NUM_CACHES; i++) {
 				c = &cc->cache[i];
+				irq_work_sync(&c->refill_work);
 				drain_mem_cache(c);
 				rcu_in_progress += atomic_read(&c->call_rcu_in_progress);
 			}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 014ee0953dbd..225666307bba 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1027,12 +1027,17 @@ out:
  */
 static void *realloc_array(void *arr, size_t old_n, size_t new_n, size_t size)
 {
+	void *new_arr;
+
 	if (!new_n || old_n == new_n)
 		goto out;
 
-	arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
-	if (!arr)
+	new_arr = krealloc_array(arr, new_n, size, GFP_KERNEL);
+	if (!new_arr) {
+		kfree(arr);
 		return NULL;
+	}
+	arr = new_arr;
 
 	if (new_n > old_n)
 		memset(arr + old_n * size, 0, (new_n - old_n) * size);
@@ -6618,8 +6623,12 @@ static int release_reference(struct bpf_verifier_env *env,
 		return err;
 
 	bpf_for_each_reg_in_vstate(env->cur_state, state, reg, ({
-		if (reg->ref_obj_id == ref_obj_id)
-			__mark_reg_unknown(env, reg);
+		if (reg->ref_obj_id == ref_obj_id) {
+			if (!env->allow_ptr_leaks)
+				__mark_reg_not_init(env, reg);
+			else
+				__mark_reg_unknown(env, reg);
+		}
 	}));
 
 	return 0;
@@ -6946,6 +6955,7 @@ static int set_user_ringbuf_callback_state(struct bpf_verifier_env *env,
 	__mark_reg_not_init(env, &callee->regs[BPF_REG_5]);
 
 	callee->in_callback_fn = true;
+	callee->callback_ret_range = tnum_range(0, 1);
 	return 0;
 }
 
diff --git a/kernel/events/core.c b/kernel/events/core.c
index aefc1e08e015..4ec3717003d5 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -54,6 +54,7 @@
 #include <linux/highmem.h>
 #include <linux/pgtable.h>
 #include <linux/buildid.h>
+#include <linux/task_work.h>
 
 #include "internal.h"
 
@@ -2276,11 +2277,26 @@ event_sched_out(struct perf_event *event,
 	event->pmu->del(event, 0);
 	event->oncpu = -1;
 
-	if (READ_ONCE(event->pending_disable) >= 0) {
-		WRITE_ONCE(event->pending_disable, -1);
+	if (event->pending_disable) {
+		event->pending_disable = 0;
 		perf_cgroup_event_disable(event, ctx);
 		state = PERF_EVENT_STATE_OFF;
 	}
+
+	if (event->pending_sigtrap) {
+		bool dec = true;
+
+		event->pending_sigtrap = 0;
+		if (state != PERF_EVENT_STATE_OFF &&
+		    !event->pending_work) {
+			event->pending_work = 1;
+			dec = false;
+			task_work_add(current, &event->pending_task, TWA_RESUME);
+		}
+		if (dec)
+			local_dec(&event->ctx->nr_pending);
+	}
+
 	perf_event_set_state(event, state);
 
 	if (!is_software_event(event))
@@ -2432,7 +2448,7 @@ static void __perf_event_disable(struct perf_event *event,
  * hold the top-level event's child_mutex, so any descendant that
  * goes to exit will block in perf_event_exit_event().
  *
- * When called from perf_pending_event it's OK because event->ctx
+ * When called from perf_pending_irq it's OK because event->ctx
  * is the current context on this CPU and preemption is disabled,
  * hence we can't get into perf_event_task_sched_out for this context.
  */
@@ -2471,9 +2487,8 @@ EXPORT_SYMBOL_GPL(perf_event_disable);
 
 void perf_event_disable_inatomic(struct perf_event *event)
 {
-	WRITE_ONCE(event->pending_disable, smp_processor_id());
-	/* can fail, see perf_pending_event_disable() */
-	irq_work_queue(&event->pending);
+	event->pending_disable = 1;
+	irq_work_queue(&event->pending_irq);
 }
 
 #define MAX_INTERRUPTS (~0ULL)
@@ -3428,11 +3443,23 @@ static void perf_event_context_sched_out(struct task_struct *task, int ctxn,
 		raw_spin_lock_nested(&next_ctx->lock, SINGLE_DEPTH_NESTING);
 		if (context_equiv(ctx, next_ctx)) {
 
+			perf_pmu_disable(pmu);
+
+			/* PMIs are disabled; ctx->nr_pending is stable. */
+			if (local_read(&ctx->nr_pending) ||
+			    local_read(&next_ctx->nr_pending)) {
+				/*
+				 * Must not swap out ctx when there's pending
+				 * events that rely on the ctx->task relation.
+				 */
+				raw_spin_unlock(&next_ctx->lock);
+				rcu_read_unlock();
+				goto inside_switch;
+			}
+
 			WRITE_ONCE(ctx->task, next);
 			WRITE_ONCE(next_ctx->task, task);
 
-			perf_pmu_disable(pmu);
-
 			if (cpuctx->sched_cb_usage && pmu->sched_task)
 				pmu->sched_task(ctx, false);
 
@@ -3473,6 +3500,7 @@ unlock:
 		raw_spin_lock(&ctx->lock);
 		perf_pmu_disable(pmu);
 
+inside_switch:
 		if (cpuctx->sched_cb_usage && pmu->sched_task)
 			pmu->sched_task(ctx, false);
 		task_ctx_sched_out(cpuctx, ctx, EVENT_ALL);
@@ -4939,7 +4967,7 @@ static void perf_addr_filters_splice(struct perf_event *event,
 
 static void _free_event(struct perf_event *event)
 {
-	irq_work_sync(&event->pending);
+	irq_work_sync(&event->pending_irq);
 
 	unaccount_event(event);
 
@@ -6439,7 +6467,8 @@ static void perf_sigtrap(struct perf_event *event)
 		return;
 
 	/*
-	 * perf_pending_event() can race with the task exiting.
+	 * Both perf_pending_task() and perf_pending_irq() can race with the
+	 * task exiting.
 	 */
 	if (current->flags & PF_EXITING)
 		return;
@@ -6448,23 +6477,33 @@ static void perf_sigtrap(struct perf_event *event)
 		      event->attr.type, event->attr.sig_data);
 }
 
-static void perf_pending_event_disable(struct perf_event *event)
+/*
+ * Deliver the pending work in-event-context or follow the context.
+ */
+static void __perf_pending_irq(struct perf_event *event)
 {
-	int cpu = READ_ONCE(event->pending_disable);
+	int cpu = READ_ONCE(event->oncpu);
 
+	/*
+	 * If the event isn't running; we done. event_sched_out() will have
+	 * taken care of things.
+	 */
 	if (cpu < 0)
 		return;
 
+	/*
+	 * Yay, we hit home and are in the context of the event.
+	 */
 	if (cpu == smp_processor_id()) {
-		WRITE_ONCE(event->pending_disable, -1);
-
-		if (event->attr.sigtrap) {
+		if (event->pending_sigtrap) {
+			event->pending_sigtrap = 0;
 			perf_sigtrap(event);
-			atomic_set_release(&event->event_limit, 1); /* rearm event */
-			return;
+			local_dec(&event->ctx->nr_pending);
+		}
+		if (event->pending_disable) {
+			event->pending_disable = 0;
+			perf_event_disable_local(event);
 		}
-
-		perf_event_disable_local(event);
 		return;
 	}
 
@@ -6484,35 +6523,62 @@ static void perf_pending_event_disable(struct perf_event *event)
 	 *				  irq_work_queue(); // FAILS
 	 *
 	 *  irq_work_run()
-	 *    perf_pending_event()
+	 *    perf_pending_irq()
 	 *
 	 * But the event runs on CPU-B and wants disabling there.
 	 */
-	irq_work_queue_on(&event->pending, cpu);
+	irq_work_queue_on(&event->pending_irq, cpu);
 }
 
-static void perf_pending_event(struct irq_work *entry)
+static void perf_pending_irq(struct irq_work *entry)
 {
-	struct perf_event *event = container_of(entry, struct perf_event, pending);
+	struct perf_event *event = container_of(entry, struct perf_event, pending_irq);
 	int rctx;
 
-	rctx = perf_swevent_get_recursion_context();
 	/*
 	 * If we 'fail' here, that's OK, it means recursion is already disabled
 	 * and we won't recurse 'further'.
 	 */
+	rctx = perf_swevent_get_recursion_context();
 
-	perf_pending_event_disable(event);
-
+	/*
+	 * The wakeup isn't bound to the context of the event -- it can happen
+	 * irrespective of where the event is.
	 */
 	if (event->pending_wakeup) {
 		event->pending_wakeup = 0;
 		perf_event_wakeup(event);
 	}
 
+	__perf_pending_irq(event);
+
 	if (rctx >= 0)
 		perf_swevent_put_recursion_context(rctx);
 }
 
+static void perf_pending_task(struct callback_head *head)
+{
+	struct perf_event *event = container_of(head, struct perf_event, pending_task);
+	int rctx;
+
+	/*
+	 * If we 'fail' here, that's OK, it means recursion is already disabled
+	 * and we won't recurse 'further'.
+	 */
+	preempt_disable_notrace();
+	rctx = perf_swevent_get_recursion_context();
+
+	if (event->pending_work) {
+		event->pending_work = 0;
+		perf_sigtrap(event);
+		local_dec(&event->ctx->nr_pending);
+	}
+
+	if (rctx >= 0)
+		perf_swevent_put_recursion_context(rctx);
+	preempt_enable_notrace();
+}
+
 #ifdef CONFIG_GUEST_PERF_EVENTS
 struct perf_guest_info_callbacks __rcu *perf_guest_cbs;
 
@@ -9212,8 +9278,8 @@ int perf_event_account_interrupt(struct perf_event *event)
  */
 
 static int __perf_event_overflow(struct perf_event *event,
-				   int throttle, struct perf_sample_data *data,
-				   struct pt_regs *regs)
+				 int throttle, struct perf_sample_data *data,
+				 struct pt_regs *regs)
 {
 	int events = atomic_read(&event->event_limit);
 	int ret = 0;
@@ -9236,24 +9302,36 @@ static int __perf_event_overflow(struct perf_event *event,
 	if (events && atomic_dec_and_test(&event->event_limit)) {
 		ret = 1;
 		event->pending_kill = POLL_HUP;
-		event->pending_addr = data->addr;
-
 		perf_event_disable_inatomic(event);
 	}
 
+	if (event->attr.sigtrap) {
+		/*
+		 * Should not be able to return to user space without processing
+		 * pending_sigtrap (kernel events can overflow multiple times).
+		 */
+		WARN_ON_ONCE(event->pending_sigtrap && event->attr.exclude_kernel);
+		if (!event->pending_sigtrap) {
+			event->pending_sigtrap = 1;
+			local_inc(&event->ctx->nr_pending);
+		}
+		event->pending_addr = data->addr;
+		irq_work_queue(&event->pending_irq);
+	}
+
 	READ_ONCE(event->overflow_handler)(event, data, regs);
 
 	if (*perf_event_fasync(event) && event->pending_kill) {
 		event->pending_wakeup = 1;
-		irq_work_queue(&event->pending);
+		irq_work_queue(&event->pending_irq);
 	}
 
 	return ret;
 }
 
 int perf_event_overflow(struct perf_event *event,
-			  struct perf_sample_data *data,
-			  struct pt_regs *regs)
+			struct perf_sample_data *data,
+			struct pt_regs *regs)
 {
 	return __perf_event_overflow(event, 1, data, regs);
 }
@@ -9768,6 +9846,7 @@ void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size,
 
 	perf_sample_data_init(&data, 0, 0);
 	data.raw = &raw;
+	data.sample_flags |= PERF_SAMPLE_RAW;
 
 	perf_trace_buf_update(record, event_type);
 
@@ -11570,8 +11649,8 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	init_waitqueue_head(&event->waitq);
 
-	event->pending_disable = -1;
-	init_irq_work(&event->pending, perf_pending_event);
+	init_irq_work(&event->pending_irq, perf_pending_irq);
+	init_task_work(&event->pending_task, perf_pending_task);
 
 	mutex_init(&event->mmap_mutex);
 	raw_spin_lock_init(&event->addr_filters.lock);
@@ -11593,9 +11672,6 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
 	if (parent_event)
 		event->event_caps = parent_event->event_caps;
 
-	if (event->attr.sigtrap)
-		atomic_set(&event->event_limit, 1);
-
 	if (task) {
 		event->attach_state = PERF_ATTACH_TASK;
 		/*
diff --git a/kernel/events/hw_breakpoint_test.c b/kernel/events/hw_breakpoint_test.c
index 5ced822df788..c57610f52bb4 100644
--- a/kernel/events/hw_breakpoint_test.c
+++ b/kernel/events/hw_breakpoint_test.c
@@ -295,11 +295,11 @@ static int test_init(struct kunit *test)
 {
 	/* Most test cases want 2 distinct CPUs. */
 	if (num_online_cpus() < 2)
-		return -EINVAL;
+		kunit_skip(test, "not enough cpus");
 
 	/* Want the system to not use breakpoints elsewhere. */
 	if (hw_breakpoint_is_used())
-		return -EBUSY;
+		kunit_skip(test, "hw breakpoint already in use");
 
 	return 0;
 }
diff --git a/kernel/events/ring_buffer.c b/kernel/events/ring_buffer.c
index 726132039c38..273a0fe7910a 100644
--- a/kernel/events/ring_buffer.c
+++ b/kernel/events/ring_buffer.c
@@ -22,7 +22,7 @@ static void perf_output_wakeup(struct perf_output_handle *handle)
 	atomic_set(&handle->rb->poll, EPOLLIN);
 
 	handle->event->pending_wakeup = 1;
-	irq_work_queue(&handle->event->pending);
+	irq_work_queue(&handle->event->pending_irq);
 }
 
 /*
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 3220b0a2fb4a..cd9f5a66a690 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -2429,8 +2429,11 @@ int enable_kprobe(struct kprobe *kp)
 	if (!kprobes_all_disarmed && kprobe_disabled(p)) {
 		p->flags &= ~KPROBE_FLAG_DISABLED;
 		ret = arm_kprobe(p);
-		if (ret)
+		if (ret) {
 			p->flags |= KPROBE_FLAG_DISABLED;
+			if (p != kp)
+				kp->flags |= KPROBE_FLAG_DISABLED;
+		}
 	}
 out:
 	mutex_unlock(&kprobe_mutex);
diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c
index f58a0aa92310..793c55a2becb 100644
--- a/kernel/power/hibernate.c
+++ b/kernel/power/hibernate.c
@@ -645,7 +645,7 @@ static void power_down(void)
 	int error;
 
 	if (hibernation_mode == HIBERNATION_SUSPEND) {
-		error = suspend_devices_and_enter(PM_SUSPEND_MEM);
+		error = suspend_devices_and_enter(mem_sleep_current);
 		if (error) {
 			hibernation_mode = hibernation_ops ?
 						HIBERNATION_PLATFORM :
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 6bb8e72bc815..93416afebd59 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -1403,30 +1403,32 @@ static void rcu_poll_gp_seq_end(unsigned long *snap)
 // where caller does not hold the root rcu_node structure's lock.
 static void rcu_poll_gp_seq_start_unlocked(unsigned long *snap)
 {
+	unsigned long flags;
 	struct rcu_node *rnp = rcu_get_root();
 
 	if (rcu_init_invoked()) {
 		lockdep_assert_irqs_enabled();
-		raw_spin_lock_irq_rcu_node(rnp);
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	}
 	rcu_poll_gp_seq_start(snap);
 	if (rcu_init_invoked())
-		raw_spin_unlock_irq_rcu_node(rnp);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 // Make the polled API aware of the end of a grace period, but where
 // caller does not hold the root rcu_node structure's lock.
 static void rcu_poll_gp_seq_end_unlocked(unsigned long *snap)
 {
+	unsigned long flags;
 	struct rcu_node *rnp = rcu_get_root();
 
 	if (rcu_init_invoked()) {
 		lockdep_assert_irqs_enabled();
-		raw_spin_lock_irq_rcu_node(rnp);
+		raw_spin_lock_irqsave_rcu_node(rnp, flags);
 	}
 	rcu_poll_gp_seq_end(snap);
 	if (rcu_init_invoked())
-		raw_spin_unlock_irq_rcu_node(rnp);
+		raw_spin_unlock_irqrestore_rcu_node(rnp, flags);
 }
 
 /*
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 5800b0623ff3..cb2aa2b54c7a 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4823,10 +4823,10 @@ static inline void finish_task(struct task_struct *prev)
 
 #ifdef CONFIG_SMP
 
-static void do_balance_callbacks(struct rq *rq, struct callback_head *head)
+static void do_balance_callbacks(struct rq *rq, struct balance_callback *head)
 {
 	void (*func)(struct rq *rq);
-	struct callback_head *next;
+	struct balance_callback *next;
 
 	lockdep_assert_rq_held(rq);
 
@@ -4853,15 +4853,15 @@ static void balance_push(struct rq *rq);
  * This abuse is tolerated because it places all the unlikely/odd cases behind
  * a single test, namely: rq->balance_callback == NULL.
  */
-struct callback_head balance_push_callback = {
+struct balance_callback balance_push_callback = {
 	.next = NULL,
-	.func = (void (*)(struct callback_head *))balance_push,
+	.func = balance_push,
 };
 
-static inline struct callback_head *
+static inline struct balance_callback *
 __splice_balance_callbacks(struct rq *rq, bool split)
 {
-	struct callback_head *head = rq->balance_callback;
+	struct balance_callback *head = rq->balance_callback;
 
 	if (likely(!head))
 		return NULL;
@@ -4883,7 +4883,7 @@ __splice_balance_callbacks(struct rq *rq, bool split)
 	return head;
 }
 
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
 {
 	return __splice_balance_callbacks(rq, true);
 }
@@ -4893,7 +4893,7 @@ static void __balance_callbacks(struct rq *rq)
 	do_balance_callbacks(rq, __splice_balance_callbacks(rq, false));
 }
 
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
 {
 	unsigned long flags;
 
@@ -4910,12 +4910,12 @@ static inline void __balance_callbacks(struct rq *rq)
 {
 }
 
-static inline struct callback_head *splice_balance_callbacks(struct rq *rq)
+static inline struct balance_callback *splice_balance_callbacks(struct rq *rq)
 {
 	return NULL;
 }
 
-static inline void balance_callbacks(struct rq *rq, struct callback_head *head)
+static inline void balance_callbacks(struct rq *rq, struct balance_callback *head)
 {
 }
 
@@ -6188,7 +6188,7 @@ static void sched_core_balance(struct rq *rq)
 	preempt_enable();
 }
 
-static DEFINE_PER_CPU(struct callback_head, core_balance_head);
+static DEFINE_PER_CPU(struct balance_callback, core_balance_head);
 
 static void queue_core_balance(struct rq *rq)
 {
@@ -7419,7 +7419,7 @@ static int __sched_setscheduler(struct task_struct *p,
 	int oldpolicy = -1, policy = attr->sched_policy;
 	int retval, oldprio, newprio, queued, running;
 	const struct sched_class *prev_class;
-	struct callback_head *head;
+	struct balance_callback *head;
 	struct rq_flags rf;
 	int reset_on_fork;
 	int queue_flags = DEQUEUE_SAVE | DEQUEUE_MOVE | DEQUEUE_NOCLOCK;
diff --git a/kernel/sched/deadline.c b/kernel/sched/deadline.c
index 86dea6a05267..9ae8f41e3372 100644
--- a/kernel/sched/deadline.c
+++ b/kernel/sched/deadline.c
@@ -644,8 +644,8 @@ static inline bool need_pull_dl_task(struct rq *rq, struct task_struct *prev)
 	return rq->online && dl_task(prev);
 }
 
-static DEFINE_PER_CPU(struct callback_head, dl_push_head);
-static DEFINE_PER_CPU(struct callback_head, dl_pull_head);
+static DEFINE_PER_CPU(struct balance_callback, dl_push_head);
+static DEFINE_PER_CPU(struct balance_callback, dl_pull_head);
 
 static void push_dl_tasks(struct rq *);
 static void pull_dl_task(struct rq *);
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index d869bcf898cc..ed2a47e4ddae 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -410,8 +410,8 @@ static inline int has_pushable_tasks(struct rq *rq)
 	return !plist_head_empty(&rq->rt.pushable_tasks);
 }
 
-static DEFINE_PER_CPU(struct callback_head, rt_push_head);
-static DEFINE_PER_CPU(struct callback_head, rt_pull_head);
+static DEFINE_PER_CPU(struct balance_callback, rt_push_head);
+static DEFINE_PER_CPU(struct balance_callback, rt_pull_head);
 
 static void push_rt_tasks(struct rq *);
 static void pull_rt_task(struct rq *);
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 1644242ecd11..a4a20046e586 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -938,6 +938,12 @@ struct uclamp_rq {
 DECLARE_STATIC_KEY_FALSE(sched_uclamp_used);
 #endif /* CONFIG_UCLAMP_TASK */
 
+struct rq;
+struct balance_callback {
+	struct balance_callback *next;
+	void (*func)(struct rq *rq);
+};
+
 /*
  * This is the main, per-CPU runqueue data structure.
  *
@@ -1036,7 +1042,7 @@ struct rq {
 	unsigned long		cpu_capacity;
 	unsigned long		cpu_capacity_orig;
 
-	struct callback_head	*balance_callback;
+	struct balance_callback	*balance_callback;
 
 	unsigned char		nohz_idle_balance;
 	unsigned char		idle_balance;
@@ -1182,6 +1188,14 @@ static inline bool is_migration_disabled(struct task_struct *p)
 #endif
 }
 
+DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
+
+#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
+#define this_rq()		this_cpu_ptr(&runqueues)
+#define task_rq(p)		cpu_rq(task_cpu(p))
+#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
+#define raw_rq()		raw_cpu_ptr(&runqueues)
+
 struct sched_group;
 #ifdef CONFIG_SCHED_CORE
 static inline struct cpumask *sched_group_span(struct sched_group *sg);
@@ -1269,7 +1283,7 @@ static inline bool sched_group_cookie_match(struct rq *rq,
 		return true;
 
 	for_each_cpu_and(cpu, sched_group_span(group), p->cpus_ptr) {
-		if (sched_core_cookie_match(rq, p))
+		if (sched_core_cookie_match(cpu_rq(cpu), p))
 			return true;
 	}
 	return false;
@@ -1384,14 +1398,6 @@ static inline void update_idle_core(struct rq *rq)
 static inline void update_idle_core(struct rq *rq) { }
 #endif
 
-DECLARE_PER_CPU_SHARED_ALIGNED(struct rq, runqueues);
-
-#define cpu_rq(cpu)		(&per_cpu(runqueues, (cpu)))
-#define this_rq()		this_cpu_ptr(&runqueues)
-#define task_rq(p)		cpu_rq(task_cpu(p))
-#define cpu_curr(cpu)		(cpu_rq(cpu)->curr)
-#define raw_rq()		raw_cpu_ptr(&runqueues)
-
 #ifdef CONFIG_FAIR_GROUP_SCHED
 static inline struct task_struct *task_of(struct sched_entity *se)
 {
@@ -1544,7 +1550,7 @@ struct rq_flags {
 #endif
 };
 
-extern struct callback_head balance_push_callback;
+extern struct balance_callback balance_push_callback;
 
 /*
  * Lockdep annotation that avoids accidental unlocks; it's like a
@@ -1724,7 +1730,7 @@ init_numa_balancing(unsigned long clone_flags, struct task_struct *p)
 
 static inline void
 queue_balance_callback(struct rq *rq,
-		       struct callback_head *head,
+		       struct balance_callback *head,
 		       void (*func)(struct rq *rq))
 {
 	lockdep_assert_rq_held(rq);
@@ -1737,7 +1743,7 @@ queue_balance_callback(struct rq *rq,
 	if (unlikely(head->next || rq->balance_callback == &balance_push_callback))
 		return;
 
-	head->func = (void (*)(struct callback_head *))func;
+	head->func = func;
 	head->next = rq->balance_callback;
 	rq->balance_callback = head;
 }
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 49fb9ec8366d..1ed08967fb97 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -687,6 +687,7 @@ BPF_CALL_5(bpf_perf_event_output, struct pt_regs *, regs, struct bpf_map *, map,
 
 	perf_sample_data_init(sd, 0, 0);
 	sd->raw = &raw;
+	sd->sample_flags |= PERF_SAMPLE_RAW;
 
 	err = __bpf_perf_event_output(regs, map, flags, sd);
 
@@ -745,6 +746,7 @@ u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size,
 	perf_fetch_caller_regs(regs);
 	perf_sample_data_init(sd, 0, 0);
 	sd->raw = &raw;
+	sd->sample_flags |= PERF_SAMPLE_RAW;
 
 	ret = __bpf_perf_event_output(regs, map, flags, sd);
 out:
diff --git a/kernel/trace/fprobe.c b/kernel/trace/fprobe.c
index aac63ca9c3d1..e8143e368074 100644
--- a/kernel/trace/fprobe.c
+++ b/kernel/trace/fprobe.c
@@ -141,6 +141,8 @@ static int fprobe_init_rethook(struct fprobe *fp, int num)
 		return -E2BIG;
 
 	fp->rethook = rethook_alloc((void *)fp, fprobe_exit_handler);
+	if (!fp->rethook)
+		return -ENOMEM;
 	for (i = 0; i < size; i++) {
 		struct fprobe_rethook_node *node;
 
@@ -301,7 +303,8 @@ int unregister_fprobe(struct fprobe *fp)
 {
 	int ret;
 
-	if (!fp || fp->ops.func != fprobe_handler)
+	if (!fp || (fp->ops.saved_func != fprobe_handler &&
+		    fp->ops.saved_func != fprobe_kprobe_handler))
 		return -EINVAL;
 
 	/*
diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c
index fbf2543111c0..7dc023641bf1 100644
--- a/kernel/trace/ftrace.c
+++ b/kernel/trace/ftrace.c
@@ -3028,18 +3028,8 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
 		command |= FTRACE_UPDATE_TRACE_FUNC;
 	}
 
-	if (!command || !ftrace_enabled) {
-		/*
-		 * If these are dynamic or per_cpu ops, they still
-		 * need their data freed. Since, function tracing is
-		 * not currently active, we can just free them
-		 * without synchronizing all CPUs.
-		 */
-		if (ops->flags & FTRACE_OPS_FL_DYNAMIC)
-			goto free_ops;
-
-		return 0;
-	}
+	if (!command || !ftrace_enabled)
+		goto out;
 
 	/*
 	 * If the ops uses a trampoline, then it needs to be
@@ -3076,6 +3066,7 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
 	removed_ops = NULL;
 	ops->flags &= ~FTRACE_OPS_FL_REMOVING;
 
+out:
 	/*
 	 * Dynamic ops may be freed, we must make sure that all
 	 * callers are done before leaving this function.
@@ -3103,7 +3094,6 @@ int ftrace_shutdown(struct ftrace_ops *ops, int command)
 		if (IS_ENABLED(CONFIG_PREEMPTION))
 			synchronize_rcu_tasks();
 
- free_ops:
 		ftrace_trampoline_free(ops);
 	}
diff --git a/kernel/trace/kprobe_event_gen_test.c b/kernel/trace/kprobe_event_gen_test.c
index 80e04a1e1977..d81f7c51025c 100644
--- a/kernel/trace/kprobe_event_gen_test.c
+++ b/kernel/trace/kprobe_event_gen_test.c
@@ -100,20 +100,20 @@ static int __init test_gen_kprobe_cmd(void)
 					 KPROBE_GEN_TEST_FUNC,
 					 KPROBE_GEN_TEST_ARG0, KPROBE_GEN_TEST_ARG1);
 	if (ret)
-		goto free;
+		goto out;
 
 	/* Use kprobe_event_add_fields to add the rest of the fields */
 
 	ret = kprobe_event_add_fields(&cmd, KPROBE_GEN_TEST_ARG2, KPROBE_GEN_TEST_ARG3);
 	if (ret)
-		goto free;
+		goto out;
 
 	/*
 	 * This actually creates the event.
 	 */
 	ret = kprobe_event_gen_cmd_end(&cmd);
 	if (ret)
-		goto free;
+		goto out;
 
 	/*
 	 * Now get the gen_kprobe_test event file. We need to prevent
@@ -136,13 +136,11 @@ static int __init test_gen_kprobe_cmd(void)
 		goto delete;
 	}
  out:
+	kfree(buf);
 	return ret;
  delete:
 	/* We got an error after creating the event, delete it */
 	ret = kprobe_event_delete("gen_kprobe_test");
- free:
-	kfree(buf);
-
 	goto out;
 }
 
@@ -170,14 +168,14 @@ static int __init test_gen_kretprobe_cmd(void)
 					      KPROBE_GEN_TEST_FUNC,
 					      "$retval");
 	if (ret)
-		goto free;
+		goto out;
 
 	/*
 	 * This actually creates the event.
 	 */
 	ret = kretprobe_event_gen_cmd_end(&cmd);
 	if (ret)
-		goto free;
+		goto out;
 
 	/*
 	 * Now get the gen_kretprobe_test event file. We need to
@@ -201,13 +199,11 @@ static int __init test_gen_kretprobe_cmd(void)
 		goto delete;
 	}
  out:
+	kfree(buf);
 	return ret;
  delete:
 	/* We got an error after creating the event, delete it */
 	ret = kprobe_event_delete("gen_kretprobe_test");
- free:
-	kfree(buf);
-
 	goto out;
 }
diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c
index 199759c73519..9712083832f4 100644
--- a/kernel/trace/ring_buffer.c
+++ b/kernel/trace/ring_buffer.c
@@ -937,6 +937,9 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
 	struct ring_buffer_per_cpu *cpu_buffer;
 	struct rb_irq_work *rbwork;
 
+	if (!buffer)
+		return;
+
 	if (cpu == RING_BUFFER_ALL_CPUS) {
 
 		/* Wake up individual ones too. One level recursion */
@@ -945,7 +948,15 @@ void ring_buffer_wake_waiters(struct trace_buffer *buffer, int cpu)
 
 		rbwork = &buffer->irq_work;
 	} else {
+		if (WARN_ON_ONCE(!buffer->buffers))
+			return;
+		if (WARN_ON_ONCE(cpu >= nr_cpu_ids))
+			return;
+
 		cpu_buffer = buffer->buffers[cpu];
+		/* The CPU buffer may not have been initialized yet */
+		if (!cpu_buffer)
+			return;
 		rbwork = &cpu_buffer->irq_work;
 	}
 
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 064072c16e3d..f50398cb790d 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -74,6 +74,7 @@ static int proc_do_uts_string(struct ctl_table *table, int write,
 static DEFINE_CTL_TABLE_POLL(hostname_poll);
 static DEFINE_CTL_TABLE_POLL(domainname_poll);
 
+// Note: update 'enum uts_proc' to match any changes to this table
 static struct ctl_table uts_kern_table[] = {
 	{
 		.procname	= "arch",