summaryrefslogtreecommitdiff
path: root/kernel
diff options
context:
space:
mode:
authorJakub Kicinski <kuba@kernel.org>2024-10-31 17:30:16 -0700
committerJakub Kicinski <kuba@kernel.org>2024-11-07 13:44:16 -0800
commit2696e451dfb07f92d0e995ef456bd9110a48806a (patch)
treee9859c86a019eb946b5d421a502989197d2db694 /kernel
parent702c290a1cb16f4a64567cae0bedb848399f7915 (diff)
parentbfc64d9b7e8cac82be6b8629865e137d962578f8 (diff)
Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Cross-merge networking fixes after downstream PR (net-6.12-rc7). Conflicts: drivers/net/ethernet/freescale/enetc/enetc_pf.c e15c5506dd39 ("net: enetc: allocate vf_state during PF probes") 3774409fd4c6 ("net: enetc: build enetc_pf_common.c as a separate module") https://lore.kernel.org/20241105114100.118bd35e@canb.auug.org.au Adjacent changes: drivers/net/ethernet/ti/am65-cpsw-nuss.c de794169cf17 ("net: ethernet: ti: am65-cpsw: Fix multi queue Rx on J7") 4a7b2ba94a59 ("net: ethernet: ti: am65-cpsw: Use tstats instead of open coded version") Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'kernel')
-rw-r--r--kernel/events/core.c2
-rw-r--r--kernel/fork.c2
-rw-r--r--kernel/irq/msi.c2
-rw-r--r--kernel/sched/core.c8
-rw-r--r--kernel/sched/ext.c18
-rw-r--r--kernel/sched/ext.h2
-rw-r--r--kernel/sched/fair.c25
-rw-r--r--kernel/sched/sched.h36
-rw-r--r--kernel/sched/syscalls.c2
-rw-r--r--kernel/trace/trace.c4
10 files changed, 77 insertions, 24 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c
index cdd09769e6c5..df27d08a7232 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -13959,7 +13959,7 @@ static void perf_event_clear_cpumask(unsigned int cpu)
}
/* migrate */
- list_for_each_entry_rcu(pmu, &pmus, entry, lockdep_is_held(&pmus_srcu)) {
+ list_for_each_entry(pmu, &pmus, entry) {
if (pmu->scope == PERF_PMU_SCOPE_NONE ||
WARN_ON_ONCE(pmu->scope >= PERF_PMU_MAX_SCOPE))
continue;
diff --git a/kernel/fork.c b/kernel/fork.c
index 3bf38d260cb3..22f43721d031 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -105,6 +105,7 @@
#include <linux/rseq.h>
#include <uapi/linux/pidfd.h>
#include <linux/pidfs.h>
+#include <linux/tick.h>
#include <asm/pgalloc.h>
#include <linux/uaccess.h>
@@ -2292,6 +2293,7 @@ __latent_entropy struct task_struct *copy_process(
acct_clear_integrals(p);
posix_cputimers_init(&p->posix_cputimers);
+ tick_dep_init_task(p);
p->io_context = NULL;
audit_set_context(p, NULL);
diff --git a/kernel/irq/msi.c b/kernel/irq/msi.c
index 3a24d6b5f559..396a067a8a56 100644
--- a/kernel/irq/msi.c
+++ b/kernel/irq/msi.c
@@ -718,7 +718,7 @@ static int msi_domain_alloc(struct irq_domain *domain, unsigned int virq,
ret = ops->msi_init(domain, info, virq + i, hwirq + i, arg);
if (ret < 0) {
if (ops->msi_free) {
- for (i--; i > 0; i--)
+ for (i--; i >= 0; i--)
ops->msi_free(domain, info, virq + i);
}
irq_domain_free_irqs_top(domain, virq, nr_irqs);
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index dbfb5717d6af..719e0ed1e976 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -4711,7 +4711,7 @@ int sched_fork(unsigned long clone_flags, struct task_struct *p)
if (rt_prio(p->prio)) {
p->sched_class = &rt_sched_class;
#ifdef CONFIG_SCHED_CLASS_EXT
- } else if (task_should_scx(p)) {
+ } else if (task_should_scx(p->policy)) {
p->sched_class = &ext_sched_class;
#endif
} else {
@@ -7025,7 +7025,7 @@ int default_wake_function(wait_queue_entry_t *curr, unsigned mode, int wake_flag
}
EXPORT_SYMBOL(default_wake_function);
-const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
+const struct sched_class *__setscheduler_class(int policy, int prio)
{
if (dl_prio(prio))
return &dl_sched_class;
@@ -7034,7 +7034,7 @@ const struct sched_class *__setscheduler_class(struct task_struct *p, int prio)
return &rt_sched_class;
#ifdef CONFIG_SCHED_CLASS_EXT
- if (task_should_scx(p))
+ if (task_should_scx(policy))
return &ext_sched_class;
#endif
@@ -7142,7 +7142,7 @@ void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task)
queue_flag &= ~DEQUEUE_MOVE;
prev_class = p->sched_class;
- next_class = __setscheduler_class(p, prio);
+ next_class = __setscheduler_class(p->policy, prio);
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
diff --git a/kernel/sched/ext.c b/kernel/sched/ext.c
index 8b98ab2dca5a..b5f4b1a5ae98 100644
--- a/kernel/sched/ext.c
+++ b/kernel/sched/ext.c
@@ -4257,14 +4257,14 @@ static const struct kset_uevent_ops scx_uevent_ops = {
* Used by sched_fork() and __setscheduler_prio() to pick the matching
* sched_class. dl/rt are already handled.
*/
-bool task_should_scx(struct task_struct *p)
+bool task_should_scx(int policy)
{
if (!scx_enabled() ||
unlikely(scx_ops_enable_state() == SCX_OPS_DISABLING))
return false;
if (READ_ONCE(scx_switching_all))
return true;
- return p->policy == SCHED_EXT;
+ return policy == SCHED_EXT;
}
/**
@@ -4494,11 +4494,16 @@ static void scx_ops_disable_workfn(struct kthread_work *work)
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
const struct sched_class *old_class = p->sched_class;
+ const struct sched_class *new_class =
+ __setscheduler_class(p->policy, p->prio);
struct sched_enq_and_set_ctx ctx;
+ if (old_class != new_class && p->se.sched_delayed)
+ dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
- p->sched_class = __setscheduler_class(p, p->prio);
+ p->sched_class = new_class;
check_class_changing(task_rq(p), p, old_class);
sched_enq_and_set_task(&ctx);
@@ -5204,12 +5209,17 @@ static int scx_ops_enable(struct sched_ext_ops *ops, struct bpf_link *link)
scx_task_iter_start(&sti);
while ((p = scx_task_iter_next_locked(&sti))) {
const struct sched_class *old_class = p->sched_class;
+ const struct sched_class *new_class =
+ __setscheduler_class(p->policy, p->prio);
struct sched_enq_and_set_ctx ctx;
+ if (old_class != new_class && p->se.sched_delayed)
+ dequeue_task(task_rq(p), p, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
+
sched_deq_and_put_task(p, DEQUEUE_SAVE | DEQUEUE_MOVE, &ctx);
p->scx.slice = SCX_SLICE_DFL;
- p->sched_class = __setscheduler_class(p, p->prio);
+ p->sched_class = new_class;
check_class_changing(task_rq(p), p, old_class);
sched_enq_and_set_task(&ctx);
diff --git a/kernel/sched/ext.h b/kernel/sched/ext.h
index 246019519231..b1675bb59fc4 100644
--- a/kernel/sched/ext.h
+++ b/kernel/sched/ext.h
@@ -18,7 +18,7 @@ bool scx_can_stop_tick(struct rq *rq);
void scx_rq_activate(struct rq *rq);
void scx_rq_deactivate(struct rq *rq);
int scx_check_setscheduler(struct task_struct *p, int policy);
-bool task_should_scx(struct task_struct *p);
+bool task_should_scx(int policy);
void init_sched_ext_class(void);
static inline u32 scx_cpuperf_target(s32 cpu)
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index c157d4860a3b..2d16c8545c71 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -3369,7 +3369,7 @@ retry_pids:
vma = vma_next(&vmi);
}
- do {
+ for (; vma; vma = vma_next(&vmi)) {
if (!vma_migratable(vma) || !vma_policy_mof(vma) ||
is_vm_hugetlb_page(vma) || (vma->vm_flags & VM_MIXEDMAP)) {
trace_sched_skip_vma_numa(mm, vma, NUMAB_SKIP_UNSUITABLE);
@@ -3491,7 +3491,7 @@ retry_pids:
*/
if (vma_pids_forced)
break;
- } for_each_vma(vmi, vma);
+ }
/*
* If no VMAs are remaining and VMAs were skipped due to the PID
@@ -5625,8 +5625,9 @@ pick_next_entity(struct rq *rq, struct cfs_rq *cfs_rq)
struct sched_entity *se = pick_eevdf(cfs_rq);
if (se->sched_delayed) {
dequeue_entities(rq, se, DEQUEUE_SLEEP | DEQUEUE_DELAYED);
- SCHED_WARN_ON(se->sched_delayed);
- SCHED_WARN_ON(se->on_rq);
+ /*
+ * Must not reference @se again, see __block_task().
+ */
return NULL;
}
return se;
@@ -7176,7 +7177,11 @@ static int dequeue_entities(struct rq *rq, struct sched_entity *se, int flags)
/* Fix-up what dequeue_task_fair() skipped */
hrtick_update(rq);
- /* Fix-up what block_task() skipped. */
+ /*
+ * Fix-up what block_task() skipped.
+ *
+ * Must be last, @p might not be valid after this.
+ */
__block_task(rq, p);
}
@@ -7193,12 +7198,14 @@ static bool dequeue_task_fair(struct rq *rq, struct task_struct *p, int flags)
if (!(p->se.sched_delayed && (task_on_rq_migrating(p) || (flags & DEQUEUE_SAVE))))
util_est_dequeue(&rq->cfs, p);
- if (dequeue_entities(rq, &p->se, flags) < 0) {
- util_est_update(&rq->cfs, p, DEQUEUE_SLEEP);
+ util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
+ if (dequeue_entities(rq, &p->se, flags) < 0)
return false;
- }
- util_est_update(&rq->cfs, p, flags & DEQUEUE_SLEEP);
+ /*
+ * Must not reference @p after dequeue_entities(DEQUEUE_DELAYED).
+ */
+
hrtick_update(rq);
return true;
}
diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h
index 081519ffab46..6c54a57275cc 100644
--- a/kernel/sched/sched.h
+++ b/kernel/sched/sched.h
@@ -2769,8 +2769,6 @@ static inline void sub_nr_running(struct rq *rq, unsigned count)
static inline void __block_task(struct rq *rq, struct task_struct *p)
{
- WRITE_ONCE(p->on_rq, 0);
- ASSERT_EXCLUSIVE_WRITER(p->on_rq);
if (p->sched_contributes_to_load)
rq->nr_uninterruptible++;
@@ -2778,6 +2776,38 @@ static inline void __block_task(struct rq *rq, struct task_struct *p)
atomic_inc(&rq->nr_iowait);
delayacct_blkio_start();
}
+
+ ASSERT_EXCLUSIVE_WRITER(p->on_rq);
+
+ /*
+ * The moment this write goes through, ttwu() can swoop in and migrate
+ * this task, rendering our rq->__lock ineffective.
+ *
+ * __schedule() try_to_wake_up()
+ * LOCK rq->__lock LOCK p->pi_lock
+ * pick_next_task()
+ * pick_next_task_fair()
+ * pick_next_entity()
+ * dequeue_entities()
+ * __block_task()
+ * RELEASE p->on_rq = 0 if (p->on_rq && ...)
+ * break;
+ *
+ * ACQUIRE (after ctrl-dep)
+ *
+ * cpu = select_task_rq();
+ * set_task_cpu(p, cpu);
+ * ttwu_queue()
+ * ttwu_do_activate()
+ * LOCK rq->__lock
+ * activate_task()
+ * STORE p->on_rq = 1
+ * UNLOCK rq->__lock
+ *
+ * Callers must ensure to not reference @p after this -- we no longer
+ * own it.
+ */
+ smp_store_release(&p->on_rq, 0);
}
extern void activate_task(struct rq *rq, struct task_struct *p, int flags);
@@ -3800,7 +3830,7 @@ static inline int rt_effective_prio(struct task_struct *p, int prio)
extern int __sched_setscheduler(struct task_struct *p, const struct sched_attr *attr, bool user, bool pi);
extern int __sched_setaffinity(struct task_struct *p, struct affinity_context *ctx);
-extern const struct sched_class *__setscheduler_class(struct task_struct *p, int prio);
+extern const struct sched_class *__setscheduler_class(int policy, int prio);
extern void set_load_weight(struct task_struct *p, bool update_load);
extern void enqueue_task(struct rq *rq, struct task_struct *p, int flags);
extern bool dequeue_task(struct rq *rq, struct task_struct *p, int flags);
diff --git a/kernel/sched/syscalls.c b/kernel/sched/syscalls.c
index 0470bcc3d204..24f9f90b6574 100644
--- a/kernel/sched/syscalls.c
+++ b/kernel/sched/syscalls.c
@@ -707,7 +707,7 @@ change:
}
prev_class = p->sched_class;
- next_class = __setscheduler_class(p, newprio);
+ next_class = __setscheduler_class(policy, newprio);
if (prev_class != next_class && p->se.sched_delayed)
dequeue_task(rq, p, DEQUEUE_SLEEP | DEQUEUE_DELAYED | DEQUEUE_NOCLOCK);
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index a8f52b6527ca..2b64b3ec67d9 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -5501,6 +5501,10 @@ static const struct file_operations tracing_iter_fops = {
static const char readme_msg[] =
"tracing mini-HOWTO:\n\n"
+ "By default tracefs removes all OTH file permission bits.\n"
+ "When mounting tracefs an optional group id can be specified\n"
+ "which adds the group to every directory and file in tracefs:\n\n"
+ "\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
"# echo 0 > tracing_on : quick way to disable tracing\n"
"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
" Important files:\n"