diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-18 15:03:58 +0200 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2024-09-18 15:03:58 +0200 |
commit | 9f0c253ddddca608457a42e509267bed2dee0a50 (patch) | |
tree | 6e00e1ed61e997c06169c70c9365f213fe412fc8 /drivers | |
parent | 941c122da5c8355335dc16011c1c291a32cd1118 (diff) | |
parent | 5e645f31139183ac9a282238da18ca6bbc1c6f4a (diff) |
Merge tag 'perf-core-2024-09-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf events updates from Ingo Molnar:
- Implement per-PMU context rescheduling to significantly improve
single-PMU performance, and related cleanups/fixes (Peter Zijlstra
and Namhyung Kim)
- Fix ancient bug resulting in a lot of events being dropped
erroneously at higher sampling frequencies (Luo Gengkun)
- uprobes enhancements:
- Implement RCU-protected hot path optimizations for better
performance:
"For baseline vs SRCU, peak througput increased from 3.7 M/s
(million uprobe triggerings per second) up to about 8 M/s. For
uretprobes it's a bit more modest with bump from 2.4 M/s to
5 M/s.
For SRCU vs RCU Tasks Trace, peak throughput for uprobes
increases further from 8 M/s to 10.3 M/s (+28%!), and for
uretprobes from 5.3 M/s to 5.8 M/s (+11%), as we have more
work to do on uretprobes side.
Even single-thread (no contention) performance is slightly
better: 3.276 M/s to 3.396 M/s (+3.5%) for uprobes, and 2.055
M/s to 2.174 M/s (+5.8%) for uretprobes."
(Andrii Nakryiko et al)
- Document mmap_lock, don't abuse get_user_pages_remote() (Oleg
Nesterov)
- Cleanups & fixes to prepare for future work:
- Remove uprobe_register_refctr()
- Simplify error handling for alloc_uprobe()
- Make uprobe_register() return struct uprobe *
- Fold __uprobe_unregister() into uprobe_unregister()
- Shift put_uprobe() from delete_uprobe() to uprobe_unregister()
- BPF: Fix use-after-free in bpf_uprobe_multi_link_attach()
(Oleg Nesterov)
- New feature & ABI extension: allow events to use PERF_SAMPLE READ
with inheritance, enabling sample based profiling of a group of
counters over a hierarchy of processes or threads (Ben Gainey)
- Intel uncore & power events updates:
- Add Arrow Lake and Lunar Lake support
- Add PERF_EV_CAP_READ_SCOPE
- Clean up and enhance cpumask and hotplug support
(Kan Liang)
- Add LNL uncore iMC freerunning support
- Use D0:F0 as a default device
(Zhenyu Wang)
- Intel PT: fix AUX snapshot handling race (Adrian Hunter)
- Misc fixes and cleanups (James Clark, Jiri Olsa, Oleg Nesterov and
Peter Zijlstra)
* tag 'perf-core-2024-09-18' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip: (40 commits)
dmaengine: idxd: Clean up cpumask and hotplug for perfmon
iommu/vt-d: Clean up cpumask and hotplug for perfmon
perf/x86/intel/cstate: Clean up cpumask and hotplug
perf: Add PERF_EV_CAP_READ_SCOPE
perf: Generic hotplug support for a PMU with a scope
uprobes: perform lockless SRCU-protected uprobes_tree lookup
rbtree: provide rb_find_rcu() / rb_find_add_rcu()
perf/uprobe: split uprobe_unregister()
uprobes: travers uprobe's consumer list locklessly under SRCU protection
uprobes: get rid of enum uprobe_filter_ctx in uprobe filter callbacks
uprobes: protected uprobe lifetime with SRCU
uprobes: revamp uprobe refcounting and lifetime management
bpf: Fix use-after-free in bpf_uprobe_multi_link_attach()
perf/core: Fix small negative period being ignored
perf: Really fix event_function_call() locking
perf: Optimize __pmu_ctx_sched_out()
perf: Add context time freeze
perf: Fix event_function_call() locking
perf: Extract a few helpers
perf: Optimize context reschedule for single PMU cases
...
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/dma/idxd/idxd.h | 7 | ||||
-rw-r--r-- | drivers/dma/idxd/init.c | 3 | ||||
-rw-r--r-- | drivers/dma/idxd/perfmon.c | 98 | ||||
-rw-r--r-- | drivers/iommu/intel/iommu.h | 2 | ||||
-rw-r--r-- | drivers/iommu/intel/perfmon.c | 111 |
5 files changed, 3 insertions, 218 deletions
diff --git a/drivers/dma/idxd/idxd.h b/drivers/dma/idxd/idxd.h index 868b724a3b75..d84e21daa991 100644 --- a/drivers/dma/idxd/idxd.h +++ b/drivers/dma/idxd/idxd.h @@ -124,7 +124,6 @@ struct idxd_pmu { struct pmu pmu; char name[IDXD_NAME_SIZE]; - int cpu; int n_counters; int counter_width; @@ -135,8 +134,6 @@ struct idxd_pmu { unsigned long supported_filters; int n_filters; - - struct hlist_node cpuhp_node; }; #define IDXD_MAX_PRIORITY 0xf @@ -803,14 +800,10 @@ void idxd_user_counter_increment(struct idxd_wq *wq, u32 pasid, int index); int perfmon_pmu_init(struct idxd_device *idxd); void perfmon_pmu_remove(struct idxd_device *idxd); void perfmon_counter_overflow(struct idxd_device *idxd); -void perfmon_init(void); -void perfmon_exit(void); #else static inline int perfmon_pmu_init(struct idxd_device *idxd) { return 0; } static inline void perfmon_pmu_remove(struct idxd_device *idxd) {} static inline void perfmon_counter_overflow(struct idxd_device *idxd) {} -static inline void perfmon_init(void) {} -static inline void perfmon_exit(void) {} #endif /* debugfs */ diff --git a/drivers/dma/idxd/init.c b/drivers/dma/idxd/init.c index 21f6905b554d..5725ea82c409 100644 --- a/drivers/dma/idxd/init.c +++ b/drivers/dma/idxd/init.c @@ -878,8 +878,6 @@ static int __init idxd_init_module(void) else support_enqcmd = true; - perfmon_init(); - err = idxd_driver_register(&idxd_drv); if (err < 0) goto err_idxd_driver_register; @@ -928,7 +926,6 @@ static void __exit idxd_exit_module(void) idxd_driver_unregister(&idxd_drv); pci_unregister_driver(&idxd_pci_driver); idxd_cdev_remove(); - perfmon_exit(); idxd_remove_debugfs(); } module_exit(idxd_exit_module); diff --git a/drivers/dma/idxd/perfmon.c b/drivers/dma/idxd/perfmon.c index 5e94247e1ea7..f511cf15845b 100644 --- a/drivers/dma/idxd/perfmon.c +++ b/drivers/dma/idxd/perfmon.c @@ -6,29 +6,6 @@ #include "idxd.h" #include "perfmon.h" -static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, - char *buf); - -static cpumask_t perfmon_dsa_cpu_mask; -static bool cpuhp_set_up; -static enum cpuhp_state cpuhp_slot; - -/* - * perf userspace reads this attribute to determine which cpus to open - * counters on. It's connected to perfmon_dsa_cpu_mask, which is - * maintained by the cpu hotplug handlers. - */ -static DEVICE_ATTR_RO(cpumask); - -static struct attribute *perfmon_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL, -}; - -static struct attribute_group cpumask_attr_group = { - .attrs = perfmon_cpumask_attrs, -}; - /* * These attributes specify the bits in the config word that the perf * syscall uses to pass the event ids and categories to perfmon. @@ -67,16 +44,9 @@ static struct attribute_group perfmon_format_attr_group = { static const struct attribute_group *perfmon_attr_groups[] = { &perfmon_format_attr_group, - &cpumask_attr_group, NULL, }; -static ssize_t cpumask_show(struct device *dev, struct device_attribute *attr, - char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &perfmon_dsa_cpu_mask); -} - static bool is_idxd_event(struct idxd_pmu *idxd_pmu, struct perf_event *event) { return &idxd_pmu->pmu == event->pmu; @@ -217,7 +187,6 @@ static int perfmon_pmu_event_init(struct perf_event *event) return -EINVAL; event->hw.event_base = ioread64(PERFMON_TABLE_OFFSET(idxd)); - event->cpu = idxd->idxd_pmu->cpu; event->hw.config = event->attr.config; if (event->group_leader != event) @@ -488,6 +457,7 @@ static void idxd_pmu_init(struct idxd_pmu *idxd_pmu) idxd_pmu->pmu.stop = perfmon_pmu_event_stop; idxd_pmu->pmu.read = perfmon_pmu_event_update; idxd_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + idxd_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE; idxd_pmu->pmu.module = THIS_MODULE; } @@ -496,47 +466,11 @@ void perfmon_pmu_remove(struct idxd_device *idxd) if (!idxd->idxd_pmu) return; - cpuhp_state_remove_instance(cpuhp_slot, &idxd->idxd_pmu->cpuhp_node); perf_pmu_unregister(&idxd->idxd_pmu->pmu); kfree(idxd->idxd_pmu); idxd->idxd_pmu = NULL; } -static int perf_event_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct idxd_pmu *idxd_pmu; - - idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); - - /* select the first online CPU as the designated reader */ - if (cpumask_empty(&perfmon_dsa_cpu_mask)) { - cpumask_set_cpu(cpu, &perfmon_dsa_cpu_mask); - idxd_pmu->cpu = cpu; - } - - return 0; -} - -static int perf_event_cpu_offline(unsigned int cpu, struct hlist_node *node) -{ - struct idxd_pmu *idxd_pmu; - unsigned int target; - - idxd_pmu = hlist_entry_safe(node, typeof(*idxd_pmu), cpuhp_node); - - if (!cpumask_test_and_clear_cpu(cpu, &perfmon_dsa_cpu_mask)) - return 0; - - target = cpumask_any_but(cpu_online_mask, cpu); - /* migrate events if there is a valid target */ - if (target < nr_cpu_ids) { - cpumask_set_cpu(target, &perfmon_dsa_cpu_mask); - perf_pmu_migrate_context(&idxd_pmu->pmu, cpu, target); - } - - return 0; -} - int perfmon_pmu_init(struct idxd_device *idxd) { union idxd_perfcap perfcap; @@ -544,12 +478,6 @@ int perfmon_pmu_init(struct idxd_device *idxd) int rc = -ENODEV; /* - * perfmon module initialization failed, nothing to do - */ - if (!cpuhp_set_up) - return -ENODEV; - - /* * If perfmon_offset or num_counters is 0, it means perfmon is * not supported on this hardware. */ @@ -624,11 +552,6 @@ int perfmon_pmu_init(struct idxd_device *idxd) if (rc) goto free; - rc = cpuhp_state_add_instance(cpuhp_slot, &idxd_pmu->cpuhp_node); - if (rc) { - perf_pmu_unregister(&idxd->idxd_pmu->pmu); - goto free; - } out: return rc; free: @@ -637,22 +560,3 @@ free: goto out; } - -void __init perfmon_init(void) -{ - int rc = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "driver/dma/idxd/perf:online", - perf_event_cpu_online, - perf_event_cpu_offline); - if (WARN_ON(rc < 0)) - return; - - cpuhp_slot = rc; - cpuhp_set_up = true; -} - -void __exit perfmon_exit(void) -{ - if (cpuhp_set_up) - cpuhp_remove_multi_state(cpuhp_slot); -} diff --git a/drivers/iommu/intel/iommu.h b/drivers/iommu/intel/iommu.h index 428d253f1348..1497f3112b12 100644 --- a/drivers/iommu/intel/iommu.h +++ b/drivers/iommu/intel/iommu.h @@ -700,8 +700,6 @@ struct iommu_pmu { DECLARE_BITMAP(used_mask, IOMMU_PMU_IDX_MAX); struct perf_event *event_list[IOMMU_PMU_IDX_MAX]; unsigned char irq_name[16]; - struct hlist_node cpuhp_node; - int cpu; }; #define IOMMU_IRQ_ID_OFFSET_PRQ (DMAR_UNITS_SUPPORTED) diff --git a/drivers/iommu/intel/perfmon.c b/drivers/iommu/intel/perfmon.c index 44083d01852d..75f493bcb353 100644 --- a/drivers/iommu/intel/perfmon.c +++ b/drivers/iommu/intel/perfmon.c @@ -34,28 +34,9 @@ static struct attribute_group iommu_pmu_events_attr_group = { .attrs = attrs_empty, }; -static cpumask_t iommu_pmu_cpu_mask; - -static ssize_t -cpumask_show(struct device *dev, struct device_attribute *attr, char *buf) -{ - return cpumap_print_to_pagebuf(true, buf, &iommu_pmu_cpu_mask); -} -static DEVICE_ATTR_RO(cpumask); - -static struct attribute *iommu_pmu_cpumask_attrs[] = { - &dev_attr_cpumask.attr, - NULL -}; - -static struct attribute_group iommu_pmu_cpumask_attr_group = { - .attrs = iommu_pmu_cpumask_attrs, -}; - static const struct attribute_group *iommu_pmu_attr_groups[] = { &iommu_pmu_format_attr_group, &iommu_pmu_events_attr_group, - &iommu_pmu_cpumask_attr_group, NULL }; @@ -565,6 +546,7 @@ static int __iommu_pmu_register(struct intel_iommu *iommu) iommu_pmu->pmu.attr_groups = iommu_pmu_attr_groups; iommu_pmu->pmu.attr_update = iommu_pmu_attr_update; iommu_pmu->pmu.capabilities = PERF_PMU_CAP_NO_EXCLUDE; + iommu_pmu->pmu.scope = PERF_PMU_SCOPE_SYS_WIDE; iommu_pmu->pmu.module = THIS_MODULE; return perf_pmu_register(&iommu_pmu->pmu, iommu_pmu->pmu.name, -1); @@ -773,89 +755,6 @@ static void iommu_pmu_unset_interrupt(struct intel_iommu *iommu) iommu->perf_irq = 0; } -static int iommu_pmu_cpu_online(unsigned int cpu, struct hlist_node *node) -{ - struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); - - if (cpumask_empty(&iommu_pmu_cpu_mask)) - cpumask_set_cpu(cpu, &iommu_pmu_cpu_mask); - - if (cpumask_test_cpu(cpu, &iommu_pmu_cpu_mask)) - iommu_pmu->cpu = cpu; - - return 0; -} - -static int iommu_pmu_cpu_offline(unsigned int cpu, struct hlist_node *node) -{ - struct iommu_pmu *iommu_pmu = hlist_entry_safe(node, typeof(*iommu_pmu), cpuhp_node); - int target = cpumask_first(&iommu_pmu_cpu_mask); - - /* - * The iommu_pmu_cpu_mask has been updated when offline the CPU - * for the first iommu_pmu. Migrate the other iommu_pmu to the - * new target. - */ - if (target < nr_cpu_ids && target != iommu_pmu->cpu) { - perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); - iommu_pmu->cpu = target; - return 0; - } - - if (!cpumask_test_and_clear_cpu(cpu, &iommu_pmu_cpu_mask)) - return 0; - - target = cpumask_any_but(cpu_online_mask, cpu); - - if (target < nr_cpu_ids) - cpumask_set_cpu(target, &iommu_pmu_cpu_mask); - else - return 0; - - perf_pmu_migrate_context(&iommu_pmu->pmu, cpu, target); - iommu_pmu->cpu = target; - - return 0; -} - -static int nr_iommu_pmu; -static enum cpuhp_state iommu_cpuhp_slot; - -static int iommu_pmu_cpuhp_setup(struct iommu_pmu *iommu_pmu) -{ - int ret; - - if (!nr_iommu_pmu) { - ret = cpuhp_setup_state_multi(CPUHP_AP_ONLINE_DYN, - "driver/iommu/intel/perfmon:online", - iommu_pmu_cpu_online, - iommu_pmu_cpu_offline); - if (ret < 0) - return ret; - iommu_cpuhp_slot = ret; - } - - ret = cpuhp_state_add_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); - if (ret) { - if (!nr_iommu_pmu) - cpuhp_remove_multi_state(iommu_cpuhp_slot); - return ret; - } - nr_iommu_pmu++; - - return 0; -} - -static void iommu_pmu_cpuhp_free(struct iommu_pmu *iommu_pmu) -{ - cpuhp_state_remove_instance(iommu_cpuhp_slot, &iommu_pmu->cpuhp_node); - - if (--nr_iommu_pmu) - return; - - cpuhp_remove_multi_state(iommu_cpuhp_slot); -} - void iommu_pmu_register(struct intel_iommu *iommu) { struct iommu_pmu *iommu_pmu = iommu->pmu; @@ -866,17 +765,12 @@ void iommu_pmu_register(struct intel_iommu *iommu) if (__iommu_pmu_register(iommu)) goto err; - if (iommu_pmu_cpuhp_setup(iommu_pmu)) - goto unregister; - /* Set interrupt for overflow */ if (iommu_pmu_set_interrupt(iommu)) - goto cpuhp_free; + goto unregister; return; -cpuhp_free: - iommu_pmu_cpuhp_free(iommu_pmu); unregister: perf_pmu_unregister(&iommu_pmu->pmu); err: @@ -892,6 +786,5 @@ void iommu_pmu_unregister(struct intel_iommu *iommu) return; iommu_pmu_unset_interrupt(iommu); - iommu_pmu_cpuhp_free(iommu_pmu); perf_pmu_unregister(&iommu_pmu->pmu); } |