diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 13:12:15 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2021-11-01 13:12:15 -0700 |
commit | 91e1c99e175ae6bb6be765c6fcd40e869f8f6aee (patch) | |
tree | d3dfcf33345ee7dcad4b1e6d215f581eea5fd2ee /tools | |
parent | 5a47ebe98e6e5113ea8213d019a794d5851fbd46 (diff) | |
parent | 2de71ee153efa93099d2ab864acffeec70a8dcd5 (diff) |
Merge tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip
Pull perf updates from Thomas Gleixner:
"Core:
- Allow ftrace to instrument parts of the perf core code
- Add a new mem_hops field to perf_mem_data_src which allows
representing intra-node/package or inter-node/off-package details, to
prepare for next-generation systems which have more hierarchy
within the node/package level.
Tools:
- Update for the new mem_hops field in perf_mem_data_src
Arch:
- A set of constraints fixes for the Intel uncore PMU
- The usual set of small fixes and improvements for x86 and PPC"
* tag 'perf-core-2021-10-31' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
perf/x86/intel: Fix ICL/SPR INST_RETIRED.PREC_DIST encodings
powerpc/perf: Fix data source encodings for L2.1 and L3.1 accesses
tools/perf: Add mem_hops field in perf_mem_data_src structure
perf: Add mem_hops field in perf_mem_data_src structure
perf: Add comment about current state of PERF_MEM_LVL_* namespace and remove an extra line
perf/core: Allow ftrace for functions in kernel/event/core.c
perf/x86: Add new event for AUX output counter index
perf/x86: Add compiler barrier after updating BTS
perf/x86/intel/uncore: Fix Intel SPR M3UPI event constraints
perf/x86/intel/uncore: Fix Intel SPR M2PCIE event constraints
perf/x86/intel/uncore: Fix Intel SPR IIO event constraints
perf/x86/intel/uncore: Fix Intel SPR CHA event constraints
perf/x86/intel/uncore: Fix Intel ICX IIO event constraints
perf/x86/intel/uncore: Fix invalid unit check
perf/x86/intel/uncore: Support extra IMC channel on Ice Lake server
Diffstat (limited to 'tools')
-rw-r--r-- | tools/include/uapi/linux/perf_event.h | 19 | ||||
-rw-r--r-- | tools/perf/util/mem-events.c | 20 |
2 files changed, 34 insertions, 5 deletions
diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index f92880a15645..2fc09579e24a 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1210,14 +1210,16 @@ union perf_mem_data_src { mem_remote:1, /* remote */ mem_snoopx:2, /* snoop mode, ext */ mem_blk:3, /* access blocked */ - mem_rsvd:21; + mem_hops:3, /* hop level */ + mem_rsvd:18; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd:21, + __u64 mem_rsvd:18, + mem_hops:3, /* hop level */ mem_blk:3, /* access blocked */ mem_snoopx:2, /* snoop mode, ext */ mem_remote:1, /* remote */ @@ -1241,7 +1243,13 @@ union perf_mem_data_src { #define PERF_MEM_OP_EXEC 0x10 /* code (execution) */ #define PERF_MEM_OP_SHIFT 0 -/* memory hierarchy (memory level, hit or miss) */ +/* + * PERF_MEM_LVL_* namespace being depricated to some extent in the + * favour of newer composite PERF_MEM_{LVLNUM_,REMOTE_,SNOOPX_} fields. + * Supporting this namespace inorder to not break defined ABIs. 
+ * + * memory hierarchy (memory level, hit or miss) + */ #define PERF_MEM_LVL_NA 0x01 /* not available */ #define PERF_MEM_LVL_HIT 0x02 /* hit level */ #define PERF_MEM_LVL_MISS 0x04 /* miss level */ @@ -1307,6 +1315,11 @@ union perf_mem_data_src { #define PERF_MEM_BLK_ADDR 0x04 /* address conflict */ #define PERF_MEM_BLK_SHIFT 40 +/* hop level */ +#define PERF_MEM_HOPS_0 0x01 /* remote core, same node */ +/* 2-7 available */ +#define PERF_MEM_HOPS_SHIFT 43 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) diff --git a/tools/perf/util/mem-events.c b/tools/perf/util/mem-events.c index f0e75df72b80..3167b4628b6d 100644 --- a/tools/perf/util/mem-events.c +++ b/tools/perf/util/mem-events.c @@ -301,6 +301,16 @@ static const char * const mem_lvlnum[] = { [PERF_MEM_LVLNUM_NA] = "N/A", }; +static const char * const mem_hops[] = { + "N/A", + /* + * While printing, 'Remote' will be added to represent + * 'Remote core, same node' accesses as remote field need + * to be set with mem_hops field. + */ + "core, same node", +}; + int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) { size_t i, l = 0; @@ -320,12 +330,14 @@ int perf_mem__lvl_scnprintf(char *out, size_t sz, struct mem_info *mem_info) /* already taken care of */ m &= ~(PERF_MEM_LVL_HIT|PERF_MEM_LVL_MISS); - if (mem_info && mem_info->data_src.mem_remote) { strcat(out, "Remote "); l += 7; } + if (mem_info && mem_info->data_src.mem_hops) + l += scnprintf(out + l, sz - l, "%s ", mem_hops[mem_info->data_src.mem_hops]); + printed = 0; for (i = 0; m && i < ARRAY_SIZE(mem_lvl); i++, m >>= 1) { if (!(m & 0x1)) @@ -472,8 +484,12 @@ int c2c_decode_stats(struct c2c_stats *stats, struct mem_info *mi) /* * Skylake might report unknown remote level via this * bit, consider it when evaluating remote HITMs. + * + * Incase of power, remote field can also be used to denote cache + * accesses from the another core of same node. 
Hence, setting + * mrem only when HOPS is zero along with set remote field. */ - bool mrem = data_src->mem_remote; + bool mrem = (data_src->mem_remote && !data_src->mem_hops); int err = 0; #define HITM_INC(__f) \ |