From 1b3bf0747d4f1a963e59c26e602868bdce195318 Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Tue, 11 Jun 2024 10:19:50 -0700 Subject: tools/power/turbostat: Switch to new Intel CPU model defines New CPU #defines encode vendor and family as well as model. N.B. Copied VFM_*() defines here from to avoid an application picking a second internal kernel header file. Signed-off-by: Tony Luck Acked-by: Rafael J. Wysocki Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 165 +++++++++++++++++++--------------- 1 file changed, 95 insertions(+), 70 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 9f5d053d4bc6..02312e62c857 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -9,6 +9,30 @@ #define _GNU_SOURCE #include MSRHEADER + +// copied from arch/x86/include/asm/cpu_device_id.h +#define VFM_MODEL_BIT 0 +#define VFM_FAMILY_BIT 8 +#define VFM_VENDOR_BIT 16 +#define VFM_RSVD_BIT 24 + +#define VFM_MODEL_MASK GENMASK(VFM_FAMILY_BIT - 1, VFM_MODEL_BIT) +#define VFM_FAMILY_MASK GENMASK(VFM_VENDOR_BIT - 1, VFM_FAMILY_BIT) +#define VFM_VENDOR_MASK GENMASK(VFM_RSVD_BIT - 1, VFM_VENDOR_BIT) + +#define VFM_MODEL(vfm) (((vfm) & VFM_MODEL_MASK) >> VFM_MODEL_BIT) +#define VFM_FAMILY(vfm) (((vfm) & VFM_FAMILY_MASK) >> VFM_FAMILY_BIT) +#define VFM_VENDOR(vfm) (((vfm) & VFM_VENDOR_MASK) >> VFM_VENDOR_BIT) + +#define VFM_MAKE(_vendor, _family, _model) ( \ + ((_model) << VFM_MODEL_BIT) | \ + ((_family) << VFM_FAMILY_BIT) | \ + ((_vendor) << VFM_VENDOR_BIT) \ +) +// end copied section + +#define X86_VENDOR_INTEL 0 + #include INTEL_FAMILY_HEADER #include BUILD_BUG_HEADER #include @@ -367,7 +391,7 @@ struct platform_features { }; struct platform_data { - unsigned int model; + unsigned int vfm; const struct platform_features *features; }; @@ -910,75 +934,75 @@ static const struct platform_features amd_features_with_rapl = { }; static const struct platform_data turbostat_pdata[] = { - { INTEL_FAM6_NEHALEM, &nhm_features }, - { INTEL_FAM6_NEHALEM_G, &nhm_features }, - { INTEL_FAM6_NEHALEM_EP, &nhm_features }, - { INTEL_FAM6_NEHALEM_EX, &nhx_features }, - { INTEL_FAM6_WESTMERE, &nhm_features }, - { INTEL_FAM6_WESTMERE_EP, &nhm_features }, - { INTEL_FAM6_WESTMERE_EX, &nhx_features }, - { INTEL_FAM6_SANDYBRIDGE, &snb_features }, - { INTEL_FAM6_SANDYBRIDGE_X, &snx_features }, - { INTEL_FAM6_IVYBRIDGE, &ivb_features }, - { INTEL_FAM6_IVYBRIDGE_X, &ivx_features }, - { INTEL_FAM6_HASWELL, &hsw_features }, - { INTEL_FAM6_HASWELL_X, &hsx_features }, - { INTEL_FAM6_HASWELL_L, &hswl_features }, - { INTEL_FAM6_HASWELL_G, &hswg_features }, - { INTEL_FAM6_BROADWELL, &bdw_features }, - { INTEL_FAM6_BROADWELL_G, &bdwg_features }, - { INTEL_FAM6_BROADWELL_X, &bdx_features }, - { INTEL_FAM6_BROADWELL_D, &bdx_features }, - { INTEL_FAM6_SKYLAKE_L, &skl_features }, - { INTEL_FAM6_SKYLAKE, &skl_features }, - { INTEL_FAM6_SKYLAKE_X, &skx_features }, - { INTEL_FAM6_KABYLAKE_L, &skl_features }, - { INTEL_FAM6_KABYLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE, &skl_features }, - { INTEL_FAM6_COMETLAKE_L, &skl_features }, - { INTEL_FAM6_CANNONLAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_X, &icx_features }, - { INTEL_FAM6_ICELAKE_D, &icx_features }, - { INTEL_FAM6_ICELAKE_L, &cnl_features }, - { INTEL_FAM6_ICELAKE_NNPI, &cnl_features }, - { INTEL_FAM6_ROCKETLAKE, &cnl_features }, - { INTEL_FAM6_TIGERLAKE_L, &cnl_features }, - { INTEL_FAM6_TIGERLAKE, &cnl_features }, - { INTEL_FAM6_SAPPHIRERAPIDS_X, &spr_features }, - { INTEL_FAM6_EMERALDRAPIDS_X, &spr_features }, - { INTEL_FAM6_GRANITERAPIDS_X, &spr_features }, - { INTEL_FAM6_LAKEFIELD, &cnl_features }, - { INTEL_FAM6_ALDERLAKE, &adl_features }, - { INTEL_FAM6_ALDERLAKE_L, &adl_features }, - { INTEL_FAM6_RAPTORLAKE, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_P, &adl_features }, - { INTEL_FAM6_RAPTORLAKE_S, &adl_features }, - { INTEL_FAM6_METEORLAKE, &cnl_features }, - { INTEL_FAM6_METEORLAKE_L, &cnl_features }, - { INTEL_FAM6_ARROWLAKE_H, &arl_features }, - { INTEL_FAM6_ARROWLAKE_U, &arl_features }, - { INTEL_FAM6_ARROWLAKE, &arl_features }, - { INTEL_FAM6_LUNARLAKE_M, &arl_features }, - { INTEL_FAM6_ATOM_SILVERMONT, &slv_features }, - { INTEL_FAM6_ATOM_SILVERMONT_D, &slvd_features }, - { INTEL_FAM6_ATOM_AIRMONT, &amt_features }, - { INTEL_FAM6_ATOM_GOLDMONT, &gmt_features }, - { INTEL_FAM6_ATOM_GOLDMONT_D, &gmtd_features }, - { INTEL_FAM6_ATOM_GOLDMONT_PLUS, &gmtp_features }, - { INTEL_FAM6_ATOM_TREMONT_D, &tmtd_features }, - { INTEL_FAM6_ATOM_TREMONT, &tmt_features }, - { INTEL_FAM6_ATOM_TREMONT_L, &tmt_features }, - { INTEL_FAM6_ATOM_GRACEMONT, &adl_features }, - { INTEL_FAM6_ATOM_CRESTMONT_X, &srf_features }, - { INTEL_FAM6_ATOM_CRESTMONT, &grr_features }, - { INTEL_FAM6_XEON_PHI_KNL, &knl_features }, - { INTEL_FAM6_XEON_PHI_KNM, &knl_features }, + { INTEL_NEHALEM, &nhm_features }, + { INTEL_NEHALEM_G, &nhm_features }, + { INTEL_NEHALEM_EP, &nhm_features }, + { INTEL_NEHALEM_EX, &nhx_features }, + { INTEL_WESTMERE, &nhm_features }, + { INTEL_WESTMERE_EP, &nhm_features }, + { INTEL_WESTMERE_EX, &nhx_features }, + { INTEL_SANDYBRIDGE, &snb_features }, + { INTEL_SANDYBRIDGE_X, &snx_features }, + { INTEL_IVYBRIDGE, &ivb_features }, + { INTEL_IVYBRIDGE_X, &ivx_features }, + { INTEL_HASWELL, &hsw_features }, + { INTEL_HASWELL_X, &hsx_features }, + { INTEL_HASWELL_L, &hswl_features }, + { INTEL_HASWELL_G, &hswg_features }, + { INTEL_BROADWELL, &bdw_features }, + { INTEL_BROADWELL_G, &bdwg_features }, + { INTEL_BROADWELL_X, &bdx_features }, + { INTEL_BROADWELL_D, &bdx_features }, + { INTEL_SKYLAKE_L, &skl_features }, + { INTEL_SKYLAKE, &skl_features }, + { INTEL_SKYLAKE_X, &skx_features }, + { INTEL_KABYLAKE_L, &skl_features }, + { INTEL_KABYLAKE, &skl_features }, + { INTEL_COMETLAKE, &skl_features }, + { INTEL_COMETLAKE_L, &skl_features }, + { INTEL_CANNONLAKE_L, &cnl_features }, + { INTEL_ICELAKE_X, &icx_features }, + { INTEL_ICELAKE_D, &icx_features }, + { INTEL_ICELAKE_L, &cnl_features }, + { INTEL_ICELAKE_NNPI, &cnl_features }, + { INTEL_ROCKETLAKE, &cnl_features }, + { INTEL_TIGERLAKE_L, &cnl_features }, + { INTEL_TIGERLAKE, &cnl_features }, + { INTEL_SAPPHIRERAPIDS_X, &spr_features }, + { INTEL_EMERALDRAPIDS_X, &spr_features }, + { INTEL_GRANITERAPIDS_X, &spr_features }, + { INTEL_LAKEFIELD, &cnl_features }, + { INTEL_ALDERLAKE, &adl_features }, + { INTEL_ALDERLAKE_L, &adl_features }, + { INTEL_RAPTORLAKE, &adl_features }, + { INTEL_RAPTORLAKE_P, &adl_features }, + { INTEL_RAPTORLAKE_S, &adl_features }, + { INTEL_METEORLAKE, &cnl_features }, + { INTEL_METEORLAKE_L, &cnl_features }, + { INTEL_ARROWLAKE_H, &arl_features }, + { INTEL_ARROWLAKE_U, &arl_features }, + { INTEL_ARROWLAKE, &arl_features }, + { INTEL_LUNARLAKE_M, &arl_features }, + { INTEL_ATOM_SILVERMONT, &slv_features }, + { INTEL_ATOM_SILVERMONT_D, &slvd_features }, + { INTEL_ATOM_AIRMONT, &amt_features }, + { INTEL_ATOM_GOLDMONT, &gmt_features }, + { INTEL_ATOM_GOLDMONT_D, &gmtd_features }, + { INTEL_ATOM_GOLDMONT_PLUS, &gmtp_features }, + { INTEL_ATOM_TREMONT_D, &tmtd_features }, + { INTEL_ATOM_TREMONT, &tmt_features }, + { INTEL_ATOM_TREMONT_L, &tmt_features }, + { INTEL_ATOM_GRACEMONT, &adl_features }, + { INTEL_ATOM_CRESTMONT_X, &srf_features }, + { INTEL_ATOM_CRESTMONT, &grr_features }, + { INTEL_XEON_PHI_KNL, &knl_features }, + { INTEL_XEON_PHI_KNM, &knl_features }, /* * Missing support for - * INTEL_FAM6_ICELAKE - * INTEL_FAM6_ATOM_SILVERMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_MID - * INTEL_FAM6_ATOM_AIRMONT_NP + * INTEL_ICELAKE + * INTEL_ATOM_SILVERMONT_MID + * INTEL_ATOM_AIRMONT_MID + * INTEL_ATOM_AIRMONT_NP */ { 0, NULL }, }; @@ -1003,11 +1027,12 @@ void probe_platform_features(unsigned int family, unsigned int model) return; } - if (!genuine_intel || family != 6) + if (!genuine_intel) return; for (i = 0; turbostat_pdata[i].features; i++) { - if (turbostat_pdata[i].model == model) { + if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && + VFM_MODEL(turbostat_pdata[i].vfm) == model) { platform = turbostat_pdata[i].features; return; } -- cgit v1.2.3-70-g09d2 From c81c8ee445e84703cd90171365ed596ef94399cc Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Fri, 24 May 2024 12:06:39 +0200 Subject: tools/power turbostat: Remove anonymous union from rapl_counter_info_t fd_perf field used to be part of the union, but later moved out of it, because we test it with fd_perf != -1 to determine if any perf counter is opened, making the union unused. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 02312e62c857..1ac74cd49cc9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1085,15 +1085,9 @@ struct rapl_counter_info_t { unsigned long long flags[NUM_RAPL_COUNTERS]; double scale[NUM_RAPL_COUNTERS]; enum rapl_unit unit[NUM_RAPL_COUNTERS]; - - union { - /* Active when source == RAPL_SOURCE_MSR */ - struct { - unsigned long long msr[NUM_RAPL_COUNTERS]; - unsigned long long msr_mask[NUM_RAPL_COUNTERS]; - int msr_shift[NUM_RAPL_COUNTERS]; - }; - }; + unsigned long long msr[NUM_RAPL_COUNTERS]; + unsigned long long msr_mask[NUM_RAPL_COUNTERS]; + int msr_shift[NUM_RAPL_COUNTERS]; int fd_perf; }; -- cgit v1.2.3-70-g09d2 From 73ed3c941a76ff3348022477975c84b8d183599a Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Fri, 24 May 2024 12:22:57 +0200 Subject: tools/power turbostat: Replace enum rapl_source and cstate_source with counter_source Reuse the enum. It means the same thing in both cases. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 39 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 1ac74cd49cc9..0fc6c107e371 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -86,8 +86,7 @@ enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; -enum rapl_source { RAPL_SOURCE_NONE, RAPL_SOURCE_PERF, RAPL_SOURCE_MSR }; -enum cstate_source { CSTATE_SOURCE_NONE, CSTATE_SOURCE_PERF, CSTATE_SOURCE_MSR }; +enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; struct sysfs_path { char path[PATH_BYTES]; @@ -1081,7 +1080,7 @@ enum rapl_unit { struct rapl_counter_info_t { unsigned long long data[NUM_RAPL_COUNTERS]; - enum rapl_source source[NUM_RAPL_COUNTERS]; + enum counter_source source[NUM_RAPL_COUNTERS]; unsigned long long flags[NUM_RAPL_COUNTERS]; double scale[NUM_RAPL_COUNTERS]; enum rapl_unit unit[NUM_RAPL_COUNTERS]; @@ -1243,7 +1242,7 @@ enum ccstate_rci_index { struct cstate_counter_info_t { unsigned long long data[NUM_CSTATE_COUNTERS]; - enum cstate_source source[NUM_CSTATE_COUNTERS]; + enum counter_source source[NUM_CSTATE_COUNTERS]; unsigned long long msr[NUM_CSTATE_COUNTERS]; int fd_perf_core; int fd_perf_pkg; @@ -3601,7 +3600,7 @@ size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) size_t ret = 0; for (int i = 0; i < NUM_RAPL_COUNTERS; ++i) - if (rci->source[i] == RAPL_SOURCE_PERF) + if (rci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3612,7 +3611,7 @@ static size_t cstate_counter_info_count_perf(const struct cstate_counter_info_t size_t ret = 0; for (int i = 0; i < NUM_CSTATE_COUNTERS; ++i) - if (cci->source[i] == CSTATE_SOURCE_PERF) + if (cci->source[i] == COUNTER_SOURCE_PERF) ++ret; return ret; @@ -3653,10 +3652,10 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct for (unsigned int i = 0, pi = 1; i < NUM_RAPL_COUNTERS; ++i) { switch (rci->source[i]) { - case RAPL_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case RAPL_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(rci->fd_perf != -1); @@ -3669,7 +3668,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct ++pi; break; - case RAPL_SOURCE_MSR: + case COUNTER_SOURCE_MSR: if (debug) fprintf(stderr, "Reading rapl counter via msr at %u\n", i); @@ -3791,10 +3790,10 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat for (unsigned int i = 0, pi = 0; i < NUM_CSTATE_COUNTERS; ++i) { switch (cci->source[i]) { - case CSTATE_SOURCE_NONE: + case COUNTER_SOURCE_NONE: break; - case CSTATE_SOURCE_PERF: + case COUNTER_SOURCE_PERF: assert(pi < ARRAY_SIZE(perf_data)); assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); @@ -3807,7 +3806,7 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat ++pi; break; - case CSTATE_SOURCE_MSR: + case COUNTER_SOURCE_MSR: assert(!no_msr); if (get_msr(cpu, cci->msr[i], &cci->data[i])) return -13 - i; @@ -3828,7 +3827,7 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat * when invoked for the thread sibling. */ #define PERF_COUNTER_WRITE_DATA(out_counter, index) do { \ - if (cci->source[index] != CSTATE_SOURCE_NONE) \ + if (cci->source[index] != COUNTER_SOURCE_NONE) \ out_counter = cci->data[index]; \ } while (0) @@ -6894,7 +6893,7 @@ void rapl_perf_init(void) rci->fd_perf = -1; for (size_t i = 0; i < NUM_RAPL_COUNTERS; ++i) { rci->data[i] = 0; - rci->source[i] = RAPL_SOURCE_NONE; + rci->source[i] = COUNTER_SOURCE_NONE; } } @@ -6936,14 +6935,14 @@ void rapl_perf_init(void) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_rapl_perf_counter(cpu, rci, cai, &scale, &unit) != -1) { - rci->source[cai->rci_index] = RAPL_SOURCE_PERF; + rci->source[cai->rci_index] = COUNTER_SOURCE_PERF; rci->scale[cai->rci_index] = scale * cai->compat_scale; rci->unit[cai->rci_index] = unit; rci->flags[cai->rci_index] = cai->flags; /* Use MSR for this counter */ } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { - rci->source[cai->rci_index] = RAPL_SOURCE_MSR; + rci->source[cai->rci_index] = COUNTER_SOURCE_MSR; rci->msr[cai->rci_index] = cai->msr; rci->msr_mask[cai->rci_index] = cai->msr_mask; rci->msr_shift[cai->rci_index] = cai->msr_shift; @@ -6953,7 +6952,7 @@ void rapl_perf_init(void) } } - if (rci->source[cai->rci_index] != RAPL_SOURCE_NONE) + if (rci->source[cai->rci_index] != COUNTER_SOURCE_NONE) has_counter = 1; } @@ -7146,17 +7145,17 @@ void cstate_perf_init_(bool soft_c1) /* Use perf API for this counter */ if (!no_perf && cai->perf_name && add_cstate_perf_counter(cpu, cci, cai) != -1) { - cci->source[cai->rci_index] = CSTATE_SOURCE_PERF; + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; /* User MSR for this counter */ } else if (!no_msr && cai->msr && pkg_cstate_limit >= cai->pkg_cstate_limit && probe_msr(cpu, cai->msr) == 0) { - cci->source[cai->rci_index] = CSTATE_SOURCE_MSR; + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; cci->msr[cai->rci_index] = cai->msr; } } - if (cci->source[cai->rci_index] != CSTATE_SOURCE_NONE) { + if (cci->source[cai->rci_index] != COUNTER_SOURCE_NONE) { has_counter = true; cores_visited[core_id] = true; pkg_visited[pkg_id] = true; -- cgit v1.2.3-70-g09d2 From 25e713c6b54a899b5a8b5e09b58f4b87a5b3664b Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Fri, 24 May 2024 13:04:33 +0200 Subject: tools/power turbostat: Add ZERO_ARRAY for zero initializing builtin array It makes it harder to shoot yourself in the foot, by using additional __must_be_array() check. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 0fc6c107e371..29751b41ea0d 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1060,6 +1060,8 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi #define MAX_ADDED_PACKAGE_COUNTERS 16 #define BITMASK_SIZE 32 +#define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) + /* Indexes used to map data read from perf and MSRs into global variables */ enum rapl_rci_index { RAPL_RCI_INDEX_ENERGY_PKG = 0, @@ -3733,9 +3735,9 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat assert(ccstate_counter_info); assert(cpu <= ccstate_counter_info_size); - memset(perf_data, 0, sizeof(perf_data)); - memset(perf_data_core, 0, sizeof(perf_data_core)); - memset(perf_data_pkg, 0, sizeof(perf_data_pkg)); + ZERO_ARRAY(perf_data); + ZERO_ARRAY(perf_data_core); + ZERO_ARRAY(perf_data_pkg); cci = &ccstate_counter_info[cpu]; -- cgit v1.2.3-70-g09d2 From 67bab430f4e71332aced7562a67d73444f5b4b77 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Fri, 24 May 2024 14:31:52 +0200 Subject: tools/power turbostat: Group SMI counter with APERF and MPERF These three counters now are treated similar to other perf counters groups. This simplifies and gets rid of a lot of special cases for APERF and MPERF. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 635 +++++++++++++++------------------- 1 file changed, 280 insertions(+), 355 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 29751b41ea0d..495235055fa2 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -85,7 +85,6 @@ enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; -enum amperf_source { AMPERF_SOURCE_PERF, AMPERF_SOURCE_MSR }; enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; struct sysfs_path { @@ -275,7 +274,6 @@ char *proc_stat = "/proc/stat"; FILE *outf; int *fd_percpu; int *fd_instr_count_percpu; -struct amperf_group_fd *fd_amperf_percpu; /* File descriptors for perf group with APERF and MPERF counters. */ struct timeval interval_tv = { 5, 0 }; struct timespec interval_ts = { 5, 0 }; @@ -290,6 +288,7 @@ unsigned int summary_only; unsigned int list_header_only; unsigned int dump_only; unsigned int has_aperf; +unsigned int has_aperf_access; unsigned int has_epb; unsigned int has_turbo; unsigned int is_hybrid; @@ -330,7 +329,6 @@ unsigned int first_counter_read = 1; int ignore_stdin; bool no_msr; bool no_perf; -enum amperf_source amperf_source; enum gfx_sysfs_idx { GFX_rc6, @@ -1381,6 +1379,67 @@ static struct cstate_counter_arch_info ccstate_counter_arch_infos[] = { }, }; +/* Indexes used to map data read from perf and MSRs into global variables */ +enum msr_rci_index { + MSR_RCI_INDEX_APERF = 0, + MSR_RCI_INDEX_MPERF = 1, + MSR_RCI_INDEX_SMI = 2, + NUM_MSR_COUNTERS, +}; + +struct msr_counter_info_t { + unsigned long long data[NUM_MSR_COUNTERS]; + enum counter_source source[NUM_MSR_COUNTERS]; + unsigned long long msr[NUM_MSR_COUNTERS]; + unsigned long long msr_mask[NUM_MSR_COUNTERS]; + int fd_perf; +}; + +struct msr_counter_info_t *msr_counter_info; +unsigned int msr_counter_info_size; + +struct msr_counter_arch_info { + const char *perf_subsys; + const char *perf_name; + unsigned long long msr; + unsigned long long msr_mask; + unsigned int rci_index; /* Maps data from perf counters to global variables */ + bool needed; + bool present; +}; + +enum msr_arch_info_index { + MSR_ARCH_INFO_APERF_INDEX = 0, + MSR_ARCH_INFO_MPERF_INDEX = 1, + MSR_ARCH_INFO_SMI_INDEX = 2, +}; + +static struct msr_counter_arch_info msr_counter_arch_infos[] = { + [MSR_ARCH_INFO_APERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "aperf", + .msr = MSR_IA32_APERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_APERF, + }, + + [MSR_ARCH_INFO_MPERF_INDEX] = { + .perf_subsys = "msr", + .perf_name = "mperf", + .msr = MSR_IA32_MPERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_MPERF, + }, + + [MSR_ARCH_INFO_SMI_INDEX] = { + .perf_subsys = "msr", + .perf_name = "smi", + .msr = MSR_SMI_COUNT, + .msr_mask = 0xFFFFFFFF, + .rci_index = MSR_RCI_INDEX_SMI, + }, +}; + struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -1767,7 +1826,7 @@ int get_msr_fd(int cpu) static void bic_disable_msr_access(void) { - const unsigned long bic_msrs = BIC_SMI | BIC_Mod_c6 | BIC_CoreTmp | + const unsigned long bic_msrs = BIC_Mod_c6 | BIC_CoreTmp | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_PkgTmp; bic_enabled &= ~bic_msrs; @@ -3402,30 +3461,6 @@ static unsigned int read_perf_counter_info_n(const char *const path, const char return v; } -static unsigned int read_msr_type(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/type"; - const char *const format = "%u"; - - return read_perf_counter_info_n(path, format); -} - -static unsigned int read_aperf_config(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/events/aperf"; - const char *const format = "event=%x"; - - return read_perf_counter_info_n(path, format); -} - -static unsigned int read_mperf_config(void) -{ - const char *const path = "/sys/bus/event_source/devices/msr/events/mperf"; - const char *const format = "event=%x"; - - return read_perf_counter_info_n(path, format); -} - static unsigned int read_perf_type(const char *subsys) { const char *const path_format = "/sys/bus/event_source/devices/%s/type"; @@ -3479,124 +3514,6 @@ static double read_perf_rapl_scale(const char *subsys, const char *event_name) return scale; } -static struct amperf_group_fd open_amperf_fd(int cpu) -{ - const unsigned int msr_type = read_msr_type(); - const unsigned int aperf_config = read_aperf_config(); - const unsigned int mperf_config = read_mperf_config(); - struct amperf_group_fd fds = {.aperf = -1, .mperf = -1 }; - - fds.aperf = open_perf_counter(cpu, msr_type, aperf_config, -1, PERF_FORMAT_GROUP); - fds.mperf = open_perf_counter(cpu, msr_type, mperf_config, fds.aperf, PERF_FORMAT_GROUP); - - return fds; -} - -static int get_amperf_fd(int cpu) -{ - assert(fd_amperf_percpu); - - if (fd_amperf_percpu[cpu].aperf) - return fd_amperf_percpu[cpu].aperf; - - fd_amperf_percpu[cpu] = open_amperf_fd(cpu); - - return fd_amperf_percpu[cpu].aperf; -} - -/* Read APERF, MPERF and TSC using the perf API. */ -static int read_aperf_mperf_tsc_perf(struct thread_data *t, int cpu) -{ - union { - struct { - unsigned long nr_entries; - unsigned long aperf; - unsigned long mperf; - }; - - unsigned long as_array[3]; - } cnt; - - const int fd_amperf = get_amperf_fd(cpu); - - /* - * Read the TSC with rdtsc, because we want the absolute value and not - * the offset from the start of the counter. - */ - t->tsc = rdtsc(); - - const int n = read(fd_amperf, &cnt.as_array[0], sizeof(cnt.as_array)); - - if (n != sizeof(cnt.as_array)) - return -2; - - t->aperf = cnt.aperf * aperf_mperf_multiplier; - t->mperf = cnt.mperf * aperf_mperf_multiplier; - - return 0; -} - -/* Read APERF, MPERF and TSC using the MSR driver and rdtsc instruction. */ -static int read_aperf_mperf_tsc_msr(struct thread_data *t, int cpu) -{ - unsigned long long tsc_before, tsc_between, tsc_after, aperf_time, mperf_time; - int aperf_mperf_retry_count = 0; - - /* - * The TSC, APERF and MPERF must be read together for - * APERF/MPERF and MPERF/TSC to give accurate results. - * - * Unfortunately, APERF and MPERF are read by - * individual system call, so delays may occur - * between them. If the time to read them - * varies by a large amount, we re-read them. - */ - - /* - * This initial dummy APERF read has been seen to - * reduce jitter in the subsequent reads. - */ - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - -retry: - t->tsc = rdtsc(); /* re-read close to APERF */ - - tsc_before = t->tsc; - - if (get_msr(cpu, MSR_IA32_APERF, &t->aperf)) - return -3; - - tsc_between = rdtsc(); - - if (get_msr(cpu, MSR_IA32_MPERF, &t->mperf)) - return -4; - - tsc_after = rdtsc(); - - aperf_time = tsc_between - tsc_before; - mperf_time = tsc_after - tsc_between; - - /* - * If the system call latency to read APERF and MPERF - * differ by more than 2x, then try again. - */ - if ((aperf_time > (2 * mperf_time)) || (mperf_time > (2 * aperf_time))) { - aperf_mperf_retry_count++; - if (aperf_mperf_retry_count < 5) - goto retry; - else - warnx("cpu%d jitter %lld %lld", cpu, aperf_time, mperf_time); - } - aperf_mperf_retry_count = 0; - - t->aperf = t->aperf * aperf_mperf_multiplier; - t->mperf = t->mperf * aperf_mperf_multiplier; - - return 0; -} - size_t rapl_counter_info_count_perf(const struct rapl_counter_info_t *rci) { size_t ret = 0; @@ -3853,6 +3770,84 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat return 0; } +size_t msr_counter_info_count_perf(const struct msr_counter_info_t *mci) +{ + size_t ret = 0; + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) + if (mci->source[i] == COUNTER_SOURCE_PERF) + ++ret; + + return ret; +} + +int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) +{ + unsigned long long perf_data[NUM_MSR_COUNTERS + 1]; + + struct msr_counter_info_t *mci; + + if (debug) + fprintf(stderr, "%s: cpu%d\n", __func__, cpu); + + assert(msr_counter_info); + assert(cpu <= msr_counter_info_size); + + mci = &msr_counter_info[cpu]; + + ZERO_ARRAY(perf_data); + ZERO_ARRAY(mci->data); + + if (mci->fd_perf != -1) { + const size_t num_perf_counters = msr_counter_info_count_perf(mci); + const ssize_t expected_read_size = (num_perf_counters + 1) * sizeof(unsigned long long); + const ssize_t actual_read_size = read(mci->fd_perf, &perf_data[0], sizeof(perf_data)); + + if (actual_read_size != expected_read_size) + err(-1, "%s: failed to read perf_data (%zu %zu)", __func__, expected_read_size, + actual_read_size); + } + + for (unsigned int i = 0, pi = 1; i < NUM_MSR_COUNTERS; ++i) { + switch (mci->source[i]) { + case COUNTER_SOURCE_NONE: + break; + + case COUNTER_SOURCE_PERF: + assert(pi < ARRAY_SIZE(perf_data)); + assert(mci->fd_perf != -1); + + if (debug) + fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); + + mci->data[i] = perf_data[pi]; + + ++pi; + break; + + case COUNTER_SOURCE_MSR: + assert(!no_msr); + + if (get_msr(cpu, mci->msr[i], &mci->data[i])) + return -2 - i; + + mci->data[i] &= mci->msr_mask[i]; + + if (debug) + fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); + + break; + } + } + + BUILD_BUG_ON(NUM_MSR_COUNTERS != 3); + t->aperf = mci->data[MSR_RCI_INDEX_APERF]; + t->mperf = mci->data[MSR_RCI_INDEX_MPERF]; + t->smi_count = mci->data[MSR_RCI_INDEX_SMI]; + + return 0; +} + /* * get_counters(...) * migrate to cpu @@ -3878,24 +3873,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) t->tsc = rdtsc(); /* we are running on local CPU of interest */ - if (DO_BIC(BIC_Avg_MHz) || DO_BIC(BIC_Busy) || DO_BIC(BIC_Bzy_MHz) || DO_BIC(BIC_IPC) - || soft_c1_residency_display(BIC_Avg_MHz)) { - int status = -1; - - assert(!no_perf || !no_msr); - - switch (amperf_source) { - case AMPERF_SOURCE_PERF: - status = read_aperf_mperf_tsc_perf(t, cpu); - break; - case AMPERF_SOURCE_MSR: - status = read_aperf_mperf_tsc_msr(t, cpu); - break; - } - - if (status != 0) - return status; - } + get_smi_aperf_mperf(cpu, t); if (DO_BIC(BIC_IPC)) if (read(get_instr_count_fd(cpu), &t->instr_count, sizeof(long long)) != sizeof(long long)) @@ -3903,11 +3881,6 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (DO_BIC(BIC_IRQ)) t->irq_count = irqs_per_cpu[cpu]; - if (DO_BIC(BIC_SMI)) { - if (get_msr(cpu, MSR_SMI_COUNT, &msr)) - return -5; - t->smi_count = msr & 0xFFFFFFFF; - } get_cstate_counters(cpu, t, c, p); @@ -4489,25 +4462,6 @@ void free_fd_percpu(void) fd_percpu = NULL; } -void free_fd_amperf_percpu(void) -{ - int i; - - if (!fd_amperf_percpu) - return; - - for (i = 0; i < topo.max_cpu_num + 1; ++i) { - if (fd_amperf_percpu[i].mperf != 0) - close(fd_amperf_percpu[i].mperf); - - if (fd_amperf_percpu[i].aperf != 0) - close(fd_amperf_percpu[i].aperf); - } - - free(fd_amperf_percpu); - fd_amperf_percpu = NULL; -} - void free_fd_instr_count_percpu(void) { if (!fd_instr_count_percpu) @@ -4542,6 +4496,21 @@ void free_fd_cstate(void) ccstate_counter_info_size = 0; } +void free_fd_msr(void) +{ + if (!msr_counter_info) + return; + + for (int cpu = 0; cpu < topo.max_cpu_num; ++cpu) { + if (msr_counter_info[cpu].fd_perf != -1) + close(msr_counter_info[cpu].fd_perf); + } + + free(msr_counter_info); + msr_counter_info = NULL; + msr_counter_info_size = 0; +} + void free_fd_rapl_percpu(void) { if (!rapl_counter_info_perdomain) @@ -4601,7 +4570,7 @@ void free_all_buffers(void) free_fd_percpu(); free_fd_instr_count_percpu(); - free_fd_amperf_percpu(); + free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); @@ -4938,6 +4907,7 @@ static void update_effective_set(bool startup) } void linux_perf_init(void); +void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); @@ -4946,6 +4916,7 @@ void re_initialize(void) free_all_buffers(); setup_all_buffers(false); linux_perf_init(); + msr_perf_init(); rapl_perf_init(); cstate_perf_init(); fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, @@ -6799,15 +6770,6 @@ static int has_instr_count_access(void) return has_access; } -bool is_aperf_access_required(void) -{ - return BIC_IS_ENABLED(BIC_Avg_MHz) - || BIC_IS_ENABLED(BIC_Busy) - || BIC_IS_ENABLED(BIC_Bzy_MHz) - || BIC_IS_ENABLED(BIC_IPC) - || BIC_IS_ENABLED(BIC_CPU_c1); -} - int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struct rapl_counter_arch_info *cai, double *scale_, enum rapl_unit *unit_) { @@ -6866,14 +6828,6 @@ void linux_perf_init(void) if (fd_instr_count_percpu == NULL) err(-1, "calloc fd_instr_count_percpu"); } - - const bool aperf_required = is_aperf_access_required(); - - if (aperf_required && has_aperf && amperf_source == AMPERF_SOURCE_PERF) { - fd_amperf_percpu = calloc(topo.max_cpu_num + 1, sizeof(*fd_amperf_percpu)); - if (fd_amperf_percpu == NULL) - err(-1, "calloc fd_amperf_percpu"); - } } void rapl_perf_init(void) @@ -6966,75 +6920,11 @@ void rapl_perf_init(void) free(domain_visited); } -static int has_amperf_access_via_msr(void) -{ - if (no_msr) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_APERF)) - return 0; - - if (probe_msr(base_cpu, MSR_IA32_MPERF)) - return 0; - - return 1; -} - -static int has_amperf_access_via_perf(void) -{ - struct amperf_group_fd fds; - - /* - * Cache the last result, so we don't warn the user multiple times - * - * Negative means cached, no access - * Zero means not cached - * Positive means cached, has access - */ - static int has_access_cached; - - if (no_perf) - return 0; - - if (has_access_cached != 0) - return has_access_cached > 0; - - fds = open_amperf_fd(base_cpu); - has_access_cached = (fds.aperf != -1) && (fds.mperf != -1); - - if (fds.aperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "APERF perf counter", "--no-perf"); - else - close(fds.aperf); - - if (fds.mperf == -1) - warnx("Failed to access %s. Some of the counters may not be available\n" - "\tRun as root to enable them or use %s to disable the access explicitly", - "MPERF perf counter", "--no-perf"); - else - close(fds.mperf); - - if (has_access_cached == 0) - has_access_cached = -1; - - return has_access_cached > 0; -} - -/* Check if we can access APERF and MPERF */ +/* Assumes msr_counter_info is populated */ static int has_amperf_access(void) { - if (!is_aperf_access_required()) - return 0; - - if (!no_msr && has_amperf_access_via_msr()) - return 1; - - if (!no_perf && has_amperf_access_via_perf()) - return 1; - - return 0; + return msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].present && + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].present; } int *get_cstate_perf_group_fd(struct cstate_counter_info_t *cci, const char *group_name) @@ -7083,6 +6973,114 @@ int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const st return ret; } +int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + if (no_perf) + return -1; + + const unsigned int type = read_perf_type(cai->perf_subsys); + const unsigned int config = read_rapl_config(cai->perf_subsys, cai->perf_name); + + const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); + + if (fd_counter == -1) + return -1; + + /* If it's the first counter opened, make it a group descriptor */ + if (cci->fd_perf == -1) + cci->fd_perf = fd_counter; + + return fd_counter; +} + +int add_msr_perf_counter(int cpu, struct msr_counter_info_t *cci, const struct msr_counter_arch_info *cai) +{ + int ret = add_msr_perf_counter_(cpu, cci, cai); + + if (debug) + fprintf(stderr, "%s: %s/%s: %d (cpu: %d)\n", __func__, cai->perf_subsys, cai->perf_name, ret, cpu); + + return ret; +} + +void msr_perf_init_(void) +{ + const int mci_num = topo.max_cpu_num + 1; + + msr_counter_info = calloc(mci_num, sizeof(*msr_counter_info)); + if (!msr_counter_info) + err(1, "calloc msr_counter_info"); + msr_counter_info_size = mci_num; + + for (int cpu = 0; cpu < mci_num; ++cpu) + msr_counter_info[cpu].fd_perf = -1; + + for (int cidx = 0; cidx < NUM_MSR_COUNTERS; ++cidx) { + + struct msr_counter_arch_info *cai = &msr_counter_arch_infos[cidx]; + + cai->present = false; + + for (int cpu = 0; cpu < mci_num; ++cpu) { + + struct msr_counter_info_t *const cci = &msr_counter_info[cpu]; + + if (cpu_is_not_allowed(cpu)) + continue; + + if (cai->needed) { + /* Use perf API for this counter */ + if (!no_perf && cai->perf_name && add_msr_perf_counter(cpu, cci, cai) != -1) { + cci->source[cai->rci_index] = COUNTER_SOURCE_PERF; + cai->present = true; + + /* User MSR for this counter */ + } else if (!no_msr && cai->msr && probe_msr(cpu, cai->msr) == 0) { + cci->source[cai->rci_index] = COUNTER_SOURCE_MSR; + cci->msr[cai->rci_index] = cai->msr; + cci->msr_mask[cai->rci_index] = cai->msr_mask; + cai->present = true; + } + } + } + } +} + +/* Initialize data for reading perf counters from the MSR group. */ +void msr_perf_init(void) +{ + bool need_amperf = false, need_smi = false; + const bool need_soft_c1 = (!platform->has_msr_core_c1_res) && (platform->supported_cstates & CC1); + + need_amperf = BIC_IS_ENABLED(BIC_Avg_MHz) || BIC_IS_ENABLED(BIC_Busy) || BIC_IS_ENABLED(BIC_Bzy_MHz) + || BIC_IS_ENABLED(BIC_IPC) || need_soft_c1; + + if (BIC_IS_ENABLED(BIC_SMI)) + need_smi = true; + + /* Enable needed counters */ + msr_counter_arch_infos[MSR_ARCH_INFO_APERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_MPERF_INDEX].needed = need_amperf; + msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].needed = need_smi; + + msr_perf_init_(); + + const bool has_amperf = has_amperf_access(); + const bool has_smi = msr_counter_arch_infos[MSR_ARCH_INFO_SMI_INDEX].present; + + has_aperf_access = has_amperf; + + if (has_amperf) { + BIC_PRESENT(BIC_Avg_MHz); + BIC_PRESENT(BIC_Busy); + BIC_PRESENT(BIC_Bzy_MHz); + BIC_PRESENT(BIC_SMI); + } + + if (has_smi) + BIC_PRESENT(BIC_SMI); +} + void cstate_perf_init_(bool soft_c1) { bool has_counter; @@ -7340,12 +7338,6 @@ void process_cpuid() __cpuid(0x6, eax, ebx, ecx, edx); has_aperf = ecx & (1 << 0); - if (has_aperf && has_amperf_access()) { - BIC_PRESENT(BIC_Avg_MHz); - BIC_PRESENT(BIC_Busy); - BIC_PRESENT(BIC_Bzy_MHz); - BIC_PRESENT(BIC_IPC); - } do_dts = eax & (1 << 0); if (do_dts) BIC_PRESENT(BIC_CoreTmp); @@ -7462,6 +7454,11 @@ static void counter_info_init(void) if (platform->has_msr_atom_pkg_c6_residency && cai->msr == MSR_PKG_C6_RESIDENCY) cai->msr = MSR_ATOM_PKG_C6_RESIDENCY; } + + for (int i = 0; i < NUM_MSR_COUNTERS; ++i) { + msr_counter_arch_infos[i].present = false; + msr_counter_arch_infos[i].needed = false; + } } void probe_pm_features(void) @@ -7837,21 +7834,6 @@ void set_base_cpu(void) err(-ENODEV, "No valid cpus found"); } -static void set_amperf_source(void) -{ - amperf_source = AMPERF_SOURCE_PERF; - - const bool aperf_required = is_aperf_access_required(); - - if (no_perf || !aperf_required || !has_amperf_access_via_perf()) - amperf_source = AMPERF_SOURCE_MSR; - - if (quiet || !debug) - return; - - fprintf(outf, "aperf/mperf source preference: %s\n", amperf_source == AMPERF_SOURCE_MSR ? "msr" : "perf"); -} - bool has_added_counters(void) { /* @@ -7862,54 +7844,8 @@ bool has_added_counters(void) return sys.added_core_counters | sys.added_thread_counters | sys.added_package_counters; } -bool is_msr_access_required(void) -{ - if (no_msr) - return false; - - if (has_added_counters()) - return true; - - return BIC_IS_ENABLED(BIC_SMI) - || BIC_IS_ENABLED(BIC_CPU_c1) - || BIC_IS_ENABLED(BIC_CPU_c3) - || BIC_IS_ENABLED(BIC_CPU_c6) - || BIC_IS_ENABLED(BIC_CPU_c7) - || BIC_IS_ENABLED(BIC_Mod_c6) - || BIC_IS_ENABLED(BIC_CoreTmp) - || BIC_IS_ENABLED(BIC_Totl_c0) - || BIC_IS_ENABLED(BIC_Any_c0) - || BIC_IS_ENABLED(BIC_GFX_c0) - || BIC_IS_ENABLED(BIC_CPUGFX) - || BIC_IS_ENABLED(BIC_Pkgpc3) - || BIC_IS_ENABLED(BIC_Pkgpc6) - || BIC_IS_ENABLED(BIC_Pkgpc2) - || BIC_IS_ENABLED(BIC_Pkgpc7) - || BIC_IS_ENABLED(BIC_Pkgpc8) - || BIC_IS_ENABLED(BIC_Pkgpc9) - || BIC_IS_ENABLED(BIC_Pkgpc10) - /* TODO: Multiplex access with perf */ - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_PkgWatt) - || BIC_IS_ENABLED(BIC_CorWatt) - || BIC_IS_ENABLED(BIC_GFXWatt) - || BIC_IS_ENABLED(BIC_RAMWatt) - || BIC_IS_ENABLED(BIC_Pkg_J) - || BIC_IS_ENABLED(BIC_Cor_J) - || BIC_IS_ENABLED(BIC_GFX_J) - || BIC_IS_ENABLED(BIC_RAM_J) - || BIC_IS_ENABLED(BIC_PKG__) - || BIC_IS_ENABLED(BIC_RAM__) - || BIC_IS_ENABLED(BIC_PkgTmp) - || (is_aperf_access_required() && !has_amperf_access_via_perf()); -} - void check_msr_access(void) { - if (!is_msr_access_required()) - no_msr = 1; - check_dev_msr(); check_msr_permission(); @@ -7919,19 +7855,8 @@ void check_msr_access(void) void check_perf_access(void) { - const bool intrcount_required = BIC_IS_ENABLED(BIC_IPC); - - if (no_perf || !intrcount_required || !has_instr_count_access()) + if (no_perf || !BIC_IS_ENABLED(BIC_IPC) || !has_instr_count_access()) bic_enabled &= ~BIC_IPC; - - const bool aperf_required = is_aperf_access_required(); - - if (!aperf_required || !has_amperf_access()) { - bic_enabled &= ~BIC_Avg_MHz; - bic_enabled &= ~BIC_Busy; - bic_enabled &= ~BIC_Bzy_MHz; - bic_enabled &= ~BIC_IPC; - } } void turbostat_init() @@ -7943,7 +7868,7 @@ void turbostat_init() process_cpuid(); counter_info_init(); probe_pm_features(); - set_amperf_source(); + msr_perf_init(); linux_perf_init(); rapl_perf_init(); cstate_perf_init(); @@ -7951,8 +7876,8 @@ void turbostat_init() for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); - if (DO_BIC(BIC_IPC)) - (void)get_instr_count_fd(base_cpu); + if (BIC_IS_ENABLED(BIC_IPC) && has_aperf_access && get_instr_count_fd(base_cpu) != -1) + BIC_PRESENT(BIC_IPC); /* * If TSC tweak is needed, but couldn't get it, -- cgit v1.2.3-70-g09d2 From 361b8fc73cf63dc0c3be3778720631f1a33ba9db Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Tue, 28 May 2024 15:46:10 +0200 Subject: tools/power turbostat: Extend --add option with perf counters User can now read perf counters using "--add perf//". Other details work similarly to how --add works with MSRs. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/Makefile | 1 + tools/power/x86/turbostat/turbostat.8 | 5 +- tools/power/x86/turbostat/turbostat.c | 562 +++++++++++++++++++++++++++++++++- 3 files changed, 557 insertions(+), 11 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index b1e6817f1e54..3946d5254a1f 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -46,6 +46,7 @@ snapshot: turbostat @echo "#define GENMASK_ULL(h, l) (((~0ULL) << (l)) & (~0ULL >> (sizeof(long long) * 8 - 1 - (h))))" >> $(SNAPSHOT)/bits.h @echo '#define BUILD_BUG_ON(cond) do { enum { compile_time_check ## __COUNTER__ = 1/(!(cond)) }; } while (0)' > $(SNAPSHOT)/build_bug.h + @echo '#define __must_be_array(arr) 0' >> $(SNAPSHOT)/build_bug.h @echo PWD=. > $(SNAPSHOT)/Makefile @echo "CFLAGS += -DMSRHEADER='\"msr-index.h\"'" >> $(SNAPSHOT)/Makefile diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 8d37acd39201..5537fc6b5bc3 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -28,10 +28,13 @@ name as necessary to disambiguate it from others is necessary. Note that option .PP \fB--add attributes\fP add column with counter having specified 'attributes'. The 'location' attribute is required, all others are optional. .nf - location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP} + location: {\fBmsrDDD\fP | \fBmsr0xXXX\fP | \fB/sys/path...\fP | \fBperf//\fP} msrDDD is a decimal offset, eg. msr16 msr0xXXX is a hex offset, eg. msr0x10 /sys/path... is an absolute path to a sysfs attribute + is a perf device from /sys/bus/event_source/devices/ eg. cstate_core + is a perf event for given device from /sys/bus/event_source/devices//events/ eg. c1-residency + perf/cstate_core/c1-residency would then use /sys/bus/event_source/devices/cstate_core/events/c1-residency scope: {\fBcpu\fP | \fBcore\fP | \fBpackage\fP} sample and print the counter for every cpu, core, or package. diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 495235055fa2..be345a4bbe96 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -79,14 +79,40 @@ */ #define NAME_BYTES 20 #define PATH_BYTES 128 +#define PERF_NAME_BYTES 128 #define MAX_NOFILE 0x8000 +#define COUNTER_KIND_PERF_PREFIX "perf/" +#define COUNTER_KIND_PERF_PREFIX_LEN strlen(COUNTER_KIND_PERF_PREFIX) +#define PERF_DEV_NAME_BYTES 32 +#define PERF_EVT_NAME_BYTES 32 + enum counter_scope { SCOPE_CPU, SCOPE_CORE, SCOPE_PACKAGE }; enum counter_type { COUNTER_ITEMS, COUNTER_CYCLES, COUNTER_SECONDS, COUNTER_USEC, COUNTER_K2M }; enum counter_format { FORMAT_RAW, FORMAT_DELTA, FORMAT_PERCENT, FORMAT_AVERAGE }; enum counter_source { COUNTER_SOURCE_NONE, COUNTER_SOURCE_PERF, COUNTER_SOURCE_MSR }; +struct perf_counter_info { + struct perf_counter_info *next; + + /* How to open the counter / What counter it is. */ + char device[PERF_DEV_NAME_BYTES]; + char event[PERF_EVT_NAME_BYTES]; + + /* How to show/format the counter. */ + char name[PERF_NAME_BYTES]; + unsigned int width; + enum counter_scope scope; + enum counter_type type; + enum counter_format format; + double scale; + + /* For reading the counter. */ + int *fd_perf_per_domain; + size_t num_domains; +}; + struct sysfs_path { char path[PATH_BYTES]; int id; @@ -1457,6 +1483,7 @@ struct thread_data { unsigned int flags; bool is_atom; unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -1470,6 +1497,7 @@ struct core_data { unsigned int core_id; unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -1503,6 +1531,7 @@ struct pkg_data { unsigned int pkg_temp_c; unsigned int uncore_mhz; unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -1637,12 +1666,21 @@ int idx_valid(int idx) } struct sys_counters { + /* MSR added counters */ unsigned int added_thread_counters; unsigned int added_core_counters; unsigned int added_package_counters; struct msr_counter *tp; struct msr_counter *cp; struct msr_counter *pp; + + /* perf added counters */ + unsigned int added_thread_perf_counters; + unsigned int added_core_perf_counters; + unsigned int added_package_perf_counters; + struct perf_counter_info *perf_tp; + struct perf_counter_info *perf_cp; + struct perf_counter_info *perf_pp; } sys; static size_t free_msr_counters_(struct msr_counter **pp) @@ -1902,6 +1940,23 @@ int probe_msr(int cpu, off_t offset) return 0; } +/* Convert CPU ID to domain ID for given added perf counter. */ +unsigned int cpu_to_domain(const struct perf_counter_info *pc, int cpu) +{ + switch (pc->scope) { + case SCOPE_CPU: + return cpu; + + case SCOPE_CORE: + return cpus[cpu].physical_core_id; + + case SCOPE_PACKAGE: + return cpus[cpu].physical_package_id; + } + + __builtin_unreachable(); +} + #define MAX_DEFERRED 16 char *deferred_add_names[MAX_DEFERRED]; char *deferred_skip_names[MAX_DEFERRED]; @@ -1925,6 +1980,7 @@ void help(void) "to print statistics, until interrupted.\n" " -a, --add add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" + " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" @@ -2034,6 +2090,7 @@ unsigned long long bic_lookup(char *name_list, enum show_hide_mode mode) void print_header(char *delim) { struct msr_counter *mp; + struct perf_counter_info *pp; int printed = 0; if (DO_BIC(BIC_USEC)) @@ -2091,6 +2148,21 @@ void print_header(char *delim) } } + for (pp = sys.perf_tp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c3)) @@ -2131,6 +2203,21 @@ void print_header(char *delim) } } + for (pp = sys.perf_cp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); @@ -2226,6 +2313,21 @@ void print_header(char *delim) } } + for (pp = sys.perf_pp; pp; pp = pp->next) { + + if (pp->format == FORMAT_RAW) { + if (pp->width == 64) + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), pp->name); + } else { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8s", (printed++ ? delim : ""), pp->name); + else + outp += sprintf(outp, "%s%s", (printed++ ? delim : ""), pp->name); + } + } + outp += sprintf(outp, "\n"); } @@ -2346,6 +2448,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data char *fmt8; int i; struct msr_counter *mp; + struct perf_counter_info *pp; char *delim = "\t"; int printed = 0; @@ -2483,6 +2586,31 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + /* Added perf counters */ + for (i = 0, pp = sys.perf_tp; pp; ++i, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), t->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), t->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + if (pp->type == COUNTER_USEC) + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), + t->perf_counter[i] / interval_float / 10000); + else + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->perf_counter[i] / tsc); + } + } + /* C1 */ if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); @@ -2526,6 +2654,24 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), c->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), c->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * c->perf_counter[i] / tsc); + } + } + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) @@ -2680,6 +2826,26 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->counter[i] / 1000); } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) { + if (pp->width == 32) + outp += + sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->perf_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_DELTA) { + if ((pp->type == COUNTER_ITEMS) && sums_need_wide_columns) + outp += sprintf(outp, "%s%8lld", (printed++ ? delim : ""), p->perf_counter[i]); + else + outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); + } else if (pp->format == FORMAT_PERCENT) { + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); + } else if (pp->type == COUNTER_K2M) + outp += + sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); + } + done: if (*(outp - 1) != '\n') outp += sprintf(outp, "\n"); @@ -2733,6 +2899,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; @@ -2793,6 +2960,15 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->counter[i] = new->counter[i] - old->counter[i]; } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else if (pp->format == FORMAT_AVERAGE) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + return 0; } @@ -2800,6 +2976,7 @@ void delta_core(struct core_data *new, struct core_data *old) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; @@ -2816,6 +2993,13 @@ void delta_core(struct core_data *new, struct core_data *old) else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } } int soft_c1_residency_display(int bic) @@ -2833,6 +3017,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d { int i; struct msr_counter *mp; + struct perf_counter_info *pp; /* we run cpuid just the 1st time, copy the results */ if (DO_BIC(BIC_APIC)) @@ -2911,6 +3096,14 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d else old->counter[i] = new->counter[i] - old->counter[i]; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + old->perf_counter[i] = new->perf_counter[i]; + else + old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; + } + return 0; } @@ -3013,6 +3206,10 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data for (i = 0, mp = sys.pp; mp; i++, mp = mp->next) p->counter[i] = 0; + + memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); + memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); + memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); } void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) @@ -3033,6 +3230,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) { int i; struct msr_counter *mp; + struct perf_counter_info *pp; /* copy un-changing apic_id's */ if (DO_BIC(BIC_APIC)) @@ -3063,6 +3261,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.counter[i] += t->counter[i]; } + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.threads.perf_counter[i] += t->perf_counter[i]; + } + /* sum per-core values only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -3083,6 +3287,12 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.counter[i] += c->counter[i]; } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + average.cores.perf_counter[i] += c->perf_counter[i]; + } + /* sum per-pkg values only for 1st core in pkg */ if (!is_cpu_first_core_in_package(t, c, p)) return 0; @@ -3134,6 +3344,14 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) else average.packages.counter[i] += p->counter[i]; } + + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if ((pp->format == FORMAT_RAW) && (topo.num_packages == 0)) + average.packages.perf_counter[i] = p->perf_counter[i]; + else + average.packages.perf_counter[i] += p->perf_counter[i]; + } + return 0; } @@ -3145,6 +3363,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data { int i; struct msr_counter *mp; + struct perf_counter_info *pp; clear_counters(&average.threads, &average.cores, &average.packages); @@ -3216,6 +3435,35 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data } average.packages.counter[i] /= topo.allowed_packages; } + + for (i = 0, pp = sys.perf_tp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.threads.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + continue; + } + average.threads.perf_counter[i] /= topo.allowed_cpus; + } + for (i = 0, pp = sys.perf_cp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.cores.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.cores.perf_counter[i] /= topo.allowed_cores; + } + for (i = 0, pp = sys.perf_pp; pp; i++, pp = pp->next) { + if (pp->format == FORMAT_RAW) + continue; + if (pp->type == COUNTER_ITEMS) { + if (average.packages.perf_counter[i] > 9999999) + sums_need_wide_columns = 1; + } + average.packages.perf_counter[i] /= topo.allowed_packages; + } } static unsigned long long rdtsc(void) @@ -3848,6 +4096,31 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) return 0; } +int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigned long long *out, size_t out_size) +{ + unsigned int domain; + unsigned long long value; + int fd_counter; + + for (size_t i = 0; pp; ++i, pp = pp->next) { + domain = cpu_to_domain(pp, cpu); + assert(domain < pp->num_domains); + + fd_counter = pp->fd_perf_per_domain[domain]; + + if (fd_counter == -1) + continue; + + if (read(fd_counter, &value, sizeof(value)) != sizeof(value)) + return 1; + + assert(i < out_size); + out[i] = value * pp->scale; + } + + return 0; +} + /* * get_counters(...) * migrate to cpu @@ -3889,6 +4162,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) + return -10; + /* collect core counters only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) goto done; @@ -3927,6 +4203,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) return -10; } + if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) + return -10; + /* collect package counters only for 1st core in package */ if (!is_cpu_first_core_in_package(t, c, p)) goto done; @@ -3999,6 +4278,10 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_mp(cpu, mp, &p->counter[i], path)) return -10; } + + if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) + return -10; + done: gettimeofday(&t->tv_end, (struct timezone *)NULL); @@ -4528,6 +4811,36 @@ void free_fd_rapl_percpu(void) rapl_counter_info_perdomain_size = 0; } +void free_fd_added_perf_counters_(struct perf_counter_info *pp) +{ + if (!pp) + return; + + if (!pp->fd_perf_per_domain) + return; + + while (pp) { + for (size_t domain = 0; domain < pp->num_domains; ++domain) { + if (pp->fd_perf_per_domain[domain] != -1) { + close(pp->fd_perf_per_domain[domain]); + pp->fd_perf_per_domain[domain] = -1; + } + } + + free(pp->fd_perf_per_domain); + pp->fd_perf_per_domain = NULL; + + pp = pp->next; + } +} + +void free_fd_added_perf_counters(void) +{ + free_fd_added_perf_counters_(sys.perf_tp); + free_fd_added_perf_counters_(sys.perf_cp); + free_fd_added_perf_counters_(sys.perf_pp); +} + void free_all_buffers(void) { int i; @@ -4573,6 +4886,7 @@ void free_all_buffers(void) free_fd_msr(); free_fd_rapl_percpu(); free_fd_cstate(); + free_fd_added_perf_counters(); free(irq_column_2_cpu); free(irqs_per_cpu); @@ -4910,6 +5224,7 @@ void linux_perf_init(void); void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); +void added_perf_counters_init(void); void re_initialize(void) { @@ -4919,6 +5234,7 @@ void re_initialize(void) msr_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } @@ -7859,6 +8175,117 @@ void check_perf_access(void) bic_enabled &= ~BIC_IPC; } +int added_perf_counters_init_(struct perf_counter_info *pinfo) +{ + size_t num_domains = 0; + unsigned int next_domain; + bool *domain_visited; + unsigned int perf_type, perf_config; + double perf_scale; + int fd_perf; + + if (!pinfo) + return 0; + + const size_t max_num_domains = MAX(topo.max_cpu_num + 1, MAX(topo.max_core_id + 1, topo.max_package_id + 1)); + + domain_visited = calloc(max_num_domains, sizeof(*domain_visited)); + + while (pinfo) { + switch (pinfo->scope) { + case SCOPE_CPU: + num_domains = topo.max_cpu_num + 1; + break; + + case SCOPE_CORE: + num_domains = topo.max_core_id + 1; + break; + + case SCOPE_PACKAGE: + num_domains = topo.max_package_id + 1; + break; + } + + /* Allocate buffer for file descriptor for each domain. */ + pinfo->fd_perf_per_domain = calloc(num_domains, sizeof(*pinfo->fd_perf_per_domain)); + if (!pinfo->fd_perf_per_domain) + errx(1, "%s: alloc %s", __func__, "fd_perf_per_domain"); + + for (size_t i = 0; i < num_domains; ++i) + pinfo->fd_perf_per_domain[i] = -1; + + pinfo->num_domains = num_domains; + pinfo->scale = 1.0; + + memset(domain_visited, 0, max_num_domains * sizeof(*domain_visited)); + + for (int cpu = 0; cpu < topo.max_cpu_num + 1; ++cpu) { + + next_domain = cpu_to_domain(pinfo, cpu); + + assert(next_domain < num_domains); + + if (cpu_is_not_allowed(cpu)) + continue; + + if (domain_visited[next_domain]) + continue; + + perf_type = read_perf_type(pinfo->device); + if (perf_type == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "type"); + continue; + } + + perf_config = read_rapl_config(pinfo->device, pinfo->event); + if (perf_config == (unsigned int)-1) { + warnx("%s: perf/%s/%s: failed to read %s", + __func__, pinfo->device, pinfo->event, "config"); + continue; + } + + /* Scale is not required, some counters just don't have it. */ + perf_scale = read_perf_rapl_scale(pinfo->device, pinfo->event); + if (perf_scale == 0.0) + perf_scale = 1.0; + + fd_perf = open_perf_counter(cpu, perf_type, perf_config, -1, 0); + if (fd_perf == -1) { + warnx("%s: perf/%s/%s: failed to open counter on cpu%d", + __func__, pinfo->device, pinfo->event, cpu); + continue; + } + + domain_visited[next_domain] = 1; + pinfo->fd_perf_per_domain[next_domain] = fd_perf; + pinfo->scale = perf_scale; + + if (debug) + printf("Add perf/%s/%s cpu%d: %d\n", + pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + } + + pinfo = pinfo->next; + } + + free(domain_visited); + + return 0; +} + +void added_perf_counters_init(void) +{ + if (added_perf_counters_init_(sys.perf_tp)) + errx(1, "%s: %s", __func__, "thread"); + + if (added_perf_counters_init_(sys.perf_cp)) + errx(1, "%s: %s", __func__, "core"); + + if (added_perf_counters_init_(sys.perf_pp)) + errx(1, "%s: %s", __func__, "package"); +} + void turbostat_init() { setup_all_buffers(true); @@ -7872,6 +8299,7 @@ void turbostat_init() linux_perf_init(); rapl_perf_init(); cstate_perf_init(); + added_perf_counters_init(); for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); @@ -8061,6 +8489,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = calloc(1, sizeof(struct msr_counter)); if (msrp == NULL) err(-1, "calloc msr_counter"); + msrp->msr_num = msr_num; strncpy(msrp->name, name, NAME_BYTES - 1); msrp->width = width; @@ -8101,11 +8530,106 @@ int add_counter(unsigned int msr_num, char *path, char *name, return 0; } +/* + * Initialize the fields used for identifying and opening the counter. + * + * Defer the initialization of any runtime buffers for actually reading + * the counters for when we initialize all perf counters, so we can later + * easily call re_initialize(). + */ +struct perf_counter_info *make_perf_counter_info(const char *perf_device, + const char *perf_event, + const char *name, + unsigned int width, + enum counter_scope scope, + enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + pinfo = calloc(1, sizeof(*pinfo)); + if (!pinfo) + errx(1, "%s: Failed to allocate %s/%s\n", __func__, perf_device, perf_event); + + strncpy(pinfo->device, perf_device, ARRAY_SIZE(pinfo->device) - 1); + strncpy(pinfo->event, perf_event, ARRAY_SIZE(pinfo->event) - 1); + + strncpy(pinfo->name, name, ARRAY_SIZE(pinfo->name) - 1); + pinfo->width = width; + pinfo->scope = scope; + pinfo->type = type; + pinfo->format = format; + + return pinfo; +} + +int add_perf_counter(const char *perf_device, const char *perf_event, const char *name_buffer, unsigned int width, + enum counter_scope scope, enum counter_type type, enum counter_format format) +{ + struct perf_counter_info *pinfo; + + switch (scope) { + case SCOPE_CPU: + if (sys.added_thread_perf_counters >= MAX_ADDED_THREAD_COUNTERS) { + warnx("ignoring thread counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_CORE: + if (sys.added_core_perf_counters >= MAX_ADDED_CORE_COUNTERS) { + warnx("ignoring core counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + + case SCOPE_PACKAGE: + if (sys.added_package_perf_counters >= MAX_ADDED_PACKAGE_COUNTERS) { + warnx("ignoring package counter perf/%s/%s", perf_device, perf_event); + return -1; + } + break; + } + + pinfo = make_perf_counter_info(perf_device, perf_event, name_buffer, width, scope, type, format); + + if (!pinfo) + return -1; + + switch (scope) { + case SCOPE_CPU: + pinfo->next = sys.perf_tp; + sys.perf_tp = pinfo; + ++sys.added_thread_perf_counters; + break; + + case SCOPE_CORE: + pinfo->next = sys.perf_cp; + sys.perf_cp = pinfo; + ++sys.added_core_perf_counters; + break; + + case SCOPE_PACKAGE: + pinfo->next = sys.perf_pp; + sys.perf_pp = pinfo; + ++sys.added_package_perf_counters; + break; + } + + // FIXME: we might not have debug here yet + if (debug) + printf("%s: %s/%s, name: %s, scope%d\n", + __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + + return 0; +} + void parse_add_command(char *add_command) { int msr_num = 0; char *path = NULL; - char name_buffer[NAME_BYTES] = ""; + char perf_device[PERF_DEV_NAME_BYTES] = ""; + char perf_event[PERF_EVT_NAME_BYTES] = ""; + char name_buffer[PERF_NAME_BYTES] = ""; int width = 64; int fail = 0; enum counter_scope scope = SCOPE_CPU; @@ -8120,6 +8644,11 @@ void parse_add_command(char *add_command) if (sscanf(add_command, "msr%d", &msr_num) == 1) goto next; + BUILD_BUG_ON(ARRAY_SIZE(perf_device) <= 31); + BUILD_BUG_ON(ARRAY_SIZE(perf_event) <= 31); + if (sscanf(add_command, "perf/%31[^/]/%31[^,]", &perf_device[0], &perf_event[0]) == 2) + goto next; + if (*add_command == '/') { path = add_command; goto next; @@ -8167,7 +8696,8 @@ void parse_add_command(char *add_command) goto next; } - if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { /* 18 < NAME_BYTES */ + BUILD_BUG_ON(ARRAY_SIZE(name_buffer) <= 18); + if (sscanf(add_command, "%18s,%*s", name_buffer) == 1) { char *eos; eos = strchr(name_buffer, ','); @@ -8184,21 +8714,33 @@ next: } } - if ((msr_num == 0) && (path == NULL)) { - fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter ) required\n"); + if ((msr_num == 0) && (path == NULL) && (perf_device[0] == '\0' || perf_event[0] == '\0')) { + fprintf(stderr, "--add: (msrDDD | msr0xXXX | /path_to_counter | perf/device/event ) required\n"); fail++; } + /* Test for non-empty perf_device and perf_event */ + const bool is_perf_counter = perf_device[0] && perf_event[0]; + /* generate default column header */ if (*name_buffer == '\0') { - if (width == 32) - sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); - else - sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + if (is_perf_counter) { + snprintf(name_buffer, ARRAY_SIZE(name_buffer), "perf/%s", perf_event); + } else { + if (width == 32) + sprintf(name_buffer, "M0x%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + else + sprintf(name_buffer, "M0X%x%s", msr_num, format == FORMAT_PERCENT ? "%" : ""); + } } - if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) - fail++; + if (is_perf_counter) { + if (add_perf_counter(perf_device, perf_event, name_buffer, width, scope, type, format)) + fail++; + } else { + if (add_counter(msr_num, path, name_buffer, width, scope, type, format, 0, 0)) + fail++; + } if (fail) { help(); -- cgit v1.2.3-70-g09d2 From 25826c20da55a114c2cba8c4f237dfe7a7b4f8f6 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Tue, 9 Jul 2024 14:22:30 +0200 Subject: tools/power turbostat: Fix formatting in turbostat.8 We had an extra "+" at the beginning of some lines that look like a poorly formated patch. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 5537fc6b5bc3..8a21e9b56071 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -70,10 +70,10 @@ The column name "all" can be used to enable all disabled-by-default built-in cou .PP \fB--quiet\fP Do not decode and print the system configuration header information. .PP -+\fB--no-msr\fP Disable all the uses of the MSR driver. -+.PP -+\fB--no-perf\fP Disable all the uses of the perf API. -+.PP +\fB--no-msr\fP Disable all the uses of the MSR driver. +.PP +\fB--no-perf\fP Disable all the uses of the perf API. +.PP \fB--interval seconds\fP overrides the default 5.0 second measurement interval. .PP \fB--num_iterations num\fP number of the measurement iterations. -- cgit v1.2.3-70-g09d2 From 9f50066b0dd47cff045ba6da37e6def52783ba55 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Tue, 9 Jul 2024 14:46:20 +0200 Subject: tools/power turbostat: Add perf added counter example to turbostat.8 We had few lines about the feature, but without any complete examples. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 8a21e9b56071..71ae2d5159ad 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -336,6 +336,24 @@ CPU PRF_CTRL .fi +.SH ADD PERF COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. +We add a counter showing time spent in C1 core cstate, +labeling it with the column header, "pCPU%c1", and display it only once, +after the conclusion of 0.1 second sleep. +We also show CPU%c1 built-in counter that should show similar values. +.nf +sudo ./turbostat --quiet --cpu 0-3 --show CPU,CPU%c1 --add perf/cstate_core/c1-residency,cpu,delta,percent,pCPU%c1 sleep .1 +0.102448 sec +CPU pCPU%c1 CPU%c1 +- 34.89 34.89 +0 45.99 45.99 +1 45.94 45.94 +2 23.83 23.83 +3 23.84 23.84 + +.fi + .SH INPUT For interval-mode, turbostat will immediately end the current interval -- cgit v1.2.3-70-g09d2 From 478a01016c08d33635359254fc1ab61a8c83dd1a Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Tue, 9 Jul 2024 14:53:14 +0200 Subject: tools/power turbostat: Fix typo in turbostat.8 "After" was missing an "r", nothing to see here. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 71ae2d5159ad..44cbc7cb382d 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -323,7 +323,7 @@ available on all processors. Here we limit turbostat to showing just the CPU number for cpu0 - cpu3. We add a counter showing the 32-bit raw value of MSR 0x199 (MSR_IA32_PERF_CTL), labeling it with the column header, "PRF_CTRL", and display it only once, -afte the conclusion of a 0.1 second sleep. +after the conclusion of a 0.1 second sleep. .nf sudo ./turbostat --quiet --cpu 0-3 --show CPU --add msr0x199,u32,raw,PRF_CTRL sleep .1 0.101604 sec -- cgit v1.2.3-70-g09d2 From 52e130764ab6bdc439bcf124ac3e15f52ca0c8e5 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 10 Jul 2024 11:34:38 +0200 Subject: tools/power turbostat: Move debug prints from stdout to stderr This leaves the stdout cleaner, having only counter data. It makes it easier for programs to parse the output of turbostat, for example selftests. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index be345a4bbe96..cccd9f4311a0 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1984,6 +1984,7 @@ void help(void) " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" + " debug messages are printed to stderr\n" " -D, --Dump displays the raw counter values\n" " -e, --enable [all | column]\n" " shows all or the specified disabled column\n" @@ -8262,7 +8263,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) pinfo->scale = perf_scale; if (debug) - printf("Add perf/%s/%s cpu%d: %d\n", + fprintf(stderr, "Add perf/%s/%s cpu%d: %d\n", pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } @@ -8422,7 +8423,7 @@ struct msr_counter *find_msrp_by_name(struct msr_counter *head, char *name) for (mp = head; mp; mp = mp->next) { if (debug) - printf("%s: %s %s\n", __func__, name, mp->name); + fprintf(stderr, "%s: %s %s\n", __func__, name, mp->name); if (!strncmp(name, mp->name, strlen(mp->name))) return mp; } @@ -8439,8 +8440,8 @@ int add_counter(unsigned int msr_num, char *path, char *name, errx(1, "Requested MSR counter 0x%x, but in --no-msr mode", msr_num); if (debug) - printf("%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", __func__, msr_num, - path, name, width, scope, type, format, flags, id); + fprintf(stderr, "%s(msr%d, %s, %s, width%d, scope%d, type%d, format%d, flags%x, id%d)\n", + __func__, msr_num, path, name, width, scope, type, format, flags, id); switch (scope) { @@ -8448,7 +8449,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.tp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_thread_counters++ >= MAX_ADDED_THREAD_COUNTERS) { @@ -8460,7 +8461,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.cp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_core_counters++ >= MAX_ADDED_CORE_COUNTERS) { @@ -8472,7 +8473,7 @@ int add_counter(unsigned int msr_num, char *path, char *name, msrp = find_msrp_by_name(sys.pp, name); if (msrp) { if (debug) - printf("%s: %s FOUND\n", __func__, name); + fprintf(stderr, "%s: %s FOUND\n", __func__, name); break; } if (sys.added_package_counters++ >= MAX_ADDED_PACKAGE_COUNTERS) { @@ -8617,7 +8618,7 @@ int add_perf_counter(const char *perf_device, const char *perf_event, const char // FIXME: we might not have debug here yet if (debug) - printf("%s: %s/%s, name: %s, scope%d\n", + fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); return 0; -- cgit v1.2.3-70-g09d2 From b2e4a5dfafcce87ed2b9aac0b2887f421cd2930a Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 10 Jul 2024 14:28:03 +0200 Subject: tools/power turbostat: Move verbose counter messages to level 2 Printing information about the source and value during initialization and reading of the counter for each cpu, while useful when debugging, results in too verbose output. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 24 +++++++++++------------- 1 file changed, 11 insertions(+), 13 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index cccd9f4311a0..86bbea7d8e57 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -3797,7 +3797,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct unsigned long long perf_data[NUM_RAPL_COUNTERS + 1]; struct rapl_counter_info_t *rci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d domain%d\n", __func__, cpu, domain); assert(rapl_counter_info_perdomain); @@ -3827,7 +3827,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct assert(pi < ARRAY_SIZE(perf_data)); assert(rci->fd_perf != -1); - if (debug) + if (debug >= 2) fprintf(stderr, "Reading rapl counter via perf at %u (%llu %e %lf)\n", i, perf_data[pi], rci->scale[i], perf_data[pi] * rci->scale[i]); @@ -3837,7 +3837,7 @@ int get_rapl_counters(int cpu, unsigned int domain, struct core_data *c, struct break; case COUNTER_SOURCE_MSR: - if (debug) + if (debug >= 2) fprintf(stderr, "Reading rapl counter via msr at %u\n", i); assert(!no_msr); @@ -3895,7 +3895,7 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat struct cstate_counter_info_t *cci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d\n", __func__, cpu); assert(ccstate_counter_info); @@ -3965,9 +3965,8 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat assert(pi < ARRAY_SIZE(perf_data)); assert(cci->fd_perf_core != -1 || cci->fd_perf_pkg != -1); - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s %u: %llu\n", "perf", i, perf_data[pi]); - } cci->data[i] = perf_data[pi]; @@ -3979,9 +3978,8 @@ int get_cstate_counters(unsigned int cpu, struct thread_data *t, struct core_dat if (get_msr(cpu, cci->msr[i], &cci->data[i])) return -13 - i; - if (debug) { + if (debug >= 2) fprintf(stderr, "cstate via %s0x%llx %u: %llu\n", "msr", cci->msr[i], i, cci->data[i]); - } break; } @@ -4036,7 +4034,7 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) struct msr_counter_info_t *mci; - if (debug) + if (debug >= 2) fprintf(stderr, "%s: cpu%d\n", __func__, cpu); assert(msr_counter_info); @@ -4066,7 +4064,7 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) assert(pi < ARRAY_SIZE(perf_data)); assert(mci->fd_perf != -1); - if (debug) + if (debug >= 2) fprintf(stderr, "Reading msr counter via perf at %u: %llu\n", i, perf_data[pi]); mci->data[i] = perf_data[pi]; @@ -4082,7 +4080,7 @@ int get_smi_aperf_mperf(unsigned int cpu, struct thread_data *t) mci->data[i] &= mci->msr_mask[i]; - if (debug) + if (debug >= 2) fprintf(stderr, "Reading msr counter via msr at %u: %llu\n", i, mci->data[i]); break; @@ -7125,7 +7123,7 @@ int add_rapl_perf_counter(int cpu, struct rapl_counter_info_t *rci, const struct { int ret = add_rapl_perf_counter_(cpu, rci, cai, scale, unit); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; @@ -7284,7 +7282,7 @@ int add_cstate_perf_counter(int cpu, struct cstate_counter_info_t *cci, const st { int ret = add_cstate_perf_counter_(cpu, cci, cai); - if (debug) + if (debug >= 2) fprintf(stderr, "%s: %d (cpu: %d)\n", __func__, ret, cpu); return ret; -- cgit v1.2.3-70-g09d2 From f0e4ed752fda6997b41917c94a5478b340178001 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Mon, 22 Jul 2024 22:11:03 +0200 Subject: tools/power turbostat: Add early support for PMT counters Allows users to read Intel PMT (Platform Monitoring Technology) counters, providing interface similar to one used to add MSR and perf counters. Because PMT is exposed as a raw MMIO range, without metadata, user has to supply the necessary information to find and correctly display the requested counter. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 768 +++++++++++++++++++++++++++++++++- 1 file changed, 766 insertions(+), 2 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 86bbea7d8e57..c9fdfb074ce9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include @@ -1082,6 +1083,9 @@ size_t cpu_present_setsize, cpu_effective_setsize, cpu_allowed_setsize, cpu_affi #define MAX_ADDED_THREAD_COUNTERS 24 #define MAX_ADDED_CORE_COUNTERS 8 #define MAX_ADDED_PACKAGE_COUNTERS 16 +#define PMT_MAX_ADDED_THREAD_COUNTERS 24 +#define PMT_MAX_ADDED_CORE_COUNTERS 8 +#define PMT_MAX_ADDED_PACKAGE_COUNTERS 16 #define BITMASK_SIZE 32 #define ZERO_ARRAY(arr) (memset(arr, 0, sizeof(arr)) + __must_be_array(arr)) @@ -1466,6 +1470,100 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = { }, }; +/* Can be redefined when compiling, useful for testing. */ +#ifndef SYSFS_TELEM_PATH +#define SYSFS_TELEM_PATH "/sys/class/intel_pmt" +#endif + +#define PMT_COUNTER_NAME_SIZE_BYTES 16 +#define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 + +struct pmt_mmio { + struct pmt_mmio *next; + + unsigned int guid; + unsigned int size; + + /* Base pointer to the mmaped memory. */ + void *mmio_base; + + /* + * Offset to be applied to the mmio_base + * to get the beginning of the PMT counters for given GUID. + */ + unsigned long pmt_offset; +} *pmt_mmios; + +enum pmt_datatype { + PMT_TYPE_RAW, +}; + +struct pmt_domain_info { + /* + * Pointer to the MMIO obtained by applying a counter offset + * to the mmio_base of the mmaped region for the given GUID. + * + * This is where to read the raw value of the counter from. + */ + unsigned long *pcounter; +}; + +struct pmt_counter { + struct pmt_counter *next; + + /* PMT metadata */ + char name[PMT_COUNTER_NAME_SIZE_BYTES]; + enum pmt_datatype type; + enum counter_scope scope; + unsigned int lsb; + unsigned int msb; + + /* BIC-like metadata */ + enum counter_format format; + + unsigned int num_domains; + struct pmt_domain_info *domains; +}; + +unsigned int pmt_counter_get_width(const struct pmt_counter *p) +{ + return (p->msb - p->lsb)+1; +} + +void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) +{ + struct pmt_domain_info *new_mem; + + new_mem = (struct pmt_domain_info*) reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); + if (!new_mem) { + fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); + exit(1); + } + + /* Zero initialize just allocated memory. */ + const size_t num_new_domains = new_size - pcounter->num_domains; + + memset(&new_mem[pcounter->num_domains], 0, num_new_domains * sizeof(*pcounter->domains)); + + pcounter->num_domains = new_size; + pcounter->domains = new_mem; +} + +void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) +{ + /* + * Allocate more memory ahead of time. + * + * Always allocate space for at least 8 elements + * and double the size when growing. + */ + if (new_size < 8) + new_size = 8; + new_size = MAX(new_size, pcounter->num_domains*2); + + pmt_counter_resize_(pcounter, new_size); +} + struct thread_data { struct timeval tv_begin; struct timeval tv_end; @@ -1484,6 +1582,7 @@ struct thread_data { bool is_atom; unsigned long long counter[MAX_ADDED_THREAD_COUNTERS]; unsigned long long perf_counter[MAX_ADDED_THREAD_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_THREAD_COUNTERS]; } *thread_even, *thread_odd; struct core_data { @@ -1498,6 +1597,7 @@ struct core_data { unsigned long long core_throt_cnt; unsigned long long counter[MAX_ADDED_CORE_COUNTERS]; unsigned long long perf_counter[MAX_ADDED_CORE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_CORE_COUNTERS]; } *core_even, *core_odd; struct pkg_data { @@ -1532,6 +1632,7 @@ struct pkg_data { unsigned int uncore_mhz; unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; + unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; } *package_even, *package_odd; #define ODD_COUNTERS thread_odd, core_odd, package_odd @@ -1681,6 +1782,10 @@ struct sys_counters { struct perf_counter_info *perf_tp; struct perf_counter_info *perf_cp; struct perf_counter_info *perf_pp; + + struct pmt_counter *pmt_tp; + struct pmt_counter *pmt_cp; + struct pmt_counter *pmt_pp; } sys; static size_t free_msr_counters_(struct msr_counter **pp) @@ -1981,6 +2086,7 @@ void help(void) " -a, --add add a counter\n" " eg. --add msr0x10,u64,cpu,delta,MY_TSC\n" " eg. --add perf/cstate_pkg/c2-residency,package,delta,percent,perfPC2\n" + " eg. --add pmt,name=XTAL,type=raw,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102\n" " -c, --cpu cpu-set limit output to summary plus cpu-set:\n" " {core | package | j,k,l..m,n-p }\n" " -d, --debug displays usec, Time_Of_Day_Seconds and more debugging\n" @@ -2092,6 +2198,7 @@ void print_header(char *delim) { struct msr_counter *mp; struct perf_counter_info *pp; + struct pmt_counter *ppmt; int printed = 0; if (DO_BIC(BIC_USEC)) @@ -2164,6 +2271,21 @@ void print_header(char *delim) } } + ppmt = sys.pmt_tp; + while (ppmt) { + switch(ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%sCPU%%c1", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_c3)) @@ -2219,6 +2341,21 @@ void print_header(char *delim) } } + ppmt = sys.pmt_cp; + while (ppmt) { + switch(ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + } + + ppmt = ppmt->next; + } + if (DO_BIC(BIC_PkgTmp)) outp += sprintf(outp, "%sPkgTmp", (printed++ ? delim : "")); @@ -2329,6 +2466,21 @@ void print_header(char *delim) } } + ppmt = sys.pmt_pp; + while (ppmt) { + switch(ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); + else + outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); + + break; + } + + ppmt = ppmt->next; + } + outp += sprintf(outp, "\n"); } @@ -2450,6 +2602,7 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data int i; struct msr_counter *mp; struct perf_counter_info *pp; + struct pmt_counter *ppmt; char *delim = "\t"; int printed = 0; @@ -2612,6 +2765,19 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)t->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); + + break; + } + } + /* C1 */ if (DO_BIC(BIC_CPU_c1)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * t->c1 / tsc); @@ -2673,6 +2839,19 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data } } + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)c->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); + + break; + } + } + fmt8 = "%s%.2f"; if (DO_BIC(BIC_CorWatt) && platform->has_per_core_rapl) @@ -2842,9 +3021,23 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%lld", (printed++ ? delim : ""), p->perf_counter[i]); } else if (pp->format == FORMAT_PERCENT) { outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->perf_counter[i] / tsc); - } else if (pp->type == COUNTER_K2M) + } else if (pp->type == COUNTER_K2M) { outp += sprintf(outp, "%s%d", (printed++ ? delim : ""), (unsigned int)p->perf_counter[i] / 1000); + } + } + + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + switch (ppmt->type) { + case PMT_TYPE_RAW: + if (pmt_counter_get_width(ppmt) <= 32) + outp += sprintf(outp, "%s0x%08x", (printed++ ? delim : ""), + (unsigned int)p->pmt_counter[i]); + else + outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); + + break; + } } done: @@ -2901,6 +3094,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) int i; struct msr_counter *mp; struct perf_counter_info *pp; + struct pmt_counter *ppmt; if (DO_BIC(BIC_Totl_c0)) old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; @@ -2970,6 +3164,13 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; } + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -2978,6 +3179,7 @@ void delta_core(struct core_data *new, struct core_data *old) int i; struct msr_counter *mp; struct perf_counter_info *pp; + struct pmt_counter *ppmt; old->c3 = new->c3 - old->c3; old->c6 = new->c6 - old->c6; @@ -3001,6 +3203,13 @@ void delta_core(struct core_data *new, struct core_data *old) else old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; } + + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } } int soft_c1_residency_display(int bic) @@ -3019,6 +3228,7 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d int i; struct msr_counter *mp; struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* we run cpuid just the 1st time, copy the results */ if (DO_BIC(BIC_APIC)) @@ -3105,6 +3315,13 @@ int delta_thread(struct thread_data *new, struct thread_data *old, struct core_d old->perf_counter[i] = new->perf_counter[i] - old->perf_counter[i]; } + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + if (ppmt->format == FORMAT_RAW) + old->pmt_counter[i] = new->pmt_counter[i]; + else + old->pmt_counter[i] = new->pmt_counter[i] - old->pmt_counter[i]; + } + return 0; } @@ -3211,6 +3428,10 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data memset(&t->perf_counter[0], 0, sizeof(t->perf_counter)); memset(&c->perf_counter[0], 0, sizeof(c->perf_counter)); memset(&p->perf_counter[0], 0, sizeof(p->perf_counter)); + + memset(&t->pmt_counter[0], 0, ARRAY_SIZE(t->pmt_counter)); + memset(&c->pmt_counter[0], 0, ARRAY_SIZE(c->pmt_counter)); + memset(&p->pmt_counter[0], 0, ARRAY_SIZE(p->pmt_counter)); } void rapl_counter_accumulate(struct rapl_counter *dst, const struct rapl_counter *src) @@ -3232,6 +3453,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int i; struct msr_counter *mp; struct perf_counter_info *pp; + struct pmt_counter *ppmt; /* copy un-changing apic_id's */ if (DO_BIC(BIC_APIC)) @@ -3268,6 +3490,10 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.threads.perf_counter[i] += t->perf_counter[i]; } + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] += t->pmt_counter[i]; + } + /* sum per-core values only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) return 0; @@ -3294,6 +3520,10 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.cores.perf_counter[i] += c->perf_counter[i]; } + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] += c->pmt_counter[i]; + } + /* sum per-pkg values only for 1st core in pkg */ if (!is_cpu_first_core_in_package(t, c, p)) return 0; @@ -3353,6 +3583,10 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.perf_counter[i] += p->perf_counter[i]; } + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] += p->pmt_counter[i]; + } + return 0; } @@ -3365,6 +3599,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data int i; struct msr_counter *mp; struct perf_counter_info *pp; + struct pmt_counter *ppmt; clear_counters(&average.threads, &average.cores, &average.packages); @@ -3465,6 +3700,16 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data } average.packages.perf_counter[i] /= topo.allowed_packages; } + + for (i = 0, ppmt = sys.pmt_tp; ppmt; i++, ppmt = ppmt->next) { + average.threads.pmt_counter[i] /= topo.allowed_cpus; + } + for (i = 0, ppmt = sys.pmt_cp; ppmt; i++, ppmt = ppmt->next) { + average.cores.pmt_counter[i] /= topo.allowed_cores; + } + for (i = 0, ppmt = sys.pmt_pp; ppmt; i++, ppmt = ppmt->next) { + average.packages.pmt_counter[i] /= topo.allowed_packages; + } } static unsigned long long rdtsc(void) @@ -4120,6 +4365,32 @@ int perf_counter_info_read_values(struct perf_counter_info *pp, int cpu, unsigne return 0; } +unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) +{ + unsigned long mask; + + if (msb == 63) + mask = 0xffffffffffffffff; + else + mask = ((1<<(msb+1))-1); + + mask -= (1<num_domains); + + const unsigned long *pmmio = ppmt->domains[domain_id].pcounter; + const unsigned long value = pmmio ? *pmmio : 0; + const unsigned long value_mask = pmt_gen_value_mask(ppmt->lsb, ppmt->msb); + const unsigned long value_shift = ppmt->lsb; + + return (value & value_mask) >> value_shift; +} + /* * get_counters(...) * migrate to cpu @@ -4130,6 +4401,7 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) int cpu = t->cpu_id; unsigned long long msr; struct msr_counter *mp; + struct pmt_counter *pp; int i; int status; @@ -4164,6 +4436,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (perf_counter_info_read_values(sys.perf_tp, cpu, t->perf_counter, MAX_ADDED_THREAD_COUNTERS)) return -10; + for (i = 0, pp = sys.pmt_tp; pp; i++, pp = pp->next) + t->pmt_counter[i] = pmt_read_counter(pp, t->cpu_id); + /* collect core counters only for 1st thread in core */ if (!is_cpu_first_thread_in_core(t, c, p)) goto done; @@ -4205,6 +4480,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (perf_counter_info_read_values(sys.perf_cp, cpu, c->perf_counter, MAX_ADDED_CORE_COUNTERS)) return -10; + for (i = 0, pp = sys.pmt_cp; pp; i++, pp = pp->next) + c->pmt_counter[i] = pmt_read_counter(pp, c->core_id); + /* collect package counters only for 1st core in package */ if (!is_cpu_first_core_in_package(t, c, p)) goto done; @@ -4281,6 +4559,9 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (perf_counter_info_read_values(sys.perf_pp, cpu, p->perf_counter, MAX_ADDED_PACKAGE_COUNTERS)) return -10; + for (i = 0, pp = sys.pmt_pp; pp; i++, pp = pp->next) + p->pmt_counter[i] = pmt_read_counter(pp, p->package_id); + done: gettimeofday(&t->tv_end, (struct timezone *)NULL); @@ -8285,6 +8566,304 @@ void added_perf_counters_init(void) errx(1, "%s: %s", __func__, "package"); } +int parse_telem_info_file(int fd_dir, const char *info_filename, const char *format, unsigned long *output) +{ + int fd_telem_info; + FILE *file_telem_info; + unsigned long value; + + fd_telem_info = openat(fd_dir, info_filename, O_RDONLY); + if (fd_telem_info == -1) + return -1; + + file_telem_info = fdopen(fd_telem_info, "r"); + if (file_telem_info == NULL) { + close(fd_telem_info); + return -1; + } + + if (fscanf(file_telem_info, format, &value) != 1) { + fclose(file_telem_info); + return -1; + } + + fclose(file_telem_info); + + *output = value; + + return 0; +} + +struct pmt_mmio* pmt_mmio_open(unsigned int target_guid) +{ + DIR *dirp; + struct dirent *entry; + struct stat st; + unsigned int telem_idx; + int fd_telem_dir, fd_pmt; + unsigned long guid, size, offset; + size_t mmap_size; + void *mmio; + struct pmt_mmio *ret = NULL; + + if (stat(SYSFS_TELEM_PATH, &st) == -1) + return NULL; + + dirp = opendir(SYSFS_TELEM_PATH); + if (dirp == NULL) + return NULL; + + for (;;) { + entry = readdir(dirp); + + if (entry == NULL) + break; + + if (strcmp(entry->d_name, ".") == 0) + continue; + + if (strcmp(entry->d_name, "..") == 0) + continue; + + if (sscanf(entry->d_name, "telem%u", &telem_idx) != 1) + continue; + + if (fstatat(dirfd(dirp), entry->d_name, &st, 0) == -1) { + break; + } + + if (!S_ISDIR(st.st_mode)) + continue; + + fd_telem_dir = openat(dirfd(dirp), entry->d_name, O_RDONLY); + if (fd_telem_dir == -1) { + break; + } + + if (parse_telem_info_file(fd_telem_dir, "guid", "%lx", &guid)) { + close(fd_telem_dir); + break; + } + + if (parse_telem_info_file(fd_telem_dir, "size", "%lu", &size)) { + close(fd_telem_dir); + break; + } + + if (guid != target_guid) { + close(fd_telem_dir); + continue; + } + + if (parse_telem_info_file(fd_telem_dir, "offset", "%lu", &offset)) { + close(fd_telem_dir); + break; + } + + assert(offset == 0); + + fd_pmt = openat(fd_telem_dir, "telem", O_RDONLY); + if (fd_pmt == -1) + goto loop_cleanup_and_break; + + mmap_size = (size + 0x1000UL) & (~0x1000UL); + mmio = mmap(0, mmap_size, PROT_READ, MAP_SHARED, fd_pmt, 0); + if (mmio != MAP_FAILED) { + + if (debug) + fprintf(stderr, "%s: 0x%lx mmaped at: %p\n", __func__, guid, mmio); + + ret = calloc(1, sizeof(*ret)); + + if (!ret) { + fprintf(stderr, "%s: Failed to allocate pmt_mmio\n", __func__); + exit(1); + } + + ret->guid = guid; + ret->mmio_base = mmio; + ret->pmt_offset = offset; + ret->size = size; + + ret->next = pmt_mmios; + pmt_mmios = ret; + } + +loop_cleanup_and_break: + close(fd_pmt); + close(fd_telem_dir); + break; + } + + closedir(dirp); + + return ret; +} + +struct pmt_mmio* pmt_mmio_find(unsigned int guid) +{ + struct pmt_mmio *pmmio = pmt_mmios; + + while (pmmio) { + if (pmmio->guid == guid) + return pmmio; + + pmmio = pmmio->next; + } + + return NULL; +} + +void* pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) +{ + char *ret; + + /* Get base of mmaped PMT file. */ + ret = (char*)pmmio->mmio_base; + + /* + * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. + * It's not guaranteed that the mmaped memory begins with the telemetry data + * - we might have to apply the offset first. + */ + ret += pmmio->pmt_offset; + + /* Apply the counter offset to get the address to the mmaped counter. */ + ret += counter_offset; + + return ret; +} + +struct pmt_mmio* pmt_add_guid(unsigned int guid) +{ + struct pmt_mmio *ret; + + ret = pmt_mmio_find(guid); + if (!ret) + ret = pmt_mmio_open(guid); + + return ret; +} + +enum pmt_open_mode { + PMT_OPEN_TRY, /* Open failure is not an error. */ + PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ +}; + +struct pmt_counter* pmt_find_counter(struct pmt_counter *pcounter, const char *name) +{ + while (pcounter) { + if (strcmp(pcounter->name, name) == 0) + break; + + pcounter = pcounter->next; + } + + return pcounter; +} + +struct pmt_counter** pmt_get_scope_root(enum counter_scope scope) +{ + switch(scope) { + case SCOPE_CPU: + return &sys.pmt_tp; + case SCOPE_CORE: + return &sys.pmt_cp; + case SCOPE_PACKAGE: + return &sys.pmt_pp; + } + + __builtin_unreachable(); +} + +void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, unsigned int domain_id) +{ + /* Make sure the new domain fits. */ + if (domain_id >= pcounter->num_domains) + pmt_counter_resize(pcounter, domain_id+1); + + assert(pcounter->domains); + assert(domain_id < pcounter->num_domains); + + pcounter->domains[domain_id].pcounter = pmmio; +} + +int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, + unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, + enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) +{ + struct pmt_mmio *mmio; + struct pmt_counter *pcounter; + struct pmt_counter ** const pmt_root = pmt_get_scope_root(scope); + bool new_counter = false; + int conflict = 0; + + if (lsb > msb) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "lsb <= msb", name); + exit(1); + } + + if (msb >= 64) { + fprintf(stderr, "%s: %s: `%s` must be satisfied\n", __func__, "msb < 64", name); + exit(1); + } + + mmio = pmt_add_guid(guid); + if (!mmio) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: failed to map PMT MMIO for guid %x\n", __func__, guid); + exit(1); + } + + return 1; + } + + if (offset >= mmio->size) { + if (mode != PMT_OPEN_TRY) { + fprintf(stderr, "%s: offset %u outside of PMT MMIO size %u\n", __func__, offset, mmio->size); + exit(1); + } + + return 1; + } + + pcounter = pmt_find_counter(*pmt_root, name); + if (!pcounter) { + pcounter = calloc(1, sizeof(*pcounter)); + new_counter = true; + } + + if (new_counter) { + strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name)-1); + pcounter->type = type; + pcounter->scope = scope; + pcounter->lsb = lsb; + pcounter->msb = msb; + pcounter->format = format; + } else { + conflict += pcounter->type != type; + conflict += pcounter->scope != scope; + conflict += pcounter->lsb != lsb; + conflict += pcounter->msb != msb; + conflict += pcounter->format != format; + } + + if (conflict) { + fprintf(stderr, "%s: conflicting parameters for the PMT counter with the same name %s\n", + __func__, name); + exit(1); + } + + pmt_counter_add_domain(pcounter, pmt_get_counter_pointer(mmio, offset), domain_id); + + if (new_counter) { + pcounter->next = *pmt_root; + *pmt_root = pcounter; + } + + return 0; +} + void turbostat_init() { setup_all_buffers(true); @@ -8622,7 +9201,7 @@ int add_perf_counter(const char *perf_device, const char *perf_event, const char return 0; } -void parse_add_command(char *add_command) +void parse_add_command_msr(char *add_command) { int msr_num = 0; char *path = NULL; @@ -8747,6 +9326,191 @@ next: } } +bool starts_with(const char *str, const char *prefix) +{ + return strncmp(prefix, str, strlen(prefix)) == 0; +} + +void parse_add_command_pmt(char *add_command) +{ + char *name = NULL; + char *type_name = NULL; + char *format_name = NULL; + unsigned int offset; + unsigned int lsb; + unsigned int msb; + unsigned int guid; + unsigned int domain_id; + enum counter_scope scope = 0; + enum pmt_datatype type = PMT_TYPE_RAW; + enum counter_format format = FORMAT_RAW; + bool has_offset = false; + bool has_lsb = false; + bool has_msb = false; + bool has_format = true; /* Format has a default value. */ + bool has_guid = false; + bool has_scope = false; + bool has_type = true; /* Type has a default value. */ + + /* Consume the "pmt," prefix. */ + add_command = strchr(add_command, ','); + if (!add_command) { + help(); + exit(1); + } + ++add_command; + + while (add_command) { + if (starts_with(add_command, "name=")) { + name = add_command + strlen("name="); + goto next; + } + + if (starts_with(add_command, "type=")) { + type_name = add_command + strlen("type="); + goto next; + } + + if (starts_with(add_command, "domain=")) { + const size_t prefix_len = strlen("domain="); + + if (sscanf(add_command + prefix_len, "cpu%u", &domain_id) == 1) { + scope = SCOPE_CPU; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "core%u", &domain_id) == 1) { + scope = SCOPE_CORE; + has_scope = true; + } else if (sscanf(add_command + prefix_len, "package%u", &domain_id) == 1) { + scope = SCOPE_PACKAGE; + has_scope = true; + } + + if (!has_scope) { + printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", + __func__); + exit(1); + } + + goto next; + } + + if (starts_with(add_command, "format=")) { + format_name = add_command + strlen("format="); + goto next; + } + + if (sscanf(add_command, "offset=%u", &offset) == 1) { + has_offset = true; + goto next; + } + + if (sscanf(add_command, "lsb=%u", &lsb) == 1) { + has_lsb = true; + goto next; + } + + if (sscanf(add_command, "msb=%u", &msb) == 1) { + has_msb = true; + goto next; + } + + if (sscanf(add_command, "guid=%x", &guid) == 1) { + has_guid = true; + goto next; + } + +next: + add_command = strchr(add_command, ','); + if (add_command) { + *add_command = '\0'; + add_command++; + } + } + + if (!name) { + printf("%s: missing %s\n", __func__, "name"); + exit(1); + } + + if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { + printf("%s: name has to be at most %d characters long\n", + __func__, PMT_COUNTER_NAME_SIZE_BYTES); + exit(1); + } + + if (format_name) { + has_format = false; + + if (strcmp("raw", format_name) == 0) { + format = FORMAT_RAW; + has_format = true; + } + + if (strcmp("delta", format_name) == 0) { + format = FORMAT_DELTA; + has_format = true; + } + + if (!has_format) { + fprintf(stderr, "%s: Invalid format %s. Expected raw or delta\n", __func__, format_name); + exit(1); + } + } + + if (type_name) { + has_type = false; + + if (strcmp("raw", type_name) == 0) { + type = PMT_TYPE_RAW; + has_type = true; + } + + if (!has_type) { + printf("%s: invalid %s: %s\n", __func__, "type", type_name); + exit(1); + } + } + + if (!has_offset) { + printf("%s : missing %s\n", __func__, "offset"); + exit(1); + } + + if (!has_lsb) { + printf("%s: missing %s\n", __func__, "lsb"); + exit(1); + } + + if (!has_msb) { + printf("%s: missing %s\n", __func__, "msb"); + exit(1); + } + + if (!has_guid) { + printf("%s: missing %s\n", __func__, "guid"); + exit(1); + } + + if (!has_scope) { + printf("%s: missing %s\n", __func__, "scope"); + exit(1); + } + + if (lsb > msb) { + printf("%s: lsb > msb doesn't make sense\n", __func__); + exit(1); + } + + pmt_add_counter(guid, name, type, lsb, msb, offset, scope, format, domain_id, PMT_OPEN_REQUIRED); +} + +void parse_add_command(char *add_command) +{ + if (strncmp(add_command, "pmt", strlen("pmt")) == 0) + return parse_add_command_pmt(add_command); + return parse_add_command_msr(add_command); +} + int is_deferred_add(char *name) { int i; -- cgit v1.2.3-70-g09d2 From 640540beb88363a825524295664acfdb0f5d5fc2 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Mon, 22 Jul 2024 22:12:22 +0200 Subject: tools/power turbostat: Add MTL's PMT DC6 builtin counter Provide a definition for metadata that allows reading DC6 residency counter via PMT and exposes it as a builtin counter. Note that this residency counter is updated and read via entirely different mechanisms vs the MSR-based residency counters. On MTL processors, there are times when Die%c6 will report above 100%. This is still useful, but don't expect 3 digits of precision... Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 70 ++++++++++++++++++++++++++++++++++- 1 file changed, 69 insertions(+), 1 deletion(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index c9fdfb074ce9..f769e8beabd3 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -193,6 +193,7 @@ struct msr_counter bic[] = { { 0x0, "SAM%mc6", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMMHz", NULL, 0, 0, 0, NULL, 0 }, { 0x0, "SAMAMHz", NULL, 0, 0, 0, NULL, 0 }, + { 0x0, "Die%c6", NULL, 0, 0, 0, NULL, 0 }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -254,11 +255,12 @@ struct msr_counter bic[] = { #define BIC_SAM_mc6 (1ULL << 55) #define BIC_SAMMHz (1ULL << 56) #define BIC_SAMACTMHz (1ULL << 57) +#define BIC_Diec6 (1ULL << 58) #define BIC_TOPOLOGY (BIC_Package | BIC_Node | BIC_CoreCnt | BIC_PkgCnt | BIC_Core | BIC_CPU | BIC_Die ) #define BIC_THERMAL_PWR ( BIC_CoreTmp | BIC_PkgTmp | BIC_PkgWatt | BIC_CorWatt | BIC_GFXWatt | BIC_RAMWatt | BIC_PKG__ | BIC_RAM__) #define BIC_FREQUENCY (BIC_Avg_MHz | BIC_Busy | BIC_Bzy_MHz | BIC_TSC_MHz | BIC_GFXMHz | BIC_GFXACTMHz | BIC_SAMMHz | BIC_SAMACTMHz | BIC_UNCORE_MHZ) -#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6) +#define BIC_IDLE (BIC_sysfs | BIC_CPU_c1 | BIC_CPU_c3 | BIC_CPU_c6 | BIC_CPU_c7 | BIC_GFX_rc6 | BIC_Pkgpc2 | BIC_Pkgpc3 | BIC_Pkgpc6 | BIC_Pkgpc7 | BIC_Pkgpc8 | BIC_Pkgpc9 | BIC_Pkgpc10 | BIC_CPU_LPI | BIC_SYS_LPI | BIC_Mod_c6 | BIC_Totl_c0 | BIC_Any_c0 | BIC_GFX_c0 | BIC_CPUGFX | BIC_SAM_mc6 | BIC_Diec6) #define BIC_OTHER ( BIC_IRQ | BIC_SMI | BIC_ThreadC | BIC_CoreTmp | BIC_IPC) #define BIC_DISABLED_BY_DEFAULT (BIC_USEC | BIC_TOD | BIC_APIC | BIC_X2APIC) @@ -1475,6 +1477,11 @@ static struct msr_counter_arch_info msr_counter_arch_infos[] = { #define SYSFS_TELEM_PATH "/sys/class/intel_pmt" #endif +#define PMT_COUNTER_MTL_DC6_OFFSET 120 +#define PMT_COUNTER_MTL_DC6_LSB 0 +#define PMT_COUNTER_MTL_DC6_MSB 63 +#define PMT_MTL_DC6_GUID 0x1a067102 + #define PMT_COUNTER_NAME_SIZE_BYTES 16 #define PMT_COUNTER_TYPE_NAME_SIZE_BYTES 32 @@ -1496,6 +1503,7 @@ struct pmt_mmio { enum pmt_datatype { PMT_TYPE_RAW, + PMT_TYPE_XTAL_TIME, }; struct pmt_domain_info { @@ -1630,6 +1638,7 @@ struct pkg_data { struct rapl_counter rapl_dram_perf_status; /* MSR_DRAM_PERF_STATUS */ unsigned int pkg_temp_c; unsigned int uncore_mhz; + unsigned long long die_c6; unsigned long long counter[MAX_ADDED_PACKAGE_COUNTERS]; unsigned long long perf_counter[MAX_ADDED_PACKAGE_COUNTERS]; unsigned long long pmt_counter[PMT_MAX_ADDED_PACKAGE_COUNTERS]; @@ -2281,6 +2290,10 @@ void print_header(char *delim) outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; } ppmt = ppmt->next; @@ -2351,6 +2364,10 @@ void print_header(char *delim) outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; } ppmt = ppmt->next; @@ -2400,6 +2417,8 @@ void print_header(char *delim) outp += sprintf(outp, "%sPkg%%pc9", (printed++ ? delim : "")); if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%sPk%%pc10", (printed++ ? delim : "")); + if (DO_BIC(BIC_Diec6)) + outp += sprintf(outp, "%sDie%%c6", (printed++ ? delim : "")); if (DO_BIC(BIC_CPU_LPI)) outp += sprintf(outp, "%sCPU%%LPI", (printed++ ? delim : "")); if (DO_BIC(BIC_SYS_LPI)) @@ -2476,6 +2495,10 @@ void print_header(char *delim) outp += sprintf(outp, "%s%18.18s", (printed++ ? delim : ""), ppmt->name); break; + + case PMT_TYPE_XTAL_TIME: + outp += sprintf(outp, "%s%s", delim, ppmt->name); + break; } ppmt = ppmt->next; @@ -2775,6 +2798,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), t->pmt_counter[i]); break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = t->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; } } @@ -2849,6 +2879,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), c->pmt_counter[i]); break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = c->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; } } @@ -2931,6 +2968,9 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data if (DO_BIC(BIC_Pkgpc10)) outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); + if (DO_BIC(BIC_Diec6)) + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) outp += @@ -3037,6 +3077,13 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s0x%016llx", (printed++ ? delim : ""), p->pmt_counter[i]); break; + + case PMT_TYPE_XTAL_TIME: + const unsigned long value_raw = p->pmt_counter[i]; + const double value_converted = 100.0 * value_raw / crystal_hz / interval_float; + + outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), value_converted); + break; } } @@ -3115,6 +3162,7 @@ int delta_package(struct pkg_data *new, struct pkg_data *old) old->pc8 = new->pc8 - old->pc8; old->pc9 = new->pc9 - old->pc9; old->pc10 = new->pc10 - old->pc10; + old->die_c6 = new->die_c6 - old->die_c6; old->cpu_lpi = new->cpu_lpi - old->cpu_lpi; old->sys_lpi = new->sys_lpi - old->sys_lpi; old->pkg_temp_c = new->pkg_temp_c; @@ -3398,6 +3446,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data p->pc8 = 0; p->pc9 = 0; p->pc10 = 0; + p->die_c6 = 0; p->cpu_lpi = 0; p->sys_lpi = 0; @@ -3547,6 +3596,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) average.packages.pc8 += p->pc8; average.packages.pc9 += p->pc9; average.packages.pc10 += p->pc10; + average.packages.die_c6 += p->die_c6; average.packages.cpu_lpi = p->cpu_lpi; average.packages.sys_lpi = p->sys_lpi; @@ -3642,6 +3692,7 @@ void compute_average(struct thread_data *t, struct core_data *c, struct pkg_data average.packages.pc8 /= topo.allowed_packages; average.packages.pc9 /= topo.allowed_packages; average.packages.pc10 /= topo.allowed_packages; + average.packages.die_c6 /= topo.allowed_packages; for (i = 0, mp = sys.tp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -5505,6 +5556,7 @@ void msr_perf_init(void); void rapl_perf_init(void); void cstate_perf_init(void); void added_perf_counters_init(void); +void pmt_init(void); void re_initialize(void) { @@ -5515,6 +5567,7 @@ void re_initialize(void) rapl_perf_init(); cstate_perf_init(); added_perf_counters_init(); + pmt_init(); fprintf(outf, "turbostat: re-initialized with num_cpus %d, allowed_cpus %d\n", topo.num_cpus, topo.allowed_cpus); } @@ -8864,6 +8917,15 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, return 0; } +void pmt_init(void) +{ + if (BIC_IS_ENABLED(BIC_Diec6)) { + pmt_add_counter(PMT_MTL_DC6_GUID, "Die%c6", PMT_TYPE_XTAL_TIME, PMT_COUNTER_MTL_DC6_LSB, + PMT_COUNTER_MTL_DC6_MSB, PMT_COUNTER_MTL_DC6_OFFSET, SCOPE_PACKAGE, FORMAT_DELTA, + 0, PMT_OPEN_TRY); + } +} + void turbostat_init() { setup_all_buffers(true); @@ -8878,6 +8940,7 @@ void turbostat_init() rapl_perf_init(); cstate_perf_init(); added_perf_counters_init(); + pmt_init(); for_all_cpus(get_cpu_type, ODD_COUNTERS); for_all_cpus(get_cpu_type, EVEN_COUNTERS); @@ -9465,6 +9528,11 @@ next: has_type = true; } + if (strcmp("txtal_time", type_name) == 0) { + type = PMT_TYPE_XTAL_TIME; + has_type = true; + } + if (!has_type) { printf("%s: invalid %s: %s\n", __func__, "type", type_name); exit(1); -- cgit v1.2.3-70-g09d2 From 944264a2a99cc4dd4b10eaa9798b6aab80adf4be Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Tue, 23 Jul 2024 20:12:32 +0200 Subject: tools/power turbostat: Document PMT in turbostat.8 Add a general description of the user interface for adding PMT counters with the new --add pmt,... option. Provide a complete example for requesting two counters. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.8 | 65 +++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index 44cbc7cb382d..067717bce1d4 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -55,6 +55,39 @@ name as necessary to disambiguate it from others is necessary. Note that option as the column header. .fi .PP +\fB--add pmt,[attr_name=attr_value, ...]\fP add column with a PMT (Intel Platform Monitoring Technology) counter in a similar way to --add option above, but require PMT metadata to be supplied to correctly read and display the counter. The metadata can be found in the Intel PMT XML files, hosted at https://github.com/intel/Intel-PMT. For a complete example see "ADD PMT COUNTER EXAMPLE". +.nf + name="name_string" + For column header. + + type={\fBraw\fP} + 'raw' shows the counter contents in hex. + default: raw + + format={\fBraw\fP | \fBdelta\fP} + 'raw' shows the counter contents in hex. + 'delta' shows the difference in values during the measurement interval. + default: raw + + domain={\fBcpu%u\fP | \fBcore%u\fP | \fBpackage%u\fP} + 'cpu' per cpu/thread counter. + 'core' per core counter. + 'package' per package counter. + '%u' denotes id of the domain that the counter is associated with. For example core4 would mean that the counter is associated with core number 4. + + offset=\fB%u\fP + '%u' offset within the PMT MMIO region. + + lsb=\fB%u\fP + '%u' least significant bit within the 64 bit value read from 'offset'. Together with 'msb', used to form a read mask. + + msb=\fB%u\fP + '%u' most significant bit within the 64 bit value read from 'offset'. Together with 'lsb', used to form a read mask. + + guid=\fB%x\fP + '%x' hex identifier of the PMT MMIO region. +.fi +.PP \fB--cpu cpu-set\fP limit output to system summary plus the specified cpu-set. If cpu-set is the string "core", then the system summary plus the first CPU in each core are printed -- eg. subsequent HT siblings are not printed. Or if cpu-set is the string "package", then the system summary plus the first CPU in each package is printed. Otherwise, the system summary plus the specified set of CPUs are printed. The cpu-set is ordered from low to high, comma delimited with ".." and "-" permitted to denote a range. eg. 1,2,8,14..17,21-44 .PP \fB--hide column\fP do not show the specified built-in columns. May be invoked multiple times, or with a comma-separated list of column names. @@ -354,6 +387,38 @@ CPU pCPU%c1 CPU%c1 .fi +.SH ADD PMT COUNTER EXAMPLE +Here we limit turbostat to showing just the CPU number 0. +We add two counters, showing crystal clock count and the DC6 residency. +All the parameters passed are based on the metadata found in the PMT XML files. + +For the crystal clock count, we +label it with the column header, "XTAL", +we set the type to 'raw', to read the number of clock ticks in hex, +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +For the DC6 residency counter, we +label it with the column header, "Die%c6", +we set the type to 'txtal_time', to obtain the percent residency value +we set the format to 'delta', to display the difference in ticks during the measurement interval, +we set the domain to 'package0', to collect it and associate it with the whole package number 0, +we set the offset to '0', which is a offset of the counter within the PMT MMIO region, +we set the lsb and msb to cover all 64 bits of the read 64 bit value, +and finally we set the guid to '0x1a067102', that identifies the PMT MMIO region to which the 'offset' is applied to read the counter value. + +.nf +sudo ./turbostat --quiet --cpu 0 --show CPU --add pmt,name=XTAL,type=raw,format=delta,domain=package0,offset=0,lsb=0,msb=63,guid=0x1a067102 --add pmt,name=Die%c6,type=txtal_time,format=delta,domain=package0,offset=120,lsb=0,msb=63,guid=0x1a067102 +0.104352 sec +CPU XTAL Die%c6 +- 0x0000006d4d957ca7 0.00 +0 0x0000006d4d957ca7 0.00 +0.102448 sec +.fi + .SH INPUT For interval-mode, turbostat will immediately end the current interval -- cgit v1.2.3-70-g09d2 From 19d076903b95896ce55c7cc3679f795731591ac6 Mon Sep 17 00:00:00 2001 From: Patryk Wlazlyn Date: Wed, 24 Jul 2024 13:17:30 +0200 Subject: tools/power turbostat: Include umask=%x in perf counter's config Some counters, like cpu/cache-misses/, expose and require umask=%x parameter alongside event=%x in the sysfs perf counter's event file. This change make sure we parse and use it when opening user added counters. Signed-off-by: Patryk Wlazlyn Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 60 +++++++++++++++++++++++++++++------ 1 file changed, 50 insertions(+), 10 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index f769e8beabd3..ed5f032c6b3f 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -4017,15 +4017,55 @@ static unsigned int read_perf_type(const char *subsys) return read_perf_counter_info_n(path, format); } -static unsigned int read_rapl_config(const char *subsys, const char *event_name) +static unsigned int read_perf_config(const char *subsys, const char *event_name) { const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s"; - const char *const format = "event=%x"; + FILE *fconfig = NULL; char path[128]; + char config_str[64]; + unsigned int config; + unsigned int umask; + bool has_config = false; + bool has_umask = false; + unsigned int ret = -1; snprintf(path, sizeof(path), path_format, subsys, event_name); - return read_perf_counter_info_n(path, format); + fconfig = fopen(path, "r"); + if (!fconfig) + return -1; + + if (fgets(config_str, ARRAY_SIZE(config_str), fconfig) != config_str) + goto cleanup_and_exit; + + for (char *pconfig_str = &config_str[0]; pconfig_str;) { + if (sscanf(pconfig_str, "event=%x", &config) == 1) { + has_config = true; + goto next; + } + + if (sscanf(pconfig_str, "umask=%x", &umask) == 1) { + has_umask = true; + goto next; + } + + next: + pconfig_str = strchr(pconfig_str, ','); + if (pconfig_str) { + *pconfig_str = '\0'; + ++pconfig_str; + } + } + + if (!has_umask) + umask = 0; + + if (has_config) + ret = (umask << 8) | config; + +cleanup_and_exit: + fclose(fconfig); + return ret; } static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_name) @@ -4044,7 +4084,7 @@ static unsigned int read_perf_rapl_unit(const char *subsys, const char *event_na return RAPL_UNIT_INVALID; } -static double read_perf_rapl_scale(const char *subsys, const char *event_name) +static double read_perf_scale(const char *subsys, const char *event_name) { const char *const path_format = "/sys/bus/event_source/devices/%s/events/%s.scale"; const char *const format = "%lf"; @@ -7425,7 +7465,7 @@ int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struc if (no_perf) return -1; - const double scale = read_perf_rapl_scale(cai->perf_subsys, cai->perf_name); + const double scale = read_perf_scale(cai->perf_subsys, cai->perf_name); if (scale == 0.0) return -1; @@ -7436,7 +7476,7 @@ int add_rapl_perf_counter_(int cpu, struct rapl_counter_info_t *rci, const struc return -1; const unsigned int rapl_type = read_perf_type(cai->perf_subsys); - const unsigned int rapl_energy_pkg_config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int rapl_energy_pkg_config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, rapl_type, rapl_energy_pkg_config, rci->fd_perf, PERF_FORMAT_GROUP); @@ -7598,7 +7638,7 @@ int add_cstate_perf_counter_(int cpu, struct cstate_counter_info_t *cci, const s return -1; const unsigned int type = read_perf_type(cai->perf_subsys); - const unsigned int config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, type, config, *pfd_group, PERF_FORMAT_GROUP); @@ -7628,7 +7668,7 @@ int add_msr_perf_counter_(int cpu, struct msr_counter_info_t *cci, const struct return -1; const unsigned int type = read_perf_type(cai->perf_subsys); - const unsigned int config = read_rapl_config(cai->perf_subsys, cai->perf_name); + const unsigned int config = read_perf_config(cai->perf_subsys, cai->perf_name); const int fd_counter = open_perf_counter(cpu, type, config, cci->fd_perf, PERF_FORMAT_GROUP); @@ -8571,7 +8611,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) continue; } - perf_config = read_rapl_config(pinfo->device, pinfo->event); + perf_config = read_perf_config(pinfo->device, pinfo->event); if (perf_config == (unsigned int)-1) { warnx("%s: perf/%s/%s: failed to read %s", __func__, pinfo->device, pinfo->event, "config"); @@ -8579,7 +8619,7 @@ int added_perf_counters_init_(struct perf_counter_info *pinfo) } /* Scale is not required, some counters just don't have it. */ - perf_scale = read_perf_rapl_scale(pinfo->device, pinfo->event); + perf_scale = read_perf_scale(pinfo->device, pinfo->event); if (perf_scale == 0.0) perf_scale = 1.0; -- cgit v1.2.3-70-g09d2 From 866d2d36b81d7d0e6d91423b6dd9b1bcfd0510dd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 26 Jul 2024 14:16:28 -0400 Subject: tools/power turbostat: version 2024.07.26 Release 2024.07.26: Enable turbostat extensions to add both perf and PMT (Intel Platform Monitoring Technology) counters from the cmdline. Demonstrate PMT access with built-in support for Meteor Lake's Die%c6 counter. This commit: Clean up white-space nits introduced since version 2024.05.10 Signed-off-by: Len Brown --- tools/power/x86/turbostat/turbostat.c | 105 +++++++++++++++++----------------- 1 file changed, 52 insertions(+), 53 deletions(-) (limited to 'tools/power') diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index ed5f032c6b3f..089220aaa5c9 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -1057,8 +1057,7 @@ void probe_platform_features(unsigned int family, unsigned int model) return; for (i = 0; turbostat_pdata[i].features; i++) { - if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && - VFM_MODEL(turbostat_pdata[i].vfm) == model) { + if (VFM_FAMILY(turbostat_pdata[i].vfm) == family && VFM_MODEL(turbostat_pdata[i].vfm) == model) { platform = turbostat_pdata[i].features; return; } @@ -1448,28 +1447,28 @@ enum msr_arch_info_index { static struct msr_counter_arch_info msr_counter_arch_infos[] = { [MSR_ARCH_INFO_APERF_INDEX] = { - .perf_subsys = "msr", - .perf_name = "aperf", - .msr = MSR_IA32_APERF, - .msr_mask = 0xFFFFFFFFFFFFFFFF, - .rci_index = MSR_RCI_INDEX_APERF, - }, + .perf_subsys = "msr", + .perf_name = "aperf", + .msr = MSR_IA32_APERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_APERF, + }, [MSR_ARCH_INFO_MPERF_INDEX] = { - .perf_subsys = "msr", - .perf_name = "mperf", - .msr = MSR_IA32_MPERF, - .msr_mask = 0xFFFFFFFFFFFFFFFF, - .rci_index = MSR_RCI_INDEX_MPERF, - }, + .perf_subsys = "msr", + .perf_name = "mperf", + .msr = MSR_IA32_MPERF, + .msr_mask = 0xFFFFFFFFFFFFFFFF, + .rci_index = MSR_RCI_INDEX_MPERF, + }, [MSR_ARCH_INFO_SMI_INDEX] = { - .perf_subsys = "msr", - .perf_name = "smi", - .msr = MSR_SMI_COUNT, - .msr_mask = 0xFFFFFFFF, - .rci_index = MSR_RCI_INDEX_SMI, - }, + .perf_subsys = "msr", + .perf_name = "smi", + .msr = MSR_SMI_COUNT, + .msr_mask = 0xFFFFFFFF, + .rci_index = MSR_RCI_INDEX_SMI, + }, }; /* Can be redefined when compiling, useful for testing. */ @@ -1535,14 +1534,14 @@ struct pmt_counter { unsigned int pmt_counter_get_width(const struct pmt_counter *p) { - return (p->msb - p->lsb)+1; + return (p->msb - p->lsb) + 1; } void pmt_counter_resize_(struct pmt_counter *pcounter, unsigned int new_size) { struct pmt_domain_info *new_mem; - new_mem = (struct pmt_domain_info*) reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); + new_mem = (struct pmt_domain_info *)reallocarray(pcounter->domains, new_size, sizeof(*pcounter->domains)); if (!new_mem) { fprintf(stderr, "%s: failed to allocate memory for PMT counters\n", __func__); exit(1); @@ -1567,7 +1566,7 @@ void pmt_counter_resize(struct pmt_counter *pcounter, unsigned int new_size) */ if (new_size < 8) new_size = 8; - new_size = MAX(new_size, pcounter->num_domains*2); + new_size = MAX(new_size, pcounter->num_domains * 2); pmt_counter_resize_(pcounter, new_size); } @@ -2282,7 +2281,7 @@ void print_header(char *delim) ppmt = sys.pmt_tp; while (ppmt) { - switch(ppmt->type) { + switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); @@ -2356,7 +2355,7 @@ void print_header(char *delim) ppmt = sys.pmt_cp; while (ppmt) { - switch(ppmt->type) { + switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); @@ -2487,7 +2486,7 @@ void print_header(char *delim) ppmt = sys.pmt_pp; while (ppmt) { - switch(ppmt->type) { + switch (ppmt->type) { case PMT_TYPE_RAW: if (pmt_counter_get_width(ppmt) <= 32) outp += sprintf(outp, "%s%10.10s", (printed++ ? delim : ""), ppmt->name); @@ -2969,7 +2968,8 @@ int format_counters(struct thread_data *t, struct core_data *c, struct pkg_data outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->pc10 / tsc); if (DO_BIC(BIC_Diec6)) - outp += sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); + outp += + sprintf(outp, "%s%.2f", (printed++ ? delim : ""), 100.0 * p->die_c6 / crystal_hz / interval_float); if (DO_BIC(BIC_CPU_LPI)) { if (p->cpu_lpi >= 0) @@ -4049,7 +4049,7 @@ static unsigned int read_perf_config(const char *subsys, const char *event_name) goto next; } - next: +next: pconfig_str = strchr(pconfig_str, ','); if (pconfig_str) { *pconfig_str = '\0'; @@ -4463,9 +4463,9 @@ unsigned long pmt_gen_value_mask(unsigned int lsb, unsigned int msb) if (msb == 63) mask = 0xffffffffffffffff; else - mask = ((1<<(msb+1))-1); + mask = ((1 << (msb + 1)) - 1); - mask -= (1<device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); + pinfo->device, pinfo->event, cpu, pinfo->fd_perf_per_domain[next_domain]); } pinfo = pinfo->next; @@ -8687,7 +8687,7 @@ int parse_telem_info_file(int fd_dir, const char *info_filename, const char *for return 0; } -struct pmt_mmio* pmt_mmio_open(unsigned int target_guid) +struct pmt_mmio *pmt_mmio_open(unsigned int target_guid) { DIR *dirp; struct dirent *entry; @@ -8793,7 +8793,7 @@ loop_cleanup_and_break: return ret; } -struct pmt_mmio* pmt_mmio_find(unsigned int guid) +struct pmt_mmio *pmt_mmio_find(unsigned int guid) { struct pmt_mmio *pmmio = pmt_mmios; @@ -8802,22 +8802,22 @@ struct pmt_mmio* pmt_mmio_find(unsigned int guid) return pmmio; pmmio = pmmio->next; - } + } return NULL; } -void* pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) +void *pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offset) { char *ret; /* Get base of mmaped PMT file. */ - ret = (char*)pmmio->mmio_base; + ret = (char *)pmmio->mmio_base; /* * Apply PMT MMIO offset to obtain beginning of the mmaped telemetry data. * It's not guaranteed that the mmaped memory begins with the telemetry data - * - we might have to apply the offset first. + * - we might have to apply the offset first. */ ret += pmmio->pmt_offset; @@ -8827,7 +8827,7 @@ void* pmt_get_counter_pointer(struct pmt_mmio *pmmio, unsigned long counter_offs return ret; } -struct pmt_mmio* pmt_add_guid(unsigned int guid) +struct pmt_mmio *pmt_add_guid(unsigned int guid) { struct pmt_mmio *ret; @@ -8843,7 +8843,7 @@ enum pmt_open_mode { PMT_OPEN_REQUIRED, /* Open failure is a fatal error. */ }; -struct pmt_counter* pmt_find_counter(struct pmt_counter *pcounter, const char *name) +struct pmt_counter *pmt_find_counter(struct pmt_counter *pcounter, const char *name) { while (pcounter) { if (strcmp(pcounter->name, name) == 0) @@ -8855,9 +8855,9 @@ struct pmt_counter* pmt_find_counter(struct pmt_counter *pcounter, const char *n return pcounter; } -struct pmt_counter** pmt_get_scope_root(enum counter_scope scope) +struct pmt_counter **pmt_get_scope_root(enum counter_scope scope) { - switch(scope) { + switch (scope) { case SCOPE_CPU: return &sys.pmt_tp; case SCOPE_CORE: @@ -8873,7 +8873,7 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, { /* Make sure the new domain fits. */ if (domain_id >= pcounter->num_domains) - pmt_counter_resize(pcounter, domain_id+1); + pmt_counter_resize(pcounter, domain_id + 1); assert(pcounter->domains); assert(domain_id < pcounter->num_domains); @@ -8882,12 +8882,12 @@ void pmt_counter_add_domain(struct pmt_counter *pcounter, unsigned long *pmmio, } int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, - unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, - enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) + unsigned int lsb, unsigned int msb, unsigned int offset, enum counter_scope scope, + enum counter_format format, unsigned int domain_id, enum pmt_open_mode mode) { struct pmt_mmio *mmio; struct pmt_counter *pcounter; - struct pmt_counter ** const pmt_root = pmt_get_scope_root(scope); + struct pmt_counter **const pmt_root = pmt_get_scope_root(scope); bool new_counter = false; int conflict = 0; @@ -8927,7 +8927,7 @@ int pmt_add_counter(unsigned int guid, const char *name, enum pmt_datatype type, } if (new_counter) { - strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name)-1); + strncpy(pcounter->name, name, ARRAY_SIZE(pcounter->name) - 1); pcounter->type = type; pcounter->scope = scope; pcounter->lsb = lsb; @@ -9071,7 +9071,7 @@ int get_and_dump_counters(void) void print_version() { - fprintf(outf, "turbostat version 2024.05.10 - Len Brown \n"); + fprintf(outf, "turbostat version 2024.07.26 - Len Brown \n"); } #define COMMAND_LINE_SIZE 2048 @@ -9299,7 +9299,7 @@ int add_perf_counter(const char *perf_device, const char *perf_event, const char // FIXME: we might not have debug here yet if (debug) fprintf(stderr, "%s: %s/%s, name: %s, scope%d\n", - __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); + __func__, pinfo->device, pinfo->event, pinfo->name, pinfo->scope); return 0; } @@ -9450,10 +9450,10 @@ void parse_add_command_pmt(char *add_command) bool has_offset = false; bool has_lsb = false; bool has_msb = false; - bool has_format = true; /* Format has a default value. */ + bool has_format = true; /* Format has a default value. */ bool has_guid = false; bool has_scope = false; - bool has_type = true; /* Type has a default value. */ + bool has_type = true; /* Type has a default value. */ /* Consume the "pmt," prefix. */ add_command = strchr(add_command, ','); @@ -9490,7 +9490,7 @@ void parse_add_command_pmt(char *add_command) if (!has_scope) { printf("%s: invalid value for scope. Expected cpu%%u, core%%u or package%%u.\n", - __func__); + __func__); exit(1); } @@ -9536,8 +9536,7 @@ next: } if (strlen(name) >= PMT_COUNTER_NAME_SIZE_BYTES) { - printf("%s: name has to be at most %d characters long\n", - __func__, PMT_COUNTER_NAME_SIZE_BYTES); + printf("%s: name has to be at most %d characters long\n", __func__, PMT_COUNTER_NAME_SIZE_BYTES); exit(1); } -- cgit v1.2.3-70-g09d2