From 640d5175a671cd0df0b9e3b5935dc80fc5248973 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Nov 2017 15:13:29 -0300 Subject: perf evlist: Set the correct idx when adding dummy events The evsel->idx field is used mainly to access the right bucket in per-event arrays such as the annotation ones, but also to set evsel->tracking, that in turn will decide what of the events will ask for PERF_RECORD_{MMAP,COMM,EXEC} to be generated, i.e. which perf_event_attr will have its mmap, etc fields set. When we were adding the "dummy" event using perf_evlist__add_dummy() we were not setting it correctly, which could result in multiple tracking events. Now that I'll try using a dummy event to be the tracking one when using 'perf record --delay', i.e. when we process the --delay setting we may already have the evlist set up, like with: perf record -e cycles,instructions --delay 1000 ./workload We will need to add a "dummy" event, then reset evsel->tracking for the first event, "cycles", and set it instead to the dummy one, and also setting its attr.enable_on_exec, so that we get the PERF_RECORD_MMAP, etc metadata events while waiting to enable the explicitely requested events, so lets get this straight and set the right evsel->idx. Cc: Adrian Hunter Cc: Bram Stolk Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-nrdfchshqxf7diszhxcecqb9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index c6c891e154a6..ccb749f9a83f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -257,7 +257,7 @@ int perf_evlist__add_dummy(struct perf_evlist *evlist) .config = PERF_COUNT_SW_DUMMY, .size = sizeof(attr), /* to capture ABI version */ }; - struct perf_evsel *evsel = perf_evsel__new(&attr); + struct perf_evsel *evsel = perf_evsel__new_idx(&attr, evlist->nr_entries); if (evsel == NULL) return -ENOMEM; -- cgit v1.2.3-70-g09d2 From d3dbf43c56f9176be325ce1cc72a44c8d3c210dc Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 3 Nov 2017 15:34:34 -0300 Subject: perf record: Generate PERF_RECORD_{MMAP,COMM,EXEC} with --delay When we use an initial delay, e.g.: 'perf record --delay 1000', we do not enable the events until that delay has passed after we started the workload, including the tracking event, i.e. the one for which we have attr.mmap, etc, enabled to ask the kernel to generate the PERF_RECORD_{MMAP,COMM,EXEC} metadata events that will then allow us to resolve addresses in samples to the map, dso and symbol. There will be a shadow that even synthesizing samples won't cover, i.e. the workload that we start and other processes forking while we wait for the initial delay to expire. So use a dummy event to be the tracking one and make it be enabled on exec. Before: # perf record --delay 1000 stress --cpu 1 --timeout 5 stress: info: [9029] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd stress: info: [9029] successful run completed in 5s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.624 MB perf.data (15908 samples) ] # perf script | head :9031 9031 32001.826888: 1 cycles:ppp: ffffffff831aa30d event_function (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826893: 1 cycles:ppp: ffffffff8300d1a0 intel_bts_enable_local (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826895: 7 cycles:ppp: ffffffff83023870 sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826897: 103 cycles:ppp: ffffffff8300c331 intel_pmu_handle_irq (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826899: 1615 cycles:ppp: ffffffff830231f8 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826902: 26724 cycles:ppp: ffffffff8384c6a7 native_irq_return_iret (/lib/modules/4.14.0-rc6+/build/vmlinux) :9031 9031 32001.826913: 329739 cycles:ppp: 7fb2a5410932 [unknown] ([unknown]) :9031 9031 32001.827033: 1225451 cycles:ppp: 7fb2a5410930 [unknown] ([unknown]) :9031 9031 32001.827474: 1391725 cycles:ppp: 7fb2a5410930 [unknown] ([unknown]) :9031 9031 32001.827978: 1233697 cycles:ppp: 7fb2a5410928 [unknown] ([unknown]) # After: # perf record --delay 1000 stress --cpu 1 --timeout 5 stress: info: [9741] dispatching hogs: 1 cpu, 0 io, 0 vm, 0 hdd stress: info: [9741] successful run completed in 5s [ perf record: Woken up 3 times to write data ] [ perf record: Captured and wrote 0.751 MB perf.data (15976 samples) ] # perf script | head stress 9742 32110.959106: 1 cycles:ppp: ffffffff831b26f6 __perf_event_task_sched_in (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959110: 1 cycles:ppp: ffffffff8300c2e9 intel_pmu_handle_irq (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959112: 7 cycles:ppp: ffffffff830231e0 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959115: 101 cycles:ppp: ffffffff83023870 sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959117: 1533 cycles:ppp: ffffffff830231f8 native_sched_clock (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959119: 23992 cycles:ppp: ffffffff831b0900 ctx_sched_in (/lib/modules/4.14.0-rc6+/build/vmlinux) stress 9742 32110.959129: 329406 cycles:ppp: 7f4b1b661930 __random_r (/usr/lib64/libc-2.25.so) stress 9742 32110.959249: 1288322 cycles:ppp: 5566e1e7cbc9 hogcpu (/usr/bin/stress) stress 9742 32110.959712: 1464046 cycles:ppp: 7f4b1b66179e __random (/usr/lib64/libc-2.25.so) stress 9742 32110.960241: 1266918 cycles:ppp: 7f4b1b66195b __random_r (/usr/lib64/libc-2.25.so) # Reported-by: Bram Stolk Tested-by: Bram Stolk Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: 6619a53ef757 ("perf record: Add --initial-delay option") Link: http://lkml.kernel.org/n/tip-nrdfchshqxf7diszhxcecqb9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3d7f33e19df2..5f78ce943407 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -339,6 +339,22 @@ static int record__open(struct record *rec) struct perf_evsel_config_term *err_term; int rc = 0; + /* + * For initial_delay we need to add a dummy event so that we can track + * PERF_RECORD_MMAP while we wait for the initial delay to enable the + * real events, the ones asked by the user. + */ + if (opts->initial_delay) { + if (perf_evlist__add_dummy(evlist)) + return -ENOMEM; + + pos = perf_evlist__first(evlist); + pos->tracking = 0; + pos = perf_evlist__last(evlist); + pos->tracking = 1; + pos->attr.enable_on_exec = 1; + } + perf_evlist__config(evlist, opts, &callchain_param); evlist__for_each_entry(evlist, pos) { -- cgit v1.2.3-70-g09d2 From a17c4ca0ddef659d33fb6661995bd74e1a6a6101 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:25 +0200 Subject: perf annotate: Add annotation_line struct In order to make the annotation support generic, addadding 'struct annotation_line', which will hold generic data common to annotation sources (such as the one for python scripts, coming on upcoming patches). Having this, we can add different annotation line support other than objdump disasm. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 34 +++++++++++++++++----------------- tools/perf/ui/gtk/annotate.c | 6 +++--- tools/perf/util/annotate.c | 20 ++++++++++---------- tools/perf/util/annotate.h | 20 ++++++++++++-------- 4 files changed, 42 insertions(+), 38 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 8f7f59d1a2b5..a8c2f7405a41 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -84,7 +84,7 @@ static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { if (annotate_browser__opts.hide_src_code) { - struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); return dl->offset == -1; } @@ -123,7 +123,7 @@ static int annotate_browser__cycles_width(struct annotate_browser *ab) static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *dl = list_entry(entry, struct disasm_line, node); + struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); struct browser_disasm_line *bdl = disasm_line__browser(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && @@ -286,7 +286,7 @@ static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sy static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) { - struct disasm_line *pos = list_prev_entry(cursor, node); + struct disasm_line *pos = list_prev_entry(cursor, al.node); const char *name; if (!pos) @@ -404,16 +404,16 @@ static void annotate_browser__set_top(struct annotate_browser *browser, browser->b.top_idx = browser->b.index = idx; while (browser->b.top_idx != 0 && back != 0) { - pos = list_entry(pos->node.prev, struct disasm_line, node); + pos = list_entry(pos->al.node.prev, struct disasm_line, al.node); - if (disasm_line__filter(&browser->b, &pos->node)) + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; --browser->b.top_idx; --back; } - browser->b.top = pos; + browser->b.top = &pos->al; browser->b.navkeypressed = true; } @@ -446,7 +446,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, pthread_mutex_lock(¬es->lock); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos = disasm_line__browser(pos); const char *path = NULL; double max_percent = 0.0; @@ -492,7 +492,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); - dl = list_entry(browser->b.top, struct disasm_line, node); + dl = list_entry(browser->b.top, struct disasm_line, al.node); bdl = disasm_line__browser(dl); if (annotate_browser__opts.hide_src_code) { @@ -589,10 +589,10 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows struct disasm_line *pos; *idx = 0; - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { if (pos->offset == offset) return pos; - if (!disasm_line__filter(&browser->b, &pos->node)) + if (!disasm_line__filter(&browser->b, &pos->al.node)) ++*idx; } @@ -630,8 +630,8 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows struct disasm_line *pos = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue(pos, ¬es->src->source, node) { - if (disasm_line__filter(&browser->b, &pos->node)) + list_for_each_entry_continue(pos, ¬es->src->source, al.node) { + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; ++*idx; @@ -669,8 +669,8 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse struct disasm_line *pos = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue_reverse(pos, ¬es->src->source, node) { - if (disasm_line__filter(&browser->b, &pos->node)) + list_for_each_entry_continue_reverse(pos, ¬es->src->source, al.node) { + if (disasm_line__filter(&browser->b, &pos->al.node)) continue; --*idx; @@ -1134,7 +1134,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, notes = symbol__annotation(sym); browser.start = map__rip_2objdump(map, sym->start); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos; size_t line_len = strlen(pos->line); @@ -1174,8 +1174,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, annotate_browser__update_addr_width(&browser); ret = annotate_browser__run(&browser, evsel, hbt); - list_for_each_entry_safe(pos, n, ¬es->src->source, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, ¬es->src->source, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index fc7a2e105bfd..cf8092676c7a 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -119,7 +119,7 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, gtk_tree_view_set_model(GTK_TREE_VIEW(view), GTK_TREE_MODEL(store)); g_object_unref(GTK_TREE_MODEL(store)); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { GtkTreeIter iter; int ret = 0; @@ -148,8 +148,8 @@ static int perf_gtk__annotate_symbol(GtkWidget *window, struct symbol *sym, gtk_container_add(GTK_CONTAINER(window), view); - list_for_each_entry_safe(pos, n, ¬es->src->source, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, ¬es->src->source, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index da1c4c4a0dd8..004e33dc897c 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -931,12 +931,12 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r static void disasm__add(struct list_head *head, struct disasm_line *line) { - list_add_tail(&line->node, head); + list_add_tail(&line->al.node, head); } struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) { - list_for_each_entry_continue(pos, head, node) + list_for_each_entry_continue(pos, head, al.node) if (pos->offset >= 0) return pos; @@ -1122,7 +1122,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 1; if (queue != NULL) { - list_for_each_entry_from(queue, ¬es->src->source, node) { + list_for_each_entry_from(queue, ¬es->src->source, al.node) { if (queue == dl) break; disasm_line__print(queue, sym, start, evsel, len, @@ -1305,7 +1305,7 @@ static void delete_last_nop(struct symbol *sym) struct disasm_line *dl; while (!list_empty(list)) { - dl = list_entry(list->prev, struct disasm_line, node); + dl = list_entry(list->prev, struct disasm_line, al.node); if (dl->ins.ops) { if (dl->ins.ops != &nop_ops) @@ -1317,7 +1317,7 @@ static void delete_last_nop(struct symbol *sym) return; } - list_del(&dl->node); + list_del(&dl->al.node); disasm_line__free(dl); } } @@ -1844,7 +1844,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); - list_for_each_entry(pos, ¬es->src->source, node) { + list_for_each_entry(pos, ¬es->src->source, al.node) { if (context && queue == NULL) { queue = pos; queue_len = 0; @@ -1874,7 +1874,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (!context) break; if (queue_len == context) - queue = list_entry(queue->node.next, typeof(*queue), node); + queue = list_entry(queue->al.node.next, typeof(*queue), al.node); else ++queue_len; break; @@ -1911,8 +1911,8 @@ void disasm__purge(struct list_head *head) { struct disasm_line *pos, *n; - list_for_each_entry_safe(pos, n, head, node) { - list_del(&pos->node); + list_for_each_entry_safe(pos, n, head, al.node) { + list_del(&pos->al.node); disasm_line__free(pos); } } @@ -1939,7 +1939,7 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) struct disasm_line *pos; size_t printed = 0; - list_for_each_entry(pos, head, node) + list_for_each_entry(pos, head, al.node) printed += disasm_line__fprintf(pos, fp); return printed; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f6ba3560de5e..cc3cf6b50d55 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,15 +59,19 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct annotation_line { + struct list_head node; +}; + struct disasm_line { - struct list_head node; - s64 offset; - char *line; - struct ins ins; - int line_nr; - float ipc; - u64 cycles; - struct ins_operands ops; + struct annotation_line al; + s64 offset; + char *line; + struct ins ins; + int line_nr; + float ipc; + u64 cycles; + struct ins_operands ops; }; static inline bool disasm_line__has_offset(const struct disasm_line *dl) -- cgit v1.2.3-70-g09d2 From d5490b9647e6e41b203186ed0d73b4103f139fda Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:26 +0200 Subject: perf annotate: Move line/offset into annotation_line struct Move the line/line_nr/offset menbers to the annotation_line struct to be used as generic members for any annotation source. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-4-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 45 ++++++++++++++++++++------------------- tools/perf/ui/gtk/annotate.c | 14 ++++++------ tools/perf/util/annotate.c | 41 ++++++++++++++++++----------------- tools/perf/util/annotate.h | 6 +++--- 4 files changed, 54 insertions(+), 52 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index a8c2f7405a41..73d921c3e3ec 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -84,8 +84,9 @@ static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, void *entry) { if (annotate_browser__opts.hide_src_code) { - struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - return dl->offset == -1; + struct annotation_line *al = list_entry(entry, struct annotation_line, node); + + return al->offset == -1; } return false; @@ -141,7 +142,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int percent_max = bdl->samples[i].percent; } - if ((row == 0) && (dl->offset == -1 || percent_max == 0.0)) { + if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { if (dl->ipc == 0.0 && dl->cycles == 0) show_title = true; @@ -149,7 +150,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int show_title = true; } - if (dl->offset != -1 && percent_max != 0.0) { + if (dl->al.offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, bdl->samples[i].percent, @@ -199,19 +200,19 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) width += 1; - if (!*dl->line) + if (!*dl->al.line) ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width); - else if (dl->offset == -1) { - if (dl->line_nr && annotate_browser__opts.show_linenr) + else if (dl->al.offset == -1) { + if (dl->al.line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", - ab->addr_width + 1, dl->line_nr); + ab->addr_width + 1, dl->al.line_nr); else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); ui_browser__write_nstring(browser, bf, printed); - ui_browser__write_nstring(browser, dl->line, width - printed - pcnt_width - cycles_width + 1); + ui_browser__write_nstring(browser, dl->al.line, width - printed - pcnt_width - cycles_width + 1); } else { - u64 addr = dl->offset; + u64 addr = dl->al.offset; int color = -1; if (!annotate_browser__opts.use_offset) @@ -247,7 +248,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__set_color(browser, color); if (dl->ins.ops && dl->ins.ops->scnprintf) { if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > dl->offset; + bool fwd = dl->ops.target.offset > dl->al.offset; ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : SLSMG_UARROW_CHAR); @@ -452,7 +453,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, double max_percent = 0.0; int i; - if (pos->offset == -1) { + if (pos->al.offset == -1) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -464,8 +465,8 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, - pos->offset, - next ? next->offset : len, + pos->al.offset, + next ? next->al.offset : len, &path, &sample); bpos->samples[i].he = sample; @@ -590,7 +591,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows *idx = 0; list_for_each_entry(pos, ¬es->src->source, al.node) { - if (pos->offset == offset) + if (pos->al.offset == offset) return pos; if (!disasm_line__filter(&browser->b, &pos->al.node)) ++*idx; @@ -636,7 +637,7 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows ++*idx; - if (pos->line && strstr(pos->line, s) != NULL) + if (pos->al.line && strstr(pos->al.line, s) != NULL) return pos; } @@ -675,7 +676,7 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse --*idx; - if (pos->line && strstr(pos->line, s) != NULL) + if (pos->al.line && strstr(pos->al.line, s) != NULL) return pos; } @@ -901,7 +902,7 @@ show_help: case K_RIGHT: if (browser->selection == NULL) ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); - else if (browser->selection->offset == -1) + else if (browser->selection->al.offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); else if (!browser->selection->ins.ops) goto show_sup_ins; @@ -1136,13 +1137,13 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos; - size_t line_len = strlen(pos->line); + size_t line_len = strlen(pos->al.line); if (browser.b.width < line_len) browser.b.width = line_len; bpos = disasm_line__browser(pos); bpos->idx = browser.nr_entries++; - if (pos->offset != -1) { + if (pos->al.offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly @@ -1151,8 +1152,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, * * E.g. copy_user_generic_unrolled */ - if (pos->offset < (s64)size) - browser.offsets[pos->offset] = pos; + if (pos->al.offset < (s64)size) + browser.offsets[pos->al.offset] = pos; } else bpos->idx_asm = -1; } diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index cf8092676c7a..162f15712d2d 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -31,14 +31,14 @@ static int perf_gtk__get_percent(char *buf, size_t size, struct symbol *sym, strcpy(buf, ""); - if (dl->offset == (s64) -1) + if (dl->al.offset == (s64) -1) return 0; symhist = annotation__histogram(symbol__annotation(sym), evidx); - if (!symbol_conf.event_group && !symhist->addr[dl->offset].nr_samples) + if (!symbol_conf.event_group && !symhist->addr[dl->al.offset].nr_samples) return 0; - percent = 100.0 * symhist->addr[dl->offset].nr_samples / symhist->nr_samples; + percent = 100.0 * symhist->addr[dl->al.offset].nr_samples / symhist->nr_samples; markup = perf_gtk__get_percent_color(percent); if (markup) @@ -57,16 +57,16 @@ static int perf_gtk__get_offset(char *buf, size_t size, struct symbol *sym, strcpy(buf, ""); - if (dl->offset == (s64) -1) + if (dl->al.offset == (s64) -1) return 0; - return scnprintf(buf, size, "%"PRIx64, start + dl->offset); + return scnprintf(buf, size, "%"PRIx64, start + dl->al.offset); } static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl) { int ret = 0; - char *line = g_markup_escape_text(dl->line, -1); + char *line = g_markup_escape_text(dl->al.line, -1); const char *markup = ""; strcpy(buf, ""); @@ -74,7 +74,7 @@ static int perf_gtk__get_line(char *buf, size_t size, struct disasm_line *dl) if (!line) return 0; - if (dl->offset != (s64) -1) + if (dl->al.offset != (s64) -1) markup = NULL; if (markup) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 004e33dc897c..e8b69001229d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -886,14 +886,15 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); if (dl != NULL) { - dl->offset = offset; - dl->line = strdup(line); - dl->line_nr = line_nr; - if (dl->line == NULL) + dl->al.offset = offset; + dl->al.line = strdup(line); + dl->al.line_nr = line_nr; + + if (dl->al.line == NULL) goto out_delete; if (offset != -1) { - if (disasm_line__parse(dl->line, &dl->ins.name, &dl->ops.raw) < 0) + if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; disasm_line__init_ins(dl, arch, map); @@ -903,7 +904,7 @@ static struct disasm_line *disasm_line__new(s64 offset, char *line, return dl; out_free_line: - zfree(&dl->line); + zfree(&dl->al.line); out_delete: free(dl); return NULL; @@ -911,7 +912,7 @@ out_delete: void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->line); + zfree(&dl->al.line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else @@ -937,7 +938,7 @@ static void disasm__add(struct list_head *head, struct disasm_line *line) struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) { list_for_each_entry_continue(pos, head, al.node) - if (pos->offset >= 0) + if (pos->al.offset >= 0) return pos; return NULL; @@ -1077,7 +1078,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st static const char *prev_line; static const char *prev_color; - if (dl->offset != -1) { + if (dl->al.offset != -1) { const char *path = NULL; double percent, max_percent = 0.0; double *ppercents = &percent; @@ -1086,7 +1087,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->offset; + s64 offset = dl->al.offset; const u64 addr = start + offset; struct disasm_line *next; struct block_range *br; @@ -1106,7 +1107,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st percent = disasm__calc_percent(notes, notes->src->lines ? i : evsel->idx + i, offset, - next ? next->offset : (s64) len, + next ? next->al.offset : (s64) len, &path, &sample); ppercents[i] = percent; @@ -1165,7 +1166,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st br = block_range__find(addr); color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->line); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); printf("\n"); @@ -1186,10 +1187,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->line) + if (!*dl->al.line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->line); + printf(" %*s: %s\n", width, " ", dl->al.line); } return 0; @@ -1311,9 +1312,9 @@ static void delete_last_nop(struct symbol *sym) if (dl->ins.ops != &nop_ops) return; } else { - if (!strstr(dl->line, " nop ") && - !strstr(dl->line, " nopl ") && - !strstr(dl->line, " nopw ")) + if (!strstr(dl->al.line, " nop ") && + !strstr(dl->al.line, " nopl ") && + !strstr(dl->al.line, " nopw ")) return; } @@ -1921,10 +1922,10 @@ static size_t disasm_line__fprintf(struct disasm_line *dl, FILE *fp) { size_t printed; - if (dl->offset == -1) - return fprintf(fp, "%s\n", dl->line); + if (dl->al.offset == -1) + return fprintf(fp, "%s\n", dl->al.line); - printed = fprintf(fp, "%#" PRIx64 " %s", dl->offset, dl->ins.name); + printed = fprintf(fp, "%#" PRIx64 " %s", dl->al.offset, dl->ins.name); if (dl->ops.raw[0] != '\0') { printed += fprintf(fp, "%.*s %s\n", 6 - (int)printed, " ", diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cc3cf6b50d55..b7ca62855760 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -61,14 +61,14 @@ struct annotation; struct annotation_line { struct list_head node; + s64 offset; + char *line; + int line_nr; }; struct disasm_line { struct annotation_line al; - s64 offset; - char *line; struct ins ins; - int line_nr; float ipc; u64 cycles; struct ins_operands ops; -- cgit v1.2.3-70-g09d2 From 37236d5e0b6a765319dec3e64d828cb44ebecac6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:27 +0200 Subject: perf annotate: Move ipc/cycles into annotation_line struct Move ipc/cycles into annotation_line struct to be used as generic members for any annotation source. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-5-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 16 ++++++++-------- tools/perf/util/annotate.h | 4 ++-- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 73d921c3e3ec..d1aff2f7cb6c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -144,7 +144,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { - if (dl->ipc == 0.0 && dl->cycles == 0) + if (dl->al.ipc == 0.0 && dl->al.cycles == 0) show_title = true; } else show_title = true; @@ -178,16 +178,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } } if (ab->have_cycles) { - if (dl->ipc) - ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->ipc); + if (dl->al.ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->al.ipc); else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); else ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); - if (dl->cycles) + if (dl->al.cycles) ui_browser__printf(browser, "%*" PRIu64 " ", - CYCLES_WIDTH - 1, dl->cycles); + CYCLES_WIDTH - 1, dl->al.cycles); else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); else @@ -474,7 +474,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, max_percent = bpos->samples[i].percent; } - if (max_percent < 0.01 && pos->ipc == 0) { + if (max_percent < 0.01 && pos->al.ipc == 0) { RB_CLEAR_NODE(&bpos->rb_node); continue; } @@ -994,7 +994,7 @@ static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, struct disasm_line *dl = browser->offsets[offset]; if (dl) - dl->ipc = ipc; + dl->al.ipc = ipc; } } } @@ -1025,7 +1025,7 @@ static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, count_and_fill(browser, ch->start, offset, ch); dl = browser->offsets[offset]; if (dl && ch->num_aggr) - dl->cycles = ch->cycles_aggr / ch->num_aggr; + dl->al.cycles = ch->cycles_aggr / ch->num_aggr; browser->have_cycles = true; } } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index b7ca62855760..a822c0a4987e 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -64,13 +64,13 @@ struct annotation_line { s64 offset; char *line; int line_nr; + float ipc; + u64 cycles; }; struct disasm_line { struct annotation_line al; struct ins ins; - float ipc; - u64 cycles; struct ins_operands ops; }; -- cgit v1.2.3-70-g09d2 From c34df25b40c20b478634b954a709749aebdc241a Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:28 +0200 Subject: perf annotate: Add symbol__annotate function Add symbol__annotate function to have generic annotation function to be called for all annotation sources. It calls the generic annotation init and then the specific annotation data retrieval function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-6-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 6 ++-- tools/perf/ui/gtk/annotate.c | 4 +-- tools/perf/util/annotate.c | 58 ++++++++++++++++++++++----------------- tools/perf/util/annotate.h | 6 ++-- 5 files changed, 42 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 477a8699f0b5..adfeeb488f1a 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -134,7 +134,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__disassemble(sym, map, NULL, 0, NULL, NULL); + err = symbol__annotate(sym, map, NULL, 0, NULL, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d1aff2f7cb6c..d77994c1cba9 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1120,9 +1120,9 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - sizeof_bdl, &browser.arch, - perf_evsel__env_cpuid(evsel)); + err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + sizeof_bdl, &browser.arch, + perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 162f15712d2d..b498f1a92bb1 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,8 +169,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL); + err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + 0, NULL, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e8b69001229d..f0093918882d 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1425,13 +1425,11 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid) +static int symbol__disassemble(struct symbol *sym, struct map *map, + size_t privsize, struct arch *arch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; - struct arch *arch = NULL; FILE *file; char symfs_filename[PATH_MAX]; struct kcore_extract kce; @@ -1445,25 +1443,6 @@ int symbol__disassemble(struct symbol *sym, struct map *map, if (err) return err; - arch_name = annotate__norm_arch(arch_name); - if (!arch_name) - return -1; - - arch = arch__find(arch_name); - if (arch == NULL) - return -ENOTSUP; - - if (parch) - *parch = arch; - - if (arch->init) { - err = arch->init(arch, cpuid); - if (err) { - pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); - return err; - } - } - pr_debug("%s: filename=%s, sym=%s, start=%#" PRIx64 ", end=%#" PRIx64 "\n", __func__, symfs_filename, sym->name, map->unmap_ip(map, sym->start), map->unmap_ip(map, sym->end)); @@ -1581,6 +1560,35 @@ out_close_stdout: goto out_remove_tmp; } +int symbol__annotate(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch, char *cpuid) +{ + struct arch *arch; + int err; + + arch_name = annotate__norm_arch(arch_name); + if (!arch_name) + return -1; + + arch = arch__find(arch_name); + if (arch == NULL) + return -ENOTSUP; + + if (parch) + *parch = arch; + + if (arch->init) { + err = arch->init(arch, cpuid); + if (err) { + pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); + return err; + } + } + + return symbol__disassemble(sym, map, privsize, arch); +} + static void insert_source_line(struct rb_root *root, struct source_line *src_line) { struct source_line *iter; @@ -1954,8 +1962,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__disassemble(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + 0, NULL, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index a822c0a4987e..e577f9d13a58 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -173,9 +173,9 @@ int hist_entry__inc_addr_samples(struct hist_entry *he, struct perf_sample *samp int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); -int symbol__disassemble(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, - struct arch **parch, char *cpuid); +int symbol__annotate(struct symbol *sym, struct map *map, + const char *arch_name, size_t privsize, + struct arch **parch, char *cpuid); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, -- cgit v1.2.3-70-g09d2 From ea07c5aaed33d23875cd59da8b0892f76e882ccd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:29 +0200 Subject: perf annotate: Add struct annotate_args Adding struct annotate_args to reduce the number of arguments, that need to travel all the way to line allocation. This makes the code easier to read and ease up the changes for following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-7-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f0093918882d..f5bd6826fa66 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -878,12 +878,17 @@ out_free_name: return -1; } -static struct disasm_line *disasm_line__new(s64 offset, char *line, - size_t privsize, int line_nr, +struct annotate_args { + size_t privsize; +}; + +static struct disasm_line *disasm_line__new(struct annotate_args *args, + s64 offset, char *line, + int line_nr, struct arch *arch, struct map *map) { - struct disasm_line *dl = zalloc(sizeof(*dl) + privsize); + struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); if (dl != NULL) { dl->al.offset = offset; @@ -1217,8 +1222,8 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. */ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, - FILE *file, size_t privsize, + struct arch *arch, FILE *file, + struct annotate_args *args, int *line_nr) { struct annotation *notes = symbol__annotation(sym); @@ -1264,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(offset, parsed_line, privsize, *line_nr, arch, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr, arch, map); free(line); (*line_nr)++; @@ -1426,7 +1431,8 @@ static const char *annotate__norm_arch(const char *arch_name) } static int symbol__disassemble(struct symbol *sym, struct map *map, - size_t privsize, struct arch *arch) + struct annotate_args *args, + struct arch *arch) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1526,7 +1532,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, map, arch, file, privsize, + if (symbol__parse_objdump_line(sym, map, arch, file, args, &lineno) < 0) break; nline++; @@ -1564,6 +1570,9 @@ int symbol__annotate(struct symbol *sym, struct map *map, const char *arch_name, size_t privsize, struct arch **parch, char *cpuid) { + struct annotate_args args = { + .privsize = privsize, + }; struct arch *arch; int err; @@ -1586,7 +1595,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, privsize, arch); + return symbol__disassemble(sym, map, &args, arch); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) -- cgit v1.2.3-70-g09d2 From 24fe7b88934b702442597662643222cd0a6a44a6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:30 +0200 Subject: perf annotate: Add arch into struct annotate_args Add arch into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-8-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index f5bd6826fa66..b4d3454618b0 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -880,12 +880,12 @@ out_free_name: struct annotate_args { size_t privsize; + struct arch *arch; }; static struct disasm_line *disasm_line__new(struct annotate_args *args, s64 offset, char *line, int line_nr, - struct arch *arch, struct map *map) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); @@ -902,7 +902,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args, if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, arch, map); + disasm_line__init_ins(dl, args->arch, map); } } @@ -1222,7 +1222,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * The ops.raw part will be parsed further according to type of the instruction. */ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - struct arch *arch, FILE *file, + FILE *file, struct annotate_args *args, int *line_nr) { @@ -1269,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr, arch, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr, map); free(line); (*line_nr)++; @@ -1431,8 +1431,7 @@ static const char *annotate__norm_arch(const char *arch_name) } static int symbol__disassemble(struct symbol *sym, struct map *map, - struct annotate_args *args, - struct arch *arch) + struct annotate_args *args) { struct dso *dso = map->dso; char command[PATH_MAX * 2]; @@ -1532,7 +1531,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, map, arch, file, args, + if (symbol__parse_objdump_line(sym, map, file, args, &lineno) < 0) break; nline++; @@ -1580,7 +1579,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, if (!arch_name) return -1; - arch = arch__find(arch_name); + args.arch = arch = arch__find(arch_name); if (arch == NULL) return -ENOTSUP; @@ -1595,7 +1594,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, &args, arch); + return symbol__disassemble(sym, map, &args); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) -- cgit v1.2.3-70-g09d2 From 1a04db70dcbf621f9919e95456c372281779c053 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:31 +0200 Subject: perf annotate: Add map into struct annotate_args Add map into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index b4d3454618b0..30da4402a3e4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -881,12 +881,12 @@ out_free_name: struct annotate_args { size_t privsize; struct arch *arch; + struct map *map; }; static struct disasm_line *disasm_line__new(struct annotate_args *args, s64 offset, char *line, - int line_nr, - struct map *map) + int line_nr) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); @@ -902,7 +902,7 @@ static struct disasm_line *disasm_line__new(struct annotate_args *args, if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; - disasm_line__init_ins(dl, args->arch, map); + disasm_line__init_ins(dl, args->arch, args->map); } } @@ -1221,11 +1221,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * means that it's not a disassembly line so should be treated differently. * The ops.raw part will be parsed further according to type of the instruction. */ -static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, - FILE *file, +static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, struct annotate_args *args, int *line_nr) { + struct map *map = args->map; struct annotation *notes = symbol__annotation(sym); struct disasm_line *dl; char *line = NULL, *parsed_line, *tmp, *tmp2; @@ -1269,7 +1269,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, struct map *map, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr, map); + dl = disasm_line__new(args, offset, parsed_line, *line_nr); free(line); (*line_nr)++; @@ -1430,9 +1430,9 @@ static const char *annotate__norm_arch(const char *arch_name) return normalize_arch((char *)arch_name); } -static int symbol__disassemble(struct symbol *sym, struct map *map, - struct annotate_args *args) +static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { + struct map *map = args->map; struct dso *dso = map->dso; char command[PATH_MAX * 2]; FILE *file; @@ -1531,8 +1531,7 @@ static int symbol__disassemble(struct symbol *sym, struct map *map, * can associate it with the instructions till the next one. * See disasm_line__new() and struct disasm_line::line_nr. */ - if (symbol__parse_objdump_line(sym, map, file, args, - &lineno) < 0) + if (symbol__parse_objdump_line(sym, file, args, &lineno) < 0) break; nline++; } @@ -1571,6 +1570,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, { struct annotate_args args = { .privsize = privsize, + .map = map, }; struct arch *arch; int err; @@ -1594,7 +1594,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, map, &args); + return symbol__disassemble(sym, &args); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) -- cgit v1.2.3-70-g09d2 From 4748834f96903f843719b02190f98e36b2c55192 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:32 +0200 Subject: perf annotate: Add offset/line/line_nr into struct annotate_args Add offset/line/line_nr into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-10-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 30da4402a3e4..681c9c4ce9f9 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -882,23 +882,24 @@ struct annotate_args { size_t privsize; struct arch *arch; struct map *map; + s64 offset; + char *line; + int line_nr; }; -static struct disasm_line *disasm_line__new(struct annotate_args *args, - s64 offset, char *line, - int line_nr) +static struct disasm_line *disasm_line__new(struct annotate_args *args) { struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); if (dl != NULL) { - dl->al.offset = offset; - dl->al.line = strdup(line); - dl->al.line_nr = line_nr; + dl->al.offset = args->offset; + dl->al.line = strdup(args->line); + dl->al.line_nr = args->line_nr; if (dl->al.line == NULL) goto out_delete; - if (offset != -1) { + if (args->offset != -1) { if (disasm_line__parse(dl->al.line, &dl->ins.name, &dl->ops.raw) < 0) goto out_free_line; @@ -1269,7 +1270,11 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, parsed_line = tmp2 + 1; } - dl = disasm_line__new(args, offset, parsed_line, *line_nr); + args->offset = offset; + args->line = parsed_line; + args->line_nr = *line_nr; + + dl = disasm_line__new(args); free(line); (*line_nr)++; -- cgit v1.2.3-70-g09d2 From d03a686ea6e77b25edacc3eed386cef870e8d248 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:33 +0200 Subject: perf annotate: Add evsel into struct annotation_line_args Add evsel into struct annotate_args to reduce the number of arguments that need to travel all the way to line allocation. This change also allow us to move the arch name initialization under symbol__annotate function. Link: http://lkml.kernel.org/n/tip-a9ok53rrgt1s5e8uglyvy6qt@git.kernel.org Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 +- tools/perf/ui/gtk/annotate.c | 3 +-- tools/perf/util/annotate.c | 11 ++++++++--- tools/perf/util/annotate.h | 2 +- 4 files changed, 11 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index d77994c1cba9..3b72519c085f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1120,7 +1120,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, (nr_pcnt - 1); } - err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), + err = symbol__annotate(sym, map, evsel, sizeof_bdl, &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index b498f1a92bb1..5e0a56df0b4c 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,8 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 681c9c4ce9f9..75f54eab22c8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -882,6 +882,7 @@ struct annotate_args { size_t privsize; struct arch *arch; struct map *map; + struct perf_evsel *evsel; s64 offset; char *line; int line_nr; @@ -1570,16 +1571,21 @@ out_close_stdout: } int symbol__annotate(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, + struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid) { struct annotate_args args = { .privsize = privsize, .map = map, + .evsel = evsel, }; + const char *arch_name = NULL; struct arch *arch; int err; + if (evsel) + arch_name = perf_evsel__env_arch(evsel); + arch_name = annotate__norm_arch(arch_name); if (!arch_name) return -1; @@ -1975,8 +1981,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct rb_root source_line = RB_ROOT; u64 len; - if (symbol__annotate(sym, map, perf_evsel__env_arch(evsel), - 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; len = symbol__size(sym); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index e577f9d13a58..baf34032504a 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -174,7 +174,7 @@ int symbol__alloc_hist(struct symbol *sym); void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, - const char *arch_name, size_t privsize, + struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid); enum symbol_disassemble_errno { -- cgit v1.2.3-70-g09d2 From c4c724364d398a9746410d5ff482e8c4c7228249 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:34 +0200 Subject: perf annotate: Add annotation_line__next function Rename disasm__get_next_ip_line() to annotation_line__next() to make it work over a generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-12-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 7 ++++--- tools/perf/util/annotate.c | 13 +++++++------ tools/perf/util/annotate.h | 3 ++- 3 files changed, 13 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3b72519c085f..881ad6122057 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -440,7 +440,8 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos, *next; + struct annotation_line *next; + struct disasm_line *pos; s64 len = symbol__size(sym); browser->entries = RB_ROOT; @@ -458,7 +459,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - next = disasm__get_next_ip_line(¬es->src->source, pos); + next = annotation_line__next(&pos->al, ¬es->src->source); for (i = 0; i < browser->nr_events; i++) { struct sym_hist_entry sample; @@ -466,7 +467,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, pos->al.offset, - next ? next->al.offset : len, + next ? next->offset : len, &path, &sample); bpos->samples[i].he = sample; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 75f54eab22c8..e7da88d7bb27 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -942,10 +942,11 @@ static void disasm__add(struct list_head *head, struct disasm_line *line) list_add_tail(&line->al.node, head); } -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos) +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head) { - list_for_each_entry_continue(pos, head, al.node) - if (pos->al.offset >= 0) + list_for_each_entry_continue(pos, head, node) + if (pos->offset >= 0) return pos; return NULL; @@ -1096,10 +1097,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st struct annotation *notes = symbol__annotation(sym); s64 offset = dl->al.offset; const u64 addr = start + offset; - struct disasm_line *next; + struct annotation_line *next; struct block_range *br; - next = disasm__get_next_ip_line(¬es->src->source, dl); + next = annotation_line__next(&dl->al, ¬es->src->source); if (perf_evsel__is_group_event(evsel)) { nr_percent = evsel->nr_members; @@ -1114,7 +1115,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st percent = disasm__calc_percent(notes, notes->src->lines ? i : evsel->idx + i, offset, - next ? next->al.offset : (s64) len, + next ? next->offset : (s64) len, &path, &sample); ppercents[i] = percent; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index baf34032504a..43bef6cacbc4 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -85,7 +85,8 @@ struct sym_hist_entry { }; void disasm_line__free(struct disasm_line *dl); -struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disasm_line *pos); +struct annotation_line * +annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, -- cgit v1.2.3-70-g09d2 From 82b9d7ff096b7e7ae3efaeb341ee673bb494bb61 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:35 +0200 Subject: perf annotate: Add annotation_line__add function Rename disasm__add() into annotation_line__add() to make it work over a generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-13-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index e7da88d7bb27..11c7743203a0 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -937,9 +937,9 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } -static void disasm__add(struct list_head *head, struct disasm_line *line) +static void annotation_line__add(struct annotation_line *al, struct list_head *head) { - list_add_tail(&line->al.node, head); + list_add_tail(&al->node, head); } struct annotation_line * @@ -1301,7 +1301,7 @@ static int symbol__parse_objdump_line(struct symbol *sym, FILE *file, dl->ops.target.name = strdup(target.sym->name); } - disasm__add(¬es->src->source, dl); + annotation_line__add(&dl->al, ¬es->src->source); return 0; } -- cgit v1.2.3-70-g09d2 From 5b12adc849be011fd6d99a16e39d83afee43c0a0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:36 +0200 Subject: perf annotate: Move rb_node to struct annotation_line Move rb_node to struct annotation_line to make struct annotation_line the rb tree node for sorted lines used in both stdio and TUI code. This way we can unite the sorted lines lines codes for both TUI and stdio in the following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-14-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 30 ++++++++++++++++-------------- tools/perf/util/annotate.h | 1 + 2 files changed, 17 insertions(+), 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 881ad6122057..cfde5a2ca3f4 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -26,7 +26,6 @@ struct disasm_line_samples { #define CYCLES_WIDTH 6 struct browser_disasm_line { - struct rb_node rb_node; u32 idx; int idx_asm; int jump_sources; @@ -362,9 +361,11 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct browser_disasm_line *a, - struct browser_disasm_line *b, int nr_pcnt) +static int disasm__cmp(struct disasm_line *da, + struct disasm_line *db, int nr_pcnt) { + struct browser_disasm_line *a = disasm_line__browser(da); + struct browser_disasm_line *b = disasm_line__browser(db); int i; for (i = 0; i < nr_pcnt; i++) { @@ -375,24 +376,24 @@ static int disasm__cmp(struct browser_disasm_line *a, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct browser_disasm_line *bdl, +static void disasm_rb_tree__insert(struct rb_root *root, struct disasm_line *dl, int nr_events) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; - struct browser_disasm_line *l; + struct disasm_line *l; while (*p != NULL) { parent = *p; - l = rb_entry(parent, struct browser_disasm_line, rb_node); + l = rb_entry(parent, struct disasm_line, al.rb_node); - if (disasm__cmp(bdl, l, nr_events)) + if (disasm__cmp(dl, l, nr_events)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&bdl->rb_node, parent, p); - rb_insert_color(&bdl->rb_node, root); + rb_link_node(&dl->al.rb_node, parent, p); + rb_insert_color(&dl->al.rb_node, root); } static void annotate_browser__set_top(struct annotate_browser *browser, @@ -425,8 +426,9 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct disasm_line *pos; u32 idx; - bpos = rb_entry(nd, struct browser_disasm_line, rb_node); - pos = ((struct disasm_line *)bpos) - 1; + pos = rb_entry(nd, struct disasm_line, al.rb_node); + bpos = disasm_line__browser(pos); + idx = bpos->idx; if (annotate_browser__opts.hide_src_code) idx = bpos->idx_asm; @@ -455,7 +457,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, int i; if (pos->al.offset == -1) { - RB_CLEAR_NODE(&bpos->rb_node); + RB_CLEAR_NODE(&pos->al.rb_node); continue; } @@ -476,10 +478,10 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, } if (max_percent < 0.01 && pos->al.ipc == 0) { - RB_CLEAR_NODE(&bpos->rb_node); + RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, bpos, + disasm_rb_tree__insert(&browser->entries, pos, browser->nr_events); } pthread_mutex_unlock(¬es->lock); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 43bef6cacbc4..6f01e6117936 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -61,6 +61,7 @@ struct annotation; struct annotation_line { struct list_head node; + struct rb_node rb_node; s64 offset; char *line; int line_nr; -- cgit v1.2.3-70-g09d2 From c835e1914c4bcfdd41f43d270cafc6d8119d7782 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:37 +0200 Subject: perf annotate: Add annotation_line__(new|delete) functions Changing the way the annotation lines are allocated and adding annotation_line__(new|delete) functions to deal with this. Before the allocation schema was as follows: ----------------------------------------------------------- struct disasm_line | struct annotation_line | private space ----------------------------------------------------------- Where the private space is used in TUI code to store computed annotation data for events. The stdio code computes the data on the fly. The goal is to compute and store annotation line's data directly in the struct annotation_line itself, so this patch changes the line allocation schema as follows: ------------------------------------------------------------ privsize space | struct disasm_line | struct annotation_line ------------------------------------------------------------ Moving struct annotation_line to the end, because in following changes we will move here the non-fixed length event's data. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-15-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 4 ++- tools/perf/util/annotate.c | 63 ++++++++++++++++++++++++++++++++++----- tools/perf/util/annotate.h | 10 ++++++- 3 files changed, 68 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index cfde5a2ca3f4..7ca5ae625cc9 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -76,7 +76,9 @@ struct annotate_browser { static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) { - return (struct browser_disasm_line *)(dl + 1); + struct annotation_line *al = &dl->al; + + return (void *) al - al->privsize; } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 11c7743203a0..7c74700ae6d7 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -888,14 +888,64 @@ struct annotate_args { int line_nr; }; +static void annotation_line__delete(struct annotation_line *al) +{ + void *ptr = (void *) al - al->privsize; + + zfree(&al->line); + free(ptr); +} + +/* + * Allocating the annotation line data with following + * structure: + * + * -------------------------------------- + * private space | struct annotation_line + * -------------------------------------- + * + * Size of the private space is stored in 'struct annotation_line'. + * + */ +static struct annotation_line * +annotation_line__new(struct annotate_args *args, size_t privsize) +{ + struct annotation_line *al; + size_t size = privsize + sizeof(*al); + + al = zalloc(size); + if (al) { + al = (void *) al + privsize; + al->privsize = privsize; + al->offset = args->offset; + al->line = strdup(args->line); + al->line_nr = args->line_nr; + } + + return al; +} + +/* + * Allocating the disasm annotation line data with + * following structure: + * + * ------------------------------------------------------------ + * privsize space | struct disasm_line | struct annotation_line + * ------------------------------------------------------------ + * + * We have 'struct annotation_line' member as last member + * of 'struct disasm_line' to have an easy access. + * + */ static struct disasm_line *disasm_line__new(struct annotate_args *args) { - struct disasm_line *dl = zalloc(sizeof(*dl) + args->privsize); + struct disasm_line *dl = NULL; + struct annotation_line *al; + size_t privsize = args->privsize + offsetof(struct disasm_line, al); - if (dl != NULL) { - dl->al.offset = args->offset; - dl->al.line = strdup(args->line); - dl->al.line_nr = args->line_nr; + al = annotation_line__new(args, privsize); + if (al != NULL) { + dl = disasm_line(al); if (dl->al.line == NULL) goto out_delete; @@ -919,14 +969,13 @@ out_delete: void disasm_line__free(struct disasm_line *dl) { - zfree(&dl->al.line); if (dl->ins.ops && dl->ins.ops->free) dl->ins.ops->free(&dl->ops); else ins__delete(&dl->ops); free((void *)dl->ins.name); dl->ins.name = NULL; - free(dl); + annotation_line__delete(&dl->al); } int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6f01e6117936..2e7a08afb04f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -67,14 +67,22 @@ struct annotation_line { int line_nr; float ipc; u64 cycles; + size_t privsize; }; struct disasm_line { - struct annotation_line al; struct ins ins; struct ins_operands ops; + + /* This needs to be at the end. */ + struct annotation_line al; }; +static inline struct disasm_line *disasm_line(struct annotation_line *al) +{ + return al ? container_of(al, struct disasm_line, al) : NULL; +} + static inline bool disasm_line__has_offset(const struct disasm_line *dl) { return dl->ops.target.offset_avail; -- cgit v1.2.3-70-g09d2 From f8eb37bd7c33babc01d9c2e3074ce001eec6cfbb Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:38 +0200 Subject: perf annotate: Add annotated_source__purge function Mov disasm__purge() to annotated_source__purge() to make it work over a generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-16-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 8 +++----- tools/perf/util/annotate.c | 12 ++++++------ tools/perf/util/annotate.h | 2 +- 3 files changed, 10 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 7ca5ae625cc9..4c54d5e76008 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1084,7 +1084,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - struct disasm_line *pos, *n; + struct disasm_line *pos; struct annotation *notes; size_t size; struct map_symbol ms = { @@ -1180,10 +1180,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, annotate_browser__update_addr_width(&browser); ret = annotate_browser__run(&browser, evsel, hbt); - list_for_each_entry_safe(pos, n, ¬es->src->source, al.node) { - list_del(&pos->al.node); - disasm_line__free(pos); - } + + annotated_source__purge(notes->src); out_free_offsets: free(browser.offsets); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 7c74700ae6d7..0c2eb95ba90a 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1985,13 +1985,13 @@ void symbol__annotate_decay_histogram(struct symbol *sym, int evidx) } } -void disasm__purge(struct list_head *head) +void annotated_source__purge(struct annotated_source *as) { - struct disasm_line *pos, *n; + struct annotation_line *al, *n; - list_for_each_entry_safe(pos, n, head, al.node) { - list_del(&pos->al.node); - disasm_line__free(pos); + list_for_each_entry_safe(al, n, &as->source, node) { + list_del(&al->node); + disasm_line__free(disasm_line(al)); } } @@ -2047,7 +2047,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, if (print_lines) symbol__free_source_line(sym, len); - disasm__purge(&symbol__annotation(sym)->src->source); + annotated_source__purge(symbol__annotation(sym)->src); return 0; } diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 2e7a08afb04f..cb60cafae1fb 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -212,7 +212,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, int min_pcnt, int max_lines, int context); void symbol__annotate_zero_histogram(struct symbol *sym, int evidx); void symbol__annotate_decay_histogram(struct symbol *sym, int evidx); -void disasm__purge(struct list_head *head); +void annotated_source__purge(struct annotated_source *as); bool ui__has_annotation(void); -- cgit v1.2.3-70-g09d2 From 7e304557ead5b309d59807b2f05ed47f2c0076c6 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:39 +0200 Subject: perf annotate: Add samples into struct annotation_line Add samples array into struct annotation_line to hold the annotation data. The data is populated in the following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-17-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 ++++++++ tools/perf/util/annotate.h | 17 ++++++++++++----- 2 files changed, 20 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 0c2eb95ba90a..313fb2e90dba 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -911,7 +911,14 @@ static struct annotation_line * annotation_line__new(struct annotate_args *args, size_t privsize) { struct annotation_line *al; + struct perf_evsel *evsel = args->evsel; size_t size = privsize + sizeof(*al); + int nr = 1; + + if (perf_evsel__is_group_event(evsel)) + nr = evsel->nr_members; + + size += sizeof(al->samples[0]) * nr; al = zalloc(size); if (al) { @@ -920,6 +927,7 @@ annotation_line__new(struct annotate_args *args, size_t privsize) al->offset = args->offset; al->line = strdup(args->line); al->line_nr = args->line_nr; + al->samples_nr = nr; } return al; diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cb60cafae1fb..55bdd9015f33 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -59,6 +59,16 @@ bool ins__is_fused(struct arch *arch, const char *ins1, const char *ins2); struct annotation; +struct sym_hist_entry { + u64 nr_samples; + u64 period; +}; + +struct annotation_data { + double percent; + struct sym_hist_entry he; +}; + struct annotation_line { struct list_head node; struct rb_node rb_node; @@ -68,6 +78,8 @@ struct annotation_line { float ipc; u64 cycles; size_t privsize; + int samples_nr; + struct annotation_data samples[0]; }; struct disasm_line { @@ -88,11 +100,6 @@ static inline bool disasm_line__has_offset(const struct disasm_line *dl) return dl->ops.target.offset_avail; } -struct sym_hist_entry { - u64 nr_samples; - u64 period; -}; - void disasm_line__free(struct disasm_line *dl); struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); -- cgit v1.2.3-70-g09d2 From 073ae601edc211383b62618effaaedaa8b1d22db Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:40 +0200 Subject: perf annotate: Add symbol__calc_percent function Add symbol__calc_percent function, that calculates annotation data for symbol and put the data in the struct annotation_line::samples array. Committer notes: Made symbol__calc_percent non static to be used in the next two patches, which will get some fixups from jolsa, doing it this way to keep this bisectable. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-18-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 62 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/annotate.h | 1 + 2 files changed, 62 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 313fb2e90dba..ff1036096347 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1628,6 +1628,62 @@ out_close_stdout: goto out_remove_tmp; } +static void calc_percent(struct sym_hist *hist, + struct annotation_data *sample, + s64 offset, s64 end) +{ + unsigned int hits = 0; + u64 period = 0; + + while (offset < end) { + hits += hist->addr[offset].nr_samples; + period += hist->addr[offset].period; + ++offset; + } + + if (hist->nr_samples) { + sample->he.period = period; + sample->he.nr_samples = hits; + sample->percent = 100.0 * hits / hist->nr_samples; + } +} + +static int annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) +{ + struct annotation_line *al, *next; + + list_for_each_entry(al, ¬es->src->source, node) { + s64 end; + int i; + + if (al->offset == -1) + continue; + + next = annotation_line__next(al, ¬es->src->source); + end = next ? next->offset : len; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + struct sym_hist *hist; + + hist = annotation__histogram(notes, evsel->idx + i); + sample = &al->samples[i]; + + calc_percent(hist, sample, al->offset, end); + } + } + + return 0; +} + +int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +{ + struct annotation *notes = symbol__annotation(sym); + + return annotation__calc_percent(notes, evsel, symbol__size(sym)); +} + int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, struct arch **parch, char *cpuid) @@ -1663,7 +1719,11 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - return symbol__disassemble(sym, &args); + err = symbol__disassemble(sym, &args); + if (err) + return err; + + return symbol__calc_percent(sym, evsel); } static void insert_source_line(struct rb_root *root, struct source_line *src_line) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 55bdd9015f33..6056840da4c9 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,6 +107,7 @@ int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool r size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, s64 end, const char **path, struct sym_hist_entry *sample); +int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; -- cgit v1.2.3-70-g09d2 From 8b4c74dc5cd40a3bc77f8bc2b6b7b33dc125e302 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:41 +0200 Subject: perf annotate: Add symbol__calc_lines function Replace symbol__get_source_line() with symbol__calc_lines(), which calculates the source line tree over the struct annotation_line. This will allow us to remove redundant struct source_line in following patches. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-19-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 186 ++++++++++++++++----------------------------- tools/perf/util/annotate.h | 2 + 2 files changed, 68 insertions(+), 120 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index ff1036096347..96cf6767b5ce 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -892,6 +892,7 @@ static void annotation_line__delete(struct annotation_line *al) { void *ptr = (void *) al - al->privsize; + free_srcline(al->path); zfree(&al->line); free(ptr); } @@ -1726,21 +1727,21 @@ int symbol__annotate(struct symbol *sym, struct map *map, return symbol__calc_percent(sym, evsel); } -static void insert_source_line(struct rb_root *root, struct source_line *src_line) +static void insert_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; int i, ret; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - ret = strcmp(iter->path, src_line->path); + ret = strcmp(iter->path, al->path); if (ret == 0) { - for (i = 0; i < src_line->nr_pcnt; i++) - iter->samples[i].percent_sum += src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + iter->samples[i].percent_sum += al->samples[i].percent; return; } @@ -1750,18 +1751,18 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin p = &(*p)->rb_right; } - for (i = 0; i < src_line->nr_pcnt; i++) - src_line->samples[i].percent_sum = src_line->samples[i].percent; + for (i = 0; i < al->samples_nr; i++) + al->samples[i].percent_sum = al->samples[i].percent; - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } -static int cmp_source_line(struct source_line *a, struct source_line *b) +static int cmp_source_line(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < a->nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent_sum == b->samples[i].percent_sum) continue; return a->samples[i].percent_sum > b->samples[i].percent_sum; @@ -1770,135 +1771,47 @@ static int cmp_source_line(struct source_line *a, struct source_line *b) return 0; } -static void __resort_source_line(struct rb_root *root, struct source_line *src_line) +static void __resort_source_line(struct rb_root *root, struct annotation_line *al) { - struct source_line *iter; + struct annotation_line *iter; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; while (*p != NULL) { parent = *p; - iter = rb_entry(parent, struct source_line, node); + iter = rb_entry(parent, struct annotation_line, rb_node); - if (cmp_source_line(src_line, iter)) + if (cmp_source_line(al, iter)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&src_line->node, parent, p); - rb_insert_color(&src_line->node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void resort_source_line(struct rb_root *dest_root, struct rb_root *src_root) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; node = rb_first(src_root); while (node) { struct rb_node *next; - src_line = rb_entry(node, struct source_line, node); + al = rb_entry(node, struct annotation_line, rb_node); next = rb_next(node); rb_erase(node, src_root); - __resort_source_line(dest_root, src_line); + __resort_source_line(dest_root, al); node = next; } } -static void symbol__free_source_line(struct symbol *sym, int len) -{ - struct annotation *notes = symbol__annotation(sym); - struct source_line *src_line = notes->src->lines; - size_t sizeof_src_line; - int i; - - sizeof_src_line = sizeof(*src_line) + - (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); - - for (i = 0; i < len; i++) { - free_srcline(src_line->path); - src_line = (void *)src_line + sizeof_src_line; - } - - zfree(¬es->src->lines); -} - -/* Get the filename:line for the colored entries */ -static int symbol__get_source_line(struct symbol *sym, struct map *map, - struct perf_evsel *evsel, - struct rb_root *root, int len) -{ - u64 start; - int i, k; - int evidx = evsel->idx; - struct source_line *src_line; - struct annotation *notes = symbol__annotation(sym); - struct sym_hist *h = annotation__histogram(notes, evidx); - struct rb_root tmp_root = RB_ROOT; - int nr_pcnt = 1; - u64 nr_samples = h->nr_samples; - size_t sizeof_src_line = sizeof(struct source_line); - - if (perf_evsel__is_group_event(evsel)) { - for (i = 1; i < evsel->nr_members; i++) { - h = annotation__histogram(notes, evidx + i); - nr_samples += h->nr_samples; - } - nr_pcnt = evsel->nr_members; - sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); - } - - if (!nr_samples) - return 0; - - src_line = notes->src->lines = calloc(len, sizeof_src_line); - if (!notes->src->lines) - return -1; - - start = map__rip_2objdump(map, sym->start); - - for (i = 0; i < len; i++) { - u64 offset; - double percent_max = 0.0; - - src_line->nr_pcnt = nr_pcnt; - - for (k = 0; k < nr_pcnt; k++) { - double percent = 0.0; - - h = annotation__histogram(notes, evidx + k); - nr_samples = h->addr[i].nr_samples; - if (h->nr_samples) - percent = 100.0 * nr_samples / h->nr_samples; - - if (percent > percent_max) - percent_max = percent; - src_line->samples[k].percent = percent; - src_line->samples[k].nr = nr_samples; - } - - if (percent_max <= 0.5) - goto next; - - offset = start + i; - src_line->path = get_srcline(map->dso, offset, NULL, - false, true); - insert_source_line(&tmp_root, src_line); - - next: - src_line = (void *)src_line + sizeof_src_line; - } - - resort_source_line(root, &tmp_root); - return 0; -} - static void print_summary(struct rb_root *root, const char *filename) { - struct source_line *src_line; + struct annotation_line *al; struct rb_node *node; printf("\nSorted summary for file %s\n", filename); @@ -1916,9 +1829,9 @@ static void print_summary(struct rb_root *root, const char *filename) char *path; int i; - src_line = rb_entry(node, struct source_line, node); - for (i = 0; i < src_line->nr_pcnt; i++) { - percent = src_line->samples[i].percent_sum; + al = rb_entry(node, struct annotation_line, rb_node); + for (i = 0; i < al->samples_nr; i++) { + percent = al->samples[i].percent_sum; color = get_percent_color(percent); color_fprintf(stdout, color, " %7.2f", percent); @@ -1926,7 +1839,7 @@ static void print_summary(struct rb_root *root, const char *filename) percent_max = percent; } - path = src_line->path; + path = al->path; color = get_percent_color(percent_max); color_fprintf(stdout, color, " %s\n", path); @@ -2091,29 +2004,62 @@ size_t disasm__fprintf(struct list_head *head, FILE *fp) return printed; } +static void annotation__calc_lines(struct annotation *notes, struct map *map, + struct rb_root *root, u64 start) +{ + struct annotation_line *al; + struct rb_root tmp_root = RB_ROOT; + + list_for_each_entry(al, ¬es->src->source, node) { + double percent_max = 0.0; + int i; + + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample; + + sample = &al->samples[i]; + + if (sample->percent > percent_max) + percent_max = sample->percent; + } + + if (percent_max <= 0.5) + continue; + + al->path = get_srcline(map->dso, start + al->offset, NULL, false, true); + insert_source_line(&tmp_root, al); + } + + resort_source_line(root, &tmp_root); +} + +static void symbol__calc_lines(struct symbol *sym, struct map *map, + struct rb_root *root) +{ + struct annotation *notes = symbol__annotation(sym); + u64 start = map__rip_2objdump(map, sym->start); + + annotation__calc_lines(notes, map, root, start); +} + int symbol__tty_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool print_lines, bool full_paths, int min_pcnt, int max_lines) { struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - u64 len; if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; - len = symbol__size(sym); - if (print_lines) { srcline_full_filename = full_paths; - symbol__get_source_line(sym, map, evsel, &source_line, len); + symbol__calc_lines(sym, map, &source_line); print_summary(&source_line, dso->long_name); } symbol__annotate_printf(sym, map, evsel, full_paths, min_pcnt, max_lines, 0); - if (print_lines) - symbol__free_source_line(sym, len); annotated_source__purge(symbol__annotation(sym)->src); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6056840da4c9..927810b19f0d 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -66,6 +66,7 @@ struct sym_hist_entry { struct annotation_data { double percent; + double percent_sum; struct sym_hist_entry he; }; @@ -78,6 +79,7 @@ struct annotation_line { float ipc; u64 cycles; size_t privsize; + char *path; int samples_nr; struct annotation_data samples[0]; }; -- cgit v1.2.3-70-g09d2 From f681d593d1ce7d2fc665c4047b45f4316408b892 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:42 +0200 Subject: perf annotate: Remove disasm__calc_percent() from disasm_line__print() Remove disasm__calc_percent() from disasm_line__print(), because we already have the data calculated in struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-20-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 3 +++ tools/perf/util/annotate.c | 59 ++++++++++++---------------------------------- 2 files changed, 18 insertions(+), 44 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index adfeeb488f1a..0789f95ca2f3 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -226,6 +226,7 @@ static void perf_top__record_precise_ip(struct perf_top *top, static void perf_top__show_details(struct perf_top *top) { struct hist_entry *he = top->sym_filter_entry; + struct perf_evsel *evsel = hists_to_evsel(he->hists); struct annotation *notes; struct symbol *symbol; int more; @@ -238,6 +239,8 @@ static void perf_top__show_details(struct perf_top *top) pthread_mutex_lock(¬es->lock); + symbol__calc_percent(symbol, evsel); + if (notes->src == NULL) goto out_unlock; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 96cf6767b5ce..209a25545542 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1145,41 +1145,19 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st static const char *prev_color; if (dl->al.offset != -1) { - const char *path = NULL; - double percent, max_percent = 0.0; - double *ppercents = &percent; - struct sym_hist_entry sample; - struct sym_hist_entry *psamples = &sample; + double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); s64 offset = dl->al.offset; const u64 addr = start + offset; - struct annotation_line *next; struct block_range *br; - next = annotation_line__next(&dl->al, ¬es->src->source); + for (i = 0; i < dl->al.samples_nr; i++) { + struct annotation_data *sample = &dl->al.samples[i]; - if (perf_evsel__is_group_event(evsel)) { - nr_percent = evsel->nr_members; - ppercents = calloc(nr_percent, sizeof(double)); - psamples = calloc(nr_percent, sizeof(struct sym_hist_entry)); - if (ppercents == NULL || psamples == NULL) { - return -1; - } - } - - for (i = 0; i < nr_percent; i++) { - percent = disasm__calc_percent(notes, - notes->src->lines ? i : evsel->idx + i, - offset, - next ? next->offset : (s64) len, - &path, &sample); - - ppercents[i] = percent; - psamples[i] = sample; - if (percent > max_percent) - max_percent = percent; + if (sample->percent > max_percent) + max_percent = sample->percent; } if (max_percent < min_pcnt) @@ -1204,28 +1182,28 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. Don't print it * twice for close colored addr with the same filename:line */ - if (path) { - if (!prev_line || strcmp(prev_line, path) + if (dl->al.path) { + if (!prev_line || strcmp(prev_line, dl->al.path) || color != prev_color) { - color_fprintf(stdout, color, " %s", path); - prev_line = path; + color_fprintf(stdout, color, " %s", dl->al.path); + prev_line = dl->al.path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - percent = ppercents[i]; - sample = psamples[i]; - color = get_percent_color(percent); + struct annotation_data *sample = &dl->al.samples[i]; + + color = get_percent_color(sample->percent); if (symbol_conf.show_total_period) color_fprintf(stdout, color, " %11" PRIu64, - sample.period); + sample->he.period); else if (symbol_conf.show_nr_samples) color_fprintf(stdout, color, " %7" PRIu64, - sample.nr_samples); + sample->he.nr_samples); else - color_fprintf(stdout, color, " %7.2f", percent); + color_fprintf(stdout, color, " %7.2f", sample->percent); } printf(" : "); @@ -1235,13 +1213,6 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); printf("\n"); - - if (ppercents != &percent) - free(ppercents); - - if (psamples != &sample) - free(psamples); - } else if (max_lines && printed >= max_lines) return 1; else { -- cgit v1.2.3-70-g09d2 From e425da6caed1a2872e9543bba83488dbe4bbe3f3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:43 +0200 Subject: perf annotate: Remove disasm__calc_percent() from annotate_browser__calc_percent() Remove disasm__calc_percent() from annotate_browser__calc_percent(), because we already have the data calculated in struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-21-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 4c54d5e76008..613682432940 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -444,17 +444,16 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct annotation_line *next; struct disasm_line *pos; - s64 len = symbol__size(sym); browser->entries = RB_ROOT; pthread_mutex_lock(¬es->lock); + symbol__calc_percent(sym, evsel); + list_for_each_entry(pos, ¬es->src->source, al.node) { struct browser_disasm_line *bpos = disasm_line__browser(pos); - const char *path = NULL; double max_percent = 0.0; int i; @@ -463,17 +462,11 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - next = annotation_line__next(&pos->al, ¬es->src->source); - for (i = 0; i < browser->nr_events; i++) { - struct sym_hist_entry sample; - - bpos->samples[i].percent = disasm__calc_percent(notes, - evsel->idx + i, - pos->al.offset, - next ? next->offset : len, - &path, &sample); - bpos->samples[i].he = sample; + struct annotation_data *sample = &pos->al.samples[i]; + + bpos->samples[i].percent = sample->percent; + bpos->samples[i].he = sample->he; if (max_percent < bpos->samples[i].percent) max_percent = bpos->samples[i].percent; -- cgit v1.2.3-70-g09d2 From 81e436a0b3a7a2f3ac0311674ce407b7cdd23f0b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:44 +0200 Subject: perf annotate: Remove disasm__calc_percent function Remove disasm__calc_percent() function, because it's no longer needed. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-22-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 44 -------------------------------------------- tools/perf/util/annotate.h | 2 -- 2 files changed, 46 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 209a25545542..29cf2a5ef620 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1010,50 +1010,6 @@ annotation_line__next(struct annotation_line *pos, struct list_head *head) return NULL; } -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample) -{ - struct source_line *src_line = notes->src->lines; - double percent = 0.0; - - sample->nr_samples = sample->period = 0; - - if (src_line) { - size_t sizeof_src_line = sizeof(*src_line) + - sizeof(src_line->samples) * (src_line->nr_pcnt - 1); - - while (offset < end) { - src_line = (void *)notes->src->lines + - (sizeof_src_line * offset); - - if (*path == NULL) - *path = src_line->path; - - percent += src_line->samples[evidx].percent; - sample->nr_samples += src_line->samples[evidx].nr; - offset++; - } - } else { - struct sym_hist *h = annotation__histogram(notes, evidx); - unsigned int hits = 0; - u64 period = 0; - - while (offset < end) { - hits += h->addr[offset].nr_samples; - period += h->addr[offset].period; - ++offset; - } - - if (h->nr_samples) { - sample->period = period; - sample->nr_samples = hits; - percent = 100.0 * hits / h->nr_samples; - } - } - - return percent; -} - static const char *annotate__address_color(struct block_range *br) { double cov = block_range__coverage(br); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 927810b19f0d..f98acb2ad721 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,8 +107,6 @@ struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path, struct sym_hist_entry *sample); int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { -- cgit v1.2.3-70-g09d2 From fa1924eb4abcd756febc031d819ba75c3849ca45 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:45 +0200 Subject: perf annotate: Remove struct source_line Remove struct source_line*, no longer needed. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-23-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index f98acb2ad721..4fc805a271d2 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -126,19 +126,6 @@ struct cyc_hist { u16 reset; }; -struct source_line_samples { - double percent; - double percent_sum; - u64 nr; -}; - -struct source_line { - struct rb_node node; - char *path; - int nr_pcnt; - struct source_line_samples samples[1]; -}; - /** struct annotated_source - symbols with hits have this attached as in sannotation * * @histogram: Array of addr hit histograms per event being monitored @@ -154,7 +141,6 @@ struct source_line { */ struct annotated_source { struct list_head source; - struct source_line *lines; int nr_histograms; size_t sizeof_sym_hist; struct cyc_hist *cycles_hist; -- cgit v1.2.3-70-g09d2 From 8f25b8197d43885a4cc19bea581e37bf46ed9958 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:46 +0200 Subject: perf annotate: Add annotation_line__print function Separating struct annotation_line display function, it will hold the generic line display code. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-24-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 29cf2a5ef620..5c6f739ac3ac 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1189,6 +1189,18 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 0; } +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *aq) +{ + struct disasm_line *dl = container_of(al, struct disasm_line, al); + struct disasm_line *queue = container_of(aq, struct disasm_line, al); + + return disasm_line__print(dl, sym, start, evsel, len, min_pcnt, printed, + max_lines, queue); +} + /* * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) * which looks like following @@ -1797,7 +1809,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, const char *evsel_name = perf_evsel__name(evsel); struct annotation *notes = symbol__annotation(sym); struct sym_hist *h = annotation__histogram(notes, evsel->idx); - struct disasm_line *pos, *queue = NULL; + struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); int printed = 2, queue_len = 0; int more = 0; @@ -1830,15 +1842,19 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); - list_for_each_entry(pos, ¬es->src->source, al.node) { + list_for_each_entry(pos, ¬es->src->source, node) { + int err; + if (context && queue == NULL) { queue = pos; queue_len = 0; } - switch (disasm_line__print(pos, sym, start, evsel, len, - min_pcnt, printed, max_lines, - queue)) { + err = annotation_line__print(pos, sym, start, evsel, len, + min_pcnt, printed, max_lines, + queue); + + switch (err) { case 0: ++printed; if (context) { @@ -1860,7 +1876,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (!context) break; if (queue_len == context) - queue = list_entry(queue->al.node.next, typeof(*queue), al.node); + queue = list_entry(queue->node.next, typeof(*queue), node); else ++queue_len; break; -- cgit v1.2.3-70-g09d2 From 29971f9a82a5d005b37d65fbb73edaf9073279b0 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:47 +0200 Subject: perf annotate: Factor annotation_line__print from disasm_line__print Move generic annotation line display code into annotation_line__print function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-25-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 69 ++++++++++++++++++++++------------------------ 1 file changed, 33 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 5c6f739ac3ac..cb065ca431ee 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1093,24 +1093,36 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } -static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct disasm_line *queue) +static int disasm_line__print(struct disasm_line *dl, u64 start) { + s64 offset = dl->al.offset; + const u64 addr = start + offset; + struct block_range *br; + + br = block_range__find(addr); + color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); + annotate__branch_printf(br, addr); + return 0; +} + +static int +annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, + struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, + int max_lines, struct annotation_line *queue) +{ + struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; static const char *prev_color; - if (dl->al.offset != -1) { + if (al->offset != -1) { double max_percent = 0.0; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); - s64 offset = dl->al.offset; - const u64 addr = start + offset; - struct block_range *br; - for (i = 0; i < dl->al.samples_nr; i++) { - struct annotation_data *sample = &dl->al.samples[i]; + for (i = 0; i < al->samples_nr; i++) { + struct annotation_data *sample = &al->samples[i]; if (sample->percent > max_percent) max_percent = sample->percent; @@ -1123,11 +1135,11 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st return 1; if (queue != NULL) { - list_for_each_entry_from(queue, ¬es->src->source, al.node) { - if (queue == dl) + list_for_each_entry_from(queue, ¬es->src->source, node) { + if (queue == al) break; - disasm_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + annotation_line__print(queue, sym, start, evsel, len, + 0, 0, 1, NULL); } } @@ -1138,17 +1150,17 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st * the same color than the percentage. Don't print it * twice for close colored addr with the same filename:line */ - if (dl->al.path) { - if (!prev_line || strcmp(prev_line, dl->al.path) + if (al->path) { + if (!prev_line || strcmp(prev_line, al->path) || color != prev_color) { - color_fprintf(stdout, color, " %s", dl->al.path); - prev_line = dl->al.path; + color_fprintf(stdout, color, " %s", al->path); + prev_line = al->path; prev_color = color; } } for (i = 0; i < nr_percent; i++) { - struct annotation_data *sample = &dl->al.samples[i]; + struct annotation_data *sample = &al->samples[i]; color = get_percent_color(sample->percent); @@ -1164,10 +1176,7 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st printf(" : "); - br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); - color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); - annotate__branch_printf(br, addr); + disasm_line__print(dl, start); printf("\n"); } else if (max_lines && printed >= max_lines) return 1; @@ -1180,27 +1189,15 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) width *= evsel->nr_members; - if (!*dl->al.line) + if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", dl->al.line); + printf(" %*s: %s\n", width, " ", al->line); } return 0; } -static int -annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, - struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *aq) -{ - struct disasm_line *dl = container_of(al, struct disasm_line, al); - struct disasm_line *queue = container_of(aq, struct disasm_line, al); - - return disasm_line__print(dl, sym, start, evsel, len, min_pcnt, printed, - max_lines, queue); -} - /* * symbol__parse_objdump_line() parses objdump output (with -d --no-show-raw) * which looks like following -- cgit v1.2.3-70-g09d2 From 3ab6db8d0f3b19b93a8de25e3b7ab5fdaac47679 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:48 +0200 Subject: perf annotate browser: Use samples data from struct annotation_line We now carry the data in 'struct annotation_line', so using it instead of samples from 'struct browser_disasm_line' and removing it and its setup. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-26-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 57 ++++++++++++++------------------------- 1 file changed, 20 insertions(+), 37 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 613682432940..5d99429a03bc 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -29,11 +29,6 @@ struct browser_disasm_line { u32 idx; int idx_asm; int jump_sources; - /* - * actual length of this array is saved on the nr_events field - * of the struct annotate_browser - */ - struct disasm_line_samples samples[1]; }; static struct annotate_browser_opt { @@ -76,9 +71,7 @@ struct annotate_browser { static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) { - struct annotation_line *al = &dl->al; - - return (void *) al - al->privsize; + return (void *) dl - sizeof(struct browser_disasm_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -139,8 +132,8 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bool show_title = false; for (i = 0; i < ab->nr_events; i++) { - if (bdl->samples[i].percent > percent_max) - percent_max = bdl->samples[i].percent; + if (dl->al.samples[i].percent > percent_max) + percent_max = dl->al.samples[i].percent; } if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { @@ -154,17 +147,17 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (dl->al.offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, - bdl->samples[i].percent, + dl->al.samples[i].percent, current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%11" PRIu64 " ", - bdl->samples[i].he.period); + dl->al.samples[i].he.period); } else if (annotate_browser__opts.show_nr_samples) { ui_browser__printf(browser, "%6" PRIu64 " ", - bdl->samples[i].he.nr_samples); + dl->al.samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", - bdl->samples[i].percent); + dl->al.samples[i].percent); } } } else { @@ -363,11 +356,9 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct disasm_line *da, - struct disasm_line *db, int nr_pcnt) +static int disasm__cmp(struct annotation_line *a, + struct annotation_line *b, int nr_pcnt) { - struct browser_disasm_line *a = disasm_line__browser(da); - struct browser_disasm_line *b = disasm_line__browser(db); int i; for (i = 0; i < nr_pcnt; i++) { @@ -378,24 +369,24 @@ static int disasm__cmp(struct disasm_line *da, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct disasm_line *dl, +static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al, int nr_events) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; - struct disasm_line *l; + struct annotation_line *l; while (*p != NULL) { parent = *p; - l = rb_entry(parent, struct disasm_line, al.rb_node); + l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(dl, l, nr_events)) + if (disasm__cmp(al, l, nr_events)) p = &(*p)->rb_left; else p = &(*p)->rb_right; } - rb_link_node(&dl->al.rb_node, parent, p); - rb_insert_color(&dl->al.rb_node, root); + rb_link_node(&al->rb_node, parent, p); + rb_insert_color(&al->rb_node, root); } static void annotate_browser__set_top(struct annotate_browser *browser, @@ -453,7 +444,6 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, symbol__calc_percent(sym, evsel); list_for_each_entry(pos, ¬es->src->source, al.node) { - struct browser_disasm_line *bpos = disasm_line__browser(pos); double max_percent = 0.0; int i; @@ -465,18 +455,15 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, for (i = 0; i < browser->nr_events; i++) { struct annotation_data *sample = &pos->al.samples[i]; - bpos->samples[i].percent = sample->percent; - bpos->samples[i].he = sample->he; - - if (max_percent < bpos->samples[i].percent) - max_percent = bpos->samples[i].percent; + if (max_percent < sample->percent) + max_percent = sample->percent; } if (max_percent < 0.01 && pos->al.ipc == 0) { RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, pos, + disasm_rb_tree__insert(&browser->entries, &pos->al, browser->nr_events); } pthread_mutex_unlock(¬es->lock); @@ -1096,7 +1083,6 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, }; int ret = -1, err; int nr_pcnt = 1; - size_t sizeof_bdl = sizeof(struct browser_disasm_line); if (sym == NULL) return -1; @@ -1112,14 +1098,11 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, return -1; } - if (perf_evsel__is_group_event(evsel)) { + if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - sizeof_bdl += sizeof(struct disasm_line_samples) * - (nr_pcnt - 1); - } err = symbol__annotate(sym, map, evsel, - sizeof_bdl, &browser.arch, + sizeof(struct browser_disasm_line), &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; -- cgit v1.2.3-70-g09d2 From b15636c62f3a32a8560ea6a32245ec49edd60c6b Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:49 +0200 Subject: perf annotate browser: Do not pass nr_events in disasm_rb_tree__insert We now keep samples_nr in struct annotation_line, so there's no need to pass nr_events to disasm_rb_tree__insert function. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-27-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5d99429a03bc..67e5955b3c6f 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -356,12 +356,11 @@ static unsigned int annotate_browser__refresh(struct ui_browser *browser) return ret; } -static int disasm__cmp(struct annotation_line *a, - struct annotation_line *b, int nr_pcnt) +static int disasm__cmp(struct annotation_line *a, struct annotation_line *b) { int i; - for (i = 0; i < nr_pcnt; i++) { + for (i = 0; i < a->samples_nr; i++) { if (a->samples[i].percent == b->samples[i].percent) continue; return a->samples[i].percent < b->samples[i].percent; @@ -369,8 +368,7 @@ static int disasm__cmp(struct annotation_line *a, return 0; } -static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al, - int nr_events) +static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line *al) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -380,7 +378,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line parent = *p; l = rb_entry(parent, struct annotation_line, rb_node); - if (disasm__cmp(al, l, nr_events)) + if (disasm__cmp(al, l)) p = &(*p)->rb_left; else p = &(*p)->rb_right; @@ -452,7 +450,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, continue; } - for (i = 0; i < browser->nr_events; i++) { + for (i = 0; i < pos->al.samples_nr; i++) { struct annotation_data *sample = &pos->al.samples[i]; if (max_percent < sample->percent) @@ -463,8 +461,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, RB_CLEAR_NODE(&pos->al.rb_node); continue; } - disasm_rb_tree__insert(&browser->entries, &pos->al, - browser->nr_events); + disasm_rb_tree__insert(&browser->entries, &pos->al); } pthread_mutex_unlock(¬es->lock); -- cgit v1.2.3-70-g09d2 From 0d9579701fee0a482185ab4e8ee7f5ae86f8ae19 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:55:36 +0100 Subject: perf annotate browser: Rename struct browser_disasm_line to browser_line Rename struct browser_disasm_line to browser_line, because the browser operates now on generic lines and no longer on disasm lines. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105536.GA20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 67e5955b3c6f..5ed6c158af40 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -25,10 +25,10 @@ struct disasm_line_samples { #define IPC_WIDTH 6 #define CYCLES_WIDTH 6 -struct browser_disasm_line { - u32 idx; - int idx_asm; - int jump_sources; +struct browser_line { + u32 idx; + int idx_asm; + int jump_sources; }; static struct annotate_browser_opt { @@ -69,9 +69,9 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_disasm_line *disasm_line__browser(struct disasm_line *dl) +static inline struct browser_line *disasm_line__browser(struct disasm_line *dl) { - return (void *) dl - sizeof(struct browser_disasm_line); + return (void *) dl - sizeof(struct browser_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -119,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_disasm_line *bdl = disasm_line__browser(dl); + struct browser_line *bdl = disasm_line__browser(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -302,7 +302,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = ab->selection, *target; - struct browser_disasm_line *btarget, *bcursor; + struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; struct symbol *sym = ms->sym; @@ -413,7 +413,7 @@ static void annotate_browser__set_top(struct annotate_browser *browser, static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { - struct browser_disasm_line *bpos; + struct browser_line *bpos; struct disasm_line *pos; u32 idx; @@ -471,7 +471,7 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { struct disasm_line *dl; - struct browser_disasm_line *bdl; + struct browser_line *bdl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); @@ -1027,7 +1027,7 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser for (offset = 0; offset < size; ++offset) { struct disasm_line *dl = browser->offsets[offset], *dlt; - struct browser_disasm_line *bdlt; + struct browser_line *bdlt; if (!disasm_line__is_valid_jump(dl, sym)) continue; @@ -1099,7 +1099,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, nr_pcnt = evsel->nr_members; err = symbol__annotate(sym, map, evsel, - sizeof(struct browser_disasm_line), &browser.arch, + sizeof(struct browser_line), &browser.arch, perf_evsel__env_cpuid(evsel)); if (err) { char msg[BUFSIZ]; @@ -1114,7 +1114,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, browser.start = map__rip_2objdump(map, sym->start); list_for_each_entry(pos, ¬es->src->source, al.node) { - struct browser_disasm_line *bpos; + struct browser_line *bpos; size_t line_len = strlen(pos->al.line); if (browser.b.width < line_len) -- cgit v1.2.3-70-g09d2 From daf25d4303cbf1795535b6c0b7172ba6f12aa2bd Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:55:52 +0100 Subject: perf annotate browser: Rename disasm_line__browser to browser_line Rename disasm_line__browser function to browser_line, because the browser got generic and is no longer disasm specific. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105552.GB20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5ed6c158af40..3691dc8cef4c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -69,7 +69,7 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_line *disasm_line__browser(struct disasm_line *dl) +static inline struct browser_line *browser_line(struct disasm_line *dl) { return (void *) dl - sizeof(struct browser_line); } @@ -119,7 +119,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = disasm_line__browser(dl); + struct browser_line *bdl = browser_line(dl); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -319,8 +319,8 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!target) return; - bcursor = disasm_line__browser(cursor); - btarget = disasm_line__browser(target); + bcursor = browser_line(cursor); + btarget = browser_line(target); if (annotate_browser__opts.hide_src_code) { from = bcursor->idx_asm; @@ -418,7 +418,7 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, u32 idx; pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = disasm_line__browser(pos); + bpos = browser_line(pos); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -476,7 +476,7 @@ static bool annotate_browser__toggle_source(struct annotate_browser *browser) browser->b.seek(&browser->b, offset, SEEK_CUR); dl = list_entry(browser->b.top, struct disasm_line, al.node); - bdl = disasm_line__browser(dl); + bdl = browser_line(dl); if (annotate_browser__opts.hide_src_code) { if (bdl->idx_asm < offset) @@ -1040,7 +1040,7 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser if (dlt == NULL) continue; - bdlt = disasm_line__browser(dlt); + bdlt = browser_line(dlt); if (++bdlt->jump_sources > browser->max_jump_sources) browser->max_jump_sources = bdlt->jump_sources; @@ -1119,7 +1119,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (browser.b.width < line_len) browser.b.width = line_len; - bpos = disasm_line__browser(pos); + bpos = browser_line(pos); bpos->idx = browser.nr_entries++; if (pos->al.offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; -- cgit v1.2.3-70-g09d2 From 7bcbcd589b15eae849d45540832ba4f9530c778e Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 6 Nov 2017 11:56:17 +0100 Subject: perf annotate browser: Change selection to struct annotation_line Use struct annotation_line as a browser::selection. We want to be able to use the annotate_browser for all sorts of source data, so it needs to be able to work over the generic struct annotation_line. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171106105617.GC20858@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 63 +++++++++++++++++++++------------------ 1 file changed, 34 insertions(+), 29 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 3691dc8cef4c..657811669a6c 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -47,26 +47,26 @@ static struct annotate_browser_opt { struct arch; struct annotate_browser { - struct ui_browser b; - struct rb_root entries; - struct rb_node *curr_hot; - struct disasm_line *selection; - struct disasm_line **offsets; - struct arch *arch; - int nr_events; - u64 start; - int nr_asm_entries; - int nr_entries; - int max_jump_sources; - int nr_jumps; - bool searching_backwards; - bool have_cycles; - u8 addr_width; - u8 jumps_width; - u8 target_width; - u8 min_addr_width; - u8 max_addr_width; - char search_bf[128]; + struct ui_browser b; + struct rb_root entries; + struct rb_node *curr_hot; + struct annotation_line *selection; + struct disasm_line **offsets; + struct arch *arch; + int nr_events; + u64 start; + int nr_asm_entries; + int nr_entries; + int max_jump_sources; + int nr_jumps; + bool searching_backwards; + bool have_cycles; + u8 addr_width; + u8 jumps_width; + u8 target_width; + u8 min_addr_width; + u8 max_addr_width; + char search_bf[128]; }; static inline struct browser_line *browser_line(struct disasm_line *dl) @@ -265,7 +265,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } if (current_entry) - ab->selection = dl; + ab->selection = &dl->al; } static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) @@ -301,7 +301,8 @@ static bool is_fused(struct annotate_browser *ab, struct disasm_line *cursor) static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *cursor = ab->selection, *target; + struct disasm_line *cursor = disasm_line(ab->selection); + struct disasm_line *target; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -526,7 +527,7 @@ static bool annotate_browser__callq(struct annotate_browser *browser, struct hist_browser_timer *hbt) { struct map_symbol *ms = browser->b.priv; - struct disasm_line *dl = browser->selection; + struct disasm_line *dl = disasm_line(browser->selection); struct annotation *notes; struct addr_map_symbol target = { .map = ms->map, @@ -584,7 +585,7 @@ struct disasm_line *annotate_browser__find_offset(struct annotate_browser *brows static bool annotate_browser__jump(struct annotate_browser *browser) { - struct disasm_line *dl = browser->selection; + struct disasm_line *dl = disasm_line(browser->selection); u64 offset; s64 idx; @@ -610,7 +611,7 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = browser->selection; + struct disasm_line *pos = disasm_line(browser->selection); *idx = browser->b.index; list_for_each_entry_continue(pos, ¬es->src->source, al.node) { @@ -649,7 +650,7 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = browser->selection; + struct disasm_line *pos = disasm_line(browser->selection); *idx = browser->b.index; list_for_each_entry_continue_reverse(pos, ¬es->src->source, al.node) { @@ -882,13 +883,16 @@ show_help: continue; case K_ENTER: case K_RIGHT: + { + struct disasm_line *dl = disasm_line(browser->selection); + if (browser->selection == NULL) ui_helpline__puts("Huh? No selection. Report to linux-kernel@vger.kernel.org"); - else if (browser->selection->al.offset == -1) + else if (browser->selection->offset == -1) ui_helpline__puts("Actions are only available for assembly lines."); - else if (!browser->selection->ins.ops) + else if (!dl->ins.ops) goto show_sup_ins; - else if (ins__is_ret(&browser->selection->ins)) + else if (ins__is_ret(&dl->ins)) goto out; else if (!(annotate_browser__jump(browser) || annotate_browser__callq(browser, evsel, hbt))) { @@ -896,6 +900,7 @@ show_sup_ins: ui_helpline__puts("Actions are only available for function call/return & jump/branch instructions."); } continue; + } case 't': if (annotate_browser__opts.show_total_period) { annotate_browser__opts.show_total_period = false; -- cgit v1.2.3-70-g09d2 From e1b60b5bd3c7a3f215e79fa911122aba59b3d984 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:53 +0200 Subject: perf annotate browser: Change offsets to struct annotation_line Use struct annotation_line as a browser::offsets array entry. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-31-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 49 +++++++++++++++++++++++---------------- 1 file changed, 29 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 657811669a6c..911f06ce0f1b 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -51,7 +51,7 @@ struct annotate_browser { struct rb_root entries; struct rb_node *curr_hot; struct annotation_line *selection; - struct disasm_line **offsets; + struct annotation_line **offsets; struct arch *arch; int nr_events; u64 start; @@ -303,6 +303,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = disasm_line(ab->selection); struct disasm_line *target; + struct annotation_line *al; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -316,10 +317,12 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!disasm_line__is_valid_jump(cursor, sym)) return; - target = ab->offsets[cursor->ops.target.offset]; - if (!target) + al = ab->offsets[cursor->ops.target.offset]; + if (!al) return; + target = disasm_line(al); + bcursor = browser_line(cursor); btarget = browser_line(target); @@ -978,10 +981,10 @@ static void count_and_fill(struct annotate_browser *browser, u64 start, u64 end, return; for (offset = start; offset <= end; offset++) { - struct disasm_line *dl = browser->offsets[offset]; + struct annotation_line *al = browser->offsets[offset]; - if (dl) - dl->al.ipc = ipc; + if (al) + al->ipc = ipc; } } } @@ -1006,13 +1009,13 @@ static void annotate__compute_ipc(struct annotate_browser *browser, size_t size, ch = ¬es->src->cycles_hist[offset]; if (ch && ch->cycles) { - struct disasm_line *dl; + struct annotation_line *al; if (ch->have_start) count_and_fill(browser, ch->start, offset, ch); - dl = browser->offsets[offset]; - if (dl && ch->num_aggr) - dl->al.cycles = ch->cycles_aggr / ch->num_aggr; + al = browser->offsets[offset]; + if (al && ch->num_aggr) + al->cycles = ch->cycles_aggr / ch->num_aggr; browser->have_cycles = true; } } @@ -1031,13 +1034,18 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser return; for (offset = 0; offset < size; ++offset) { - struct disasm_line *dl = browser->offsets[offset], *dlt; + struct annotation_line *al = browser->offsets[offset]; + struct disasm_line *dl, *dlt; struct browser_line *bdlt; + dl = disasm_line(al); + if (!disasm_line__is_valid_jump(dl, sym)) continue; - dlt = browser->offsets[dl->ops.target.offset]; + al = browser->offsets[dl->ops.target.offset]; + dlt = disasm_line(al); + /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we * have to adjust to the previous offset? @@ -1066,7 +1074,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - struct disasm_line *pos; + struct annotation_line *al; struct annotation *notes; size_t size; struct map_symbol ms = { @@ -1094,7 +1102,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - browser.offsets = zalloc(size * sizeof(struct disasm_line *)); + browser.offsets = zalloc(size * sizeof(struct annotation_line *)); if (browser.offsets == NULL) { ui__error("Not enough memory!"); return -1; @@ -1118,15 +1126,16 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, notes = symbol__annotation(sym); browser.start = map__rip_2objdump(map, sym->start); - list_for_each_entry(pos, ¬es->src->source, al.node) { + list_for_each_entry(al, ¬es->src->source, node) { + struct disasm_line *dl = disasm_line(al); struct browser_line *bpos; - size_t line_len = strlen(pos->al.line); + size_t line_len = strlen(al->line); if (browser.b.width < line_len) browser.b.width = line_len; - bpos = browser_line(pos); + bpos = browser_line(dl); bpos->idx = browser.nr_entries++; - if (pos->al.offset != -1) { + if (al->offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; /* * FIXME: short term bandaid to cope with assembly @@ -1135,8 +1144,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, * * E.g. copy_user_generic_unrolled */ - if (pos->al.offset < (s64)size) - browser.offsets[pos->al.offset] = pos; + if (al->offset < (s64)size) + browser.offsets[al->offset] = al; } else bpos->idx_asm = -1; } -- cgit v1.2.3-70-g09d2 From a5ef27020b4bc0785fabb2591eb670d3bc641257 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:54 +0200 Subject: perf annotate browser: Use struct annotation_line in browser_line Using struct annotation_line arg in browser_line function to make it generic. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-32-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 60 ++++++++++++++++++--------------------- 1 file changed, 28 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 911f06ce0f1b..fb83deb8c909 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -69,9 +69,12 @@ struct annotate_browser { char search_bf[128]; }; -static inline struct browser_line *browser_line(struct disasm_line *dl) +static inline struct browser_line *browser_line(struct annotation_line *al) { - return (void *) dl - sizeof(struct browser_line); + void *ptr = al; + + ptr = container_of(al, struct disasm_line, al); + return ptr - sizeof(struct browser_line); } static bool disasm_line__filter(struct ui_browser *browser __maybe_unused, @@ -119,7 +122,7 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = browser_line(dl); + struct browser_line *bdl = browser_line(&dl->al); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -302,8 +305,7 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); struct disasm_line *cursor = disasm_line(ab->selection); - struct disasm_line *target; - struct annotation_line *al; + struct annotation_line *target; struct browser_line *btarget, *bcursor; unsigned int from, to; struct map_symbol *ms = ab->b.priv; @@ -317,13 +319,9 @@ static void annotate_browser__draw_current_jump(struct ui_browser *browser) if (!disasm_line__is_valid_jump(cursor, sym)) return; - al = ab->offsets[cursor->ops.target.offset]; - if (!al) - return; - - target = disasm_line(al); + target = ab->offsets[cursor->ops.target.offset]; - bcursor = browser_line(cursor); + bcursor = browser_line(&cursor->al); btarget = browser_line(target); if (annotate_browser__opts.hide_src_code) { @@ -422,7 +420,7 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, u32 idx; pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = browser_line(pos); + bpos = browser_line(&pos->al); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -475,37 +473,37 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { struct disasm_line *dl; - struct browser_line *bdl; + struct browser_line *bl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); dl = list_entry(browser->b.top, struct disasm_line, al.node); - bdl = browser_line(dl); + bl = browser_line(&dl->al); if (annotate_browser__opts.hide_src_code) { - if (bdl->idx_asm < offset) - offset = bdl->idx; + if (bl->idx_asm < offset) + offset = bl->idx; browser->b.nr_entries = browser->nr_entries; annotate_browser__opts.hide_src_code = false; browser->b.seek(&browser->b, -offset, SEEK_CUR); - browser->b.top_idx = bdl->idx - offset; - browser->b.index = bdl->idx; + browser->b.top_idx = bl->idx - offset; + browser->b.index = bl->idx; } else { - if (bdl->idx_asm < 0) { + if (bl->idx_asm < 0) { ui_helpline__puts("Only available for assembly lines."); browser->b.seek(&browser->b, -offset, SEEK_CUR); return false; } - if (bdl->idx_asm < offset) - offset = bdl->idx_asm; + if (bl->idx_asm < offset) + offset = bl->idx_asm; browser->b.nr_entries = browser->nr_asm_entries; annotate_browser__opts.hide_src_code = true; browser->b.seek(&browser->b, -offset, SEEK_CUR); - browser->b.top_idx = bdl->idx_asm - offset; - browser->b.index = bdl->idx_asm; + browser->b.top_idx = bl->idx_asm - offset; + browser->b.index = bl->idx_asm; } return true; @@ -1035,8 +1033,8 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser for (offset = 0; offset < size; ++offset) { struct annotation_line *al = browser->offsets[offset]; - struct disasm_line *dl, *dlt; - struct browser_line *bdlt; + struct disasm_line *dl; + struct browser_line *blt; dl = disasm_line(al); @@ -1044,18 +1042,17 @@ static void annotate_browser__mark_jump_targets(struct annotate_browser *browser continue; al = browser->offsets[dl->ops.target.offset]; - dlt = disasm_line(al); /* * FIXME: Oops, no jump target? Buggy disassembler? Or do we * have to adjust to the previous offset? */ - if (dlt == NULL) + if (al == NULL) continue; - bdlt = browser_line(dlt); - if (++bdlt->jump_sources > browser->max_jump_sources) - browser->max_jump_sources = bdlt->jump_sources; + blt = browser_line(al); + if (++blt->jump_sources > browser->max_jump_sources) + browser->max_jump_sources = blt->jump_sources; ++browser->nr_jumps; } @@ -1127,13 +1124,12 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, browser.start = map__rip_2objdump(map, sym->start); list_for_each_entry(al, ¬es->src->source, node) { - struct disasm_line *dl = disasm_line(al); struct browser_line *bpos; size_t line_len = strlen(al->line); if (browser.b.width < line_len) browser.b.width = line_len; - bpos = browser_line(dl); + bpos = browser_line(al); bpos->idx = browser.nr_entries++; if (al->offset != -1) { bpos->idx_asm = browser.nr_asm_entries++; -- cgit v1.2.3-70-g09d2 From 9213afbdf9562cd108e7ed03bd960d8acdfb49c1 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:55 +0200 Subject: perf annotate browser: Use struct annotation_line in find functions Use struct annotation_line in find functions: annotate_browser__find_string annotate_browser__find_string_reverse Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-33-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 40 +++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index fb83deb8c909..8f75e56aedc2 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -606,23 +606,23 @@ static bool annotate_browser__jump(struct annotate_browser *browser) } static -struct disasm_line *annotate_browser__find_string(struct annotate_browser *browser, +struct annotation_line *annotate_browser__find_string(struct annotate_browser *browser, char *s, s64 *idx) { struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = disasm_line(browser->selection); + struct annotation_line *al = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue(pos, ¬es->src->source, al.node) { - if (disasm_line__filter(&browser->b, &pos->al.node)) + list_for_each_entry_continue(al, ¬es->src->source, node) { + if (disasm_line__filter(&browser->b, &al->node)) continue; ++*idx; - if (pos->al.line && strstr(pos->al.line, s) != NULL) - return pos; + if (al->line && strstr(al->line, s) != NULL) + return al; } return NULL; @@ -630,38 +630,38 @@ struct disasm_line *annotate_browser__find_string(struct annotate_browser *brows static bool __annotate_browser__search(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; s64 idx; - dl = annotate_browser__find_string(browser, browser->search_bf, &idx); - if (dl == NULL) { + al = annotate_browser__find_string(browser, browser->search_bf, &idx); + if (al == NULL) { ui_helpline__puts("String not found!"); return false; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, disasm_line(al), idx); browser->searching_backwards = false; return true; } static -struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browser *browser, +struct annotation_line *annotate_browser__find_string_reverse(struct annotate_browser *browser, char *s, s64 *idx) { struct map_symbol *ms = browser->b.priv; struct symbol *sym = ms->sym; struct annotation *notes = symbol__annotation(sym); - struct disasm_line *pos = disasm_line(browser->selection); + struct annotation_line *al = browser->selection; *idx = browser->b.index; - list_for_each_entry_continue_reverse(pos, ¬es->src->source, al.node) { - if (disasm_line__filter(&browser->b, &pos->al.node)) + list_for_each_entry_continue_reverse(al, ¬es->src->source, node) { + if (disasm_line__filter(&browser->b, &al->node)) continue; --*idx; - if (pos->al.line && strstr(pos->al.line, s) != NULL) - return pos; + if (al->line && strstr(al->line, s) != NULL) + return al; } return NULL; @@ -669,16 +669,16 @@ struct disasm_line *annotate_browser__find_string_reverse(struct annotate_browse static bool __annotate_browser__search_reverse(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; s64 idx; - dl = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx); - if (dl == NULL) { + al = annotate_browser__find_string_reverse(browser, browser->search_bf, &idx); + if (al == NULL) { ui_helpline__puts("String not found!"); return false; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, disasm_line(al), idx); browser->searching_backwards = true; return true; } -- cgit v1.2.3-70-g09d2 From ec03a77d7d28a2c2de246f67322c5d916852dd9d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:56 +0200 Subject: perf annotate browser: Use struct annotation_line in browser top Use struct annotation_line in browser::b::top. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-34-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 8f75e56aedc2..f0f27cf9db85 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -390,7 +390,7 @@ static void disasm_rb_tree__insert(struct rb_root *root, struct annotation_line } static void annotate_browser__set_top(struct annotate_browser *browser, - struct disasm_line *pos, u32 idx) + struct annotation_line *pos, u32 idx) { unsigned back; @@ -399,16 +399,16 @@ static void annotate_browser__set_top(struct annotate_browser *browser, browser->b.top_idx = browser->b.index = idx; while (browser->b.top_idx != 0 && back != 0) { - pos = list_entry(pos->al.node.prev, struct disasm_line, al.node); + pos = list_entry(pos->node.prev, struct annotation_line, node); - if (disasm_line__filter(&browser->b, &pos->al.node)) + if (disasm_line__filter(&browser->b, &pos->node)) continue; --browser->b.top_idx; --back; } - browser->b.top = &pos->al; + browser->b.top = pos; browser->b.navkeypressed = true; } @@ -416,11 +416,11 @@ static void annotate_browser__set_rb_top(struct annotate_browser *browser, struct rb_node *nd) { struct browser_line *bpos; - struct disasm_line *pos; + struct annotation_line *pos; u32 idx; - pos = rb_entry(nd, struct disasm_line, al.rb_node); - bpos = browser_line(&pos->al); + pos = rb_entry(nd, struct annotation_line, rb_node); + bpos = browser_line(pos); idx = bpos->idx; if (annotate_browser__opts.hide_src_code) @@ -472,13 +472,13 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, static bool annotate_browser__toggle_source(struct annotate_browser *browser) { - struct disasm_line *dl; + struct annotation_line *al; struct browser_line *bl; off_t offset = browser->b.index - browser->b.top_idx; browser->b.seek(&browser->b, offset, SEEK_CUR); - dl = list_entry(browser->b.top, struct disasm_line, al.node); - bl = browser_line(&dl->al); + al = list_entry(browser->b.top, struct annotation_line, node); + bl = browser_line(al); if (annotate_browser__opts.hide_src_code) { if (bl->idx_asm < offset) @@ -600,7 +600,7 @@ static bool annotate_browser__jump(struct annotate_browser *browser) return true; } - annotate_browser__set_top(browser, dl, idx); + annotate_browser__set_top(browser, &dl->al, idx); return true; } @@ -639,7 +639,7 @@ static bool __annotate_browser__search(struct annotate_browser *browser) return false; } - annotate_browser__set_top(browser, disasm_line(al), idx); + annotate_browser__set_top(browser, al, idx); browser->searching_backwards = false; return true; } @@ -678,7 +678,7 @@ static bool __annotate_browser__search_reverse(struct annotate_browser *browser) return false; } - annotate_browser__set_top(browser, disasm_line(al), idx); + annotate_browser__set_top(browser, al, idx); browser->searching_backwards = true; return true; } -- cgit v1.2.3-70-g09d2 From a5433b3ec937765a1d7521bc6bb87f6e76497640 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:57 +0200 Subject: perf annotate browser: Add disasm_line__write function Factor disasm_line__write function from annotate_browser__write, which now keeps only generic display code. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-35-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 98 +++++++++++++++++++++------------------ 1 file changed, 53 insertions(+), 45 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index f0f27cf9db85..5a2f37a91feb 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -118,11 +118,37 @@ static int annotate_browser__cycles_width(struct annotate_browser *ab) return ab->have_cycles ? IPC_WIDTH + CYCLES_WIDTH : 0; } +static void disasm_line__write(struct disasm_line *dl, struct ui_browser *browser, + char *bf, size_t size) +{ + if (dl->ins.ops && dl->ins.ops->scnprintf) { + if (ins__is_jump(&dl->ins)) { + bool fwd = dl->ops.target.offset > dl->al.offset; + + ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : + SLSMG_UARROW_CHAR); + SLsmg_write_char(' '); + } else if (ins__is_call(&dl->ins)) { + ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); + SLsmg_write_char(' '); + } else if (ins__is_ret(&dl->ins)) { + ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); + SLsmg_write_char(' '); + } else { + ui_browser__write_nstring(browser, " ", 2); + } + } else { + ui_browser__write_nstring(browser, " ", 2); + } + + disasm_line__scnprintf(dl, bf, size, !annotate_browser__opts.use_offset); +} + static void annotate_browser__write(struct ui_browser *browser, void *entry, int row) { struct annotate_browser *ab = container_of(browser, struct annotate_browser, b); - struct disasm_line *dl = list_entry(entry, struct disasm_line, al.node); - struct browser_line *bdl = browser_line(&dl->al); + struct annotation_line *al = list_entry(entry, struct annotation_line, node); + struct browser_line *bl = browser_line(al); bool current_entry = ui_browser__is_current_entry(browser, row); bool change_color = (!annotate_browser__opts.hide_src_code && (!current_entry || (browser->use_navkeypressed && @@ -135,32 +161,32 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int bool show_title = false; for (i = 0; i < ab->nr_events; i++) { - if (dl->al.samples[i].percent > percent_max) - percent_max = dl->al.samples[i].percent; + if (al->samples[i].percent > percent_max) + percent_max = al->samples[i].percent; } - if ((row == 0) && (dl->al.offset == -1 || percent_max == 0.0)) { + if ((row == 0) && (al->offset == -1 || percent_max == 0.0)) { if (ab->have_cycles) { - if (dl->al.ipc == 0.0 && dl->al.cycles == 0) + if (al->ipc == 0.0 && al->cycles == 0) show_title = true; } else show_title = true; } - if (dl->al.offset != -1 && percent_max != 0.0) { + if (al->offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { ui_browser__set_percent_color(browser, - dl->al.samples[i].percent, + al->samples[i].percent, current_entry); if (annotate_browser__opts.show_total_period) { ui_browser__printf(browser, "%11" PRIu64 " ", - dl->al.samples[i].he.period); + al->samples[i].he.period); } else if (annotate_browser__opts.show_nr_samples) { ui_browser__printf(browser, "%6" PRIu64 " ", - dl->al.samples[i].he.nr_samples); + al->samples[i].he.nr_samples); } else { ui_browser__printf(browser, "%6.2f ", - dl->al.samples[i].percent); + al->samples[i].percent); } } } else { @@ -175,16 +201,16 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int } } if (ab->have_cycles) { - if (dl->al.ipc) - ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, dl->al.ipc); + if (al->ipc) + ui_browser__printf(browser, "%*.2f ", IPC_WIDTH - 1, al->ipc); else if (!show_title) ui_browser__write_nstring(browser, " ", IPC_WIDTH); else ui_browser__printf(browser, "%*s ", IPC_WIDTH - 1, "IPC"); - if (dl->al.cycles) + if (al->cycles) ui_browser__printf(browser, "%*" PRIu64 " ", - CYCLES_WIDTH - 1, dl->al.cycles); + CYCLES_WIDTH - 1, al->cycles); else if (!show_title) ui_browser__write_nstring(browser, " ", CYCLES_WIDTH); else @@ -197,19 +223,19 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!browser->navkeypressed) width += 1; - if (!*dl->al.line) + if (!*al->line) ui_browser__write_nstring(browser, " ", width - pcnt_width - cycles_width); - else if (dl->al.offset == -1) { - if (dl->al.line_nr && annotate_browser__opts.show_linenr) + else if (al->offset == -1) { + if (al->line_nr && annotate_browser__opts.show_linenr) printed = scnprintf(bf, sizeof(bf), "%-*d ", - ab->addr_width + 1, dl->al.line_nr); + ab->addr_width + 1, al->line_nr); else printed = scnprintf(bf, sizeof(bf), "%*s ", ab->addr_width, " "); ui_browser__write_nstring(browser, bf, printed); - ui_browser__write_nstring(browser, dl->al.line, width - printed - pcnt_width - cycles_width + 1); + ui_browser__write_nstring(browser, al->line, width - printed - pcnt_width - cycles_width + 1); } else { - u64 addr = dl->al.offset; + u64 addr = al->offset; int color = -1; if (!annotate_browser__opts.use_offset) @@ -218,13 +244,13 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int if (!annotate_browser__opts.use_offset) { printed = scnprintf(bf, sizeof(bf), "%" PRIx64 ": ", addr); } else { - if (bdl->jump_sources) { + if (bl->jump_sources) { if (annotate_browser__opts.show_nr_jumps) { int prev; printed = scnprintf(bf, sizeof(bf), "%*d ", ab->jumps_width, - bdl->jump_sources); - prev = annotate_browser__set_jumps_percent_color(ab, bdl->jump_sources, + bl->jump_sources); + prev = annotate_browser__set_jumps_percent_color(ab, bl->jump_sources, current_entry); ui_browser__write_nstring(browser, bf, printed); ui_browser__set_color(browser, prev); @@ -243,32 +269,14 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int ui_browser__write_nstring(browser, bf, printed); if (change_color) ui_browser__set_color(browser, color); - if (dl->ins.ops && dl->ins.ops->scnprintf) { - if (ins__is_jump(&dl->ins)) { - bool fwd = dl->ops.target.offset > dl->al.offset; - - ui_browser__write_graph(browser, fwd ? SLSMG_DARROW_CHAR : - SLSMG_UARROW_CHAR); - SLsmg_write_char(' '); - } else if (ins__is_call(&dl->ins)) { - ui_browser__write_graph(browser, SLSMG_RARROW_CHAR); - SLsmg_write_char(' '); - } else if (ins__is_ret(&dl->ins)) { - ui_browser__write_graph(browser, SLSMG_LARROW_CHAR); - SLsmg_write_char(' '); - } else { - ui_browser__write_nstring(browser, " ", 2); - } - } else { - ui_browser__write_nstring(browser, " ", 2); - } - disasm_line__scnprintf(dl, bf, sizeof(bf), !annotate_browser__opts.use_offset); + disasm_line__write(disasm_line(al), browser, bf, sizeof(bf)); + ui_browser__write_nstring(browser, bf, width - pcnt_width - cycles_width - 3 - printed); } if (current_entry) - ab->selection = &dl->al; + ab->selection = al; } static bool disasm_line__is_valid_jump(struct disasm_line *dl, struct symbol *sym) -- cgit v1.2.3-70-g09d2 From f48e7c407050e5f5f53a0fa9a266d83b001dd356 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 11 Oct 2017 17:01:58 +0200 Subject: perf annotate: Align source and offset lines Align source with offset lines, which are more advanced, because of the address column. Before: : static void *worker_thread(void *__tdata) : { 0.00 : 48a971: push %rbp 0.00 : 48a972: mov %rsp,%rbp 0.00 : 48a975: sub $0x30,%rsp 0.00 : 48a979: mov %rdi,-0x28(%rbp) 0.00 : 48a97d: mov %fs:0x28,%rax 0.00 : 48a986: mov %rax,-0x8(%rbp) 0.00 : 48a98a: xor %eax,%eax : struct thread_data *td = __tdata; 0.00 : 48a98c: mov -0x28(%rbp),%rax 0.00 : 48a990: mov %rax,-0x10(%rbp) : int m = 0, i; 0.00 : 48a994: movl $0x0,-0x1c(%rbp) : int ret; : : for (i = 0; i < loops; i++) { 0.00 : 48a99b: movl $0x0,-0x18(%rbp) After: : static void *worker_thread(void *__tdata) : { 0.00 : 48a971: push %rbp 0.00 : 48a972: mov %rsp,%rbp 0.00 : 48a975: sub $0x30,%rsp 0.00 : 48a979: mov %rdi,-0x28(%rbp) 0.00 : 48a97d: mov %fs:0x28,%rax 0.00 : 48a986: mov %rax,-0x8(%rbp) 0.00 : 48a98a: xor %eax,%eax : struct thread_data *td = __tdata; 0.00 : 48a98c: mov -0x28(%rbp),%rax 0.00 : 48a990: mov %rax,-0x10(%rbp) : int m = 0, i; 0.00 : 48a994: movl $0x0,-0x1c(%rbp) : int ret; : : for (i = 0; i < loops; i++) { 0.00 : 48a99b: movl $0x0,-0x18(%rbp) It makes bigger different when displaying script sources, where the comment lines looks oddly shifted from the lines which actually hold code. I'll send script support separately. Committer note: Do not use a fixed column width for the addresses, as kernel ones se more than 10 columns, look at the last offset and get the right width. Signed-off-by: Jiri Olsa Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171011150158.11895-36-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index cb065ca431ee..eab4a8e3c679 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1092,15 +1092,14 @@ static void annotate__branch_printf(struct block_range *br, u64 addr) } } - -static int disasm_line__print(struct disasm_line *dl, u64 start) +static int disasm_line__print(struct disasm_line *dl, u64 start, int addr_fmt_width) { s64 offset = dl->al.offset; const u64 addr = start + offset; struct block_range *br; br = block_range__find(addr); - color_fprintf(stdout, annotate__address_color(br), " %" PRIx64 ":", addr); + color_fprintf(stdout, annotate__address_color(br), " %*" PRIx64 ":", addr_fmt_width, addr); color_fprintf(stdout, annotate__asm_color(br), "%s", dl->al.line); annotate__branch_printf(br, addr); return 0; @@ -1109,7 +1108,7 @@ static int disasm_line__print(struct disasm_line *dl, u64 start) static int annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start, struct perf_evsel *evsel, u64 len, int min_pcnt, int printed, - int max_lines, struct annotation_line *queue) + int max_lines, struct annotation_line *queue, int addr_fmt_width) { struct disasm_line *dl = container_of(al, struct disasm_line, al); static const char *prev_line; @@ -1139,7 +1138,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (queue == al) break; annotation_line__print(queue, sym, start, evsel, len, - 0, 0, 1, NULL); + 0, 0, 1, NULL, addr_fmt_width); } } @@ -1174,9 +1173,9 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start color_fprintf(stdout, color, " %7.2f", sample->percent); } - printf(" : "); + printf(" : "); - disasm_line__print(dl, start); + disasm_line__print(dl, start, addr_fmt_width); printf("\n"); } else if (max_lines && printed >= max_lines) return 1; @@ -1192,7 +1191,7 @@ annotation_line__print(struct annotation_line *al, struct symbol *sym, u64 start if (!*al->line) printf(" %*s:\n", width, " "); else - printf(" %*s: %s\n", width, " ", al->line); + printf(" %*s: %*s %s\n", width, " ", addr_fmt_width, " ", al->line); } return 0; @@ -1796,6 +1795,19 @@ static void symbol__annotate_hits(struct symbol *sym, struct perf_evsel *evsel) printf("%*s: %" PRIu64 "\n", BITS_PER_LONG / 2, "h->nr_samples", h->nr_samples); } +static int annotated_source__addr_fmt_width(struct list_head *lines, u64 start) +{ + char bf[32]; + struct annotation_line *line; + + list_for_each_entry_reverse(line, lines, node) { + if (line->offset != -1) + return scnprintf(bf, sizeof(bf), "%" PRIx64, start + line->offset); + } + + return 0; +} + int symbol__annotate_printf(struct symbol *sym, struct map *map, struct perf_evsel *evsel, bool full_paths, int min_pcnt, int max_lines, int context) @@ -1808,7 +1820,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, struct sym_hist *h = annotation__histogram(notes, evsel->idx); struct annotation_line *pos, *queue = NULL; u64 start = map__rip_2objdump(map, sym->start); - int printed = 2, queue_len = 0; + int printed = 2, queue_len = 0, addr_fmt_width; int more = 0; u64 len; int width = symbol_conf.show_total_period ? 12 : 8; @@ -1839,6 +1851,8 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, if (verbose > 0) symbol__annotate_hits(sym, evsel); + addr_fmt_width = annotated_source__addr_fmt_width(¬es->src->source, start); + list_for_each_entry(pos, ¬es->src->source, node) { int err; @@ -1849,7 +1863,7 @@ int symbol__annotate_printf(struct symbol *sym, struct map *map, err = annotation_line__print(pos, sym, start, evsel, len, min_pcnt, printed, max_lines, - queue); + queue, addr_fmt_width); switch (err) { case 0: -- cgit v1.2.3-70-g09d2 From 1873b710ace80b9437227a0f8f1a5dab18f49ec1 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 7 Nov 2017 13:41:35 -0300 Subject: tools headers: Synchronize kernel ABI headers wrt SPDX tags Two more, that were just in perf/core and thus weren't covered by Ingo's latest headers synch, kcmp.h and prctl.h, silencing this: Warning: Kernel ABI header at 'tools/include/uapi/linux/kcmp.h' differs from latest version at 'include/uapi/linux/kcmp.h' Warning: Kernel ABI header at 'tools/include/uapi/linux/prctl.h' differs from latest version at 'include/uapi/linux/prctl.h' Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-2a0r7iybyqpkftllyy5t9hfk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/linux/kcmp.h | 1 + tools/include/uapi/linux/prctl.h | 1 + 2 files changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/include/uapi/linux/kcmp.h b/tools/include/uapi/linux/kcmp.h index 481e103da78e..ef1305010925 100644 --- a/tools/include/uapi/linux/kcmp.h +++ b/tools/include/uapi/linux/kcmp.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _UAPI_LINUX_KCMP_H #define _UAPI_LINUX_KCMP_H diff --git a/tools/include/uapi/linux/prctl.h b/tools/include/uapi/linux/prctl.h index a8d0759a9e40..b640071421f7 100644 --- a/tools/include/uapi/linux/prctl.h +++ b/tools/include/uapi/linux/prctl.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ #ifndef _LINUX_PRCTL_H #define _LINUX_PRCTL_H -- cgit v1.2.3-70-g09d2 From c2f1cead19b628d7a23d2cfc43e444af669f9eab Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Oct 2017 13:27:55 -0700 Subject: perf record: Fix -c/-F options for cpu event aliases The Intel PMU event aliases have a implicit period= specifier to set the default period. Unfortunately this breaks overriding these periods with -c or -F, because the alias terms look like they are user specified to the internal parser, and user specified event qualifiers override the command line options. Track that they are coming from aliases by adding a "weak" state to the term. Any weak terms don't override command line options. I only did it for -c/-F for now, I think that's the only case that's broken currently. Before: $ perf record -c 1000 -vv -e uops_issued.any ... { sample_period, sample_freq } 2000003 After: $ perf record -c 1000 -vv -e uops_issued.any ... { sample_period, sample_freq } 1000 Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171020202755.21410-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 12 ++++++++---- tools/perf/util/evsel.h | 1 + tools/perf/util/parse-events.c | 2 ++ tools/perf/util/parse-events.h | 3 +++ tools/perf/util/pmu.c | 5 +++++ 5 files changed, 19 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index f894893c203d..bfde6a7a80f2 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -733,12 +733,16 @@ static void apply_config_terms(struct perf_evsel *evsel, list_for_each_entry(term, config_terms, list) { switch (term->type) { case PERF_EVSEL__CONFIG_TERM_PERIOD: - attr->sample_period = term->val.period; - attr->freq = 0; + if (!(term->weak && opts->user_interval != ULLONG_MAX)) { + attr->sample_period = term->val.period; + attr->freq = 0; + } break; case PERF_EVSEL__CONFIG_TERM_FREQ: - attr->sample_freq = term->val.freq; - attr->freq = 1; + if (!(term->weak && opts->user_freq != UINT_MAX)) { + attr->sample_freq = term->val.freq; + attr->freq = 1; + } break; case PERF_EVSEL__CONFIG_TERM_TIME: if (term->val.time) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 9277df96ffda..157f49e8a772 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -67,6 +67,7 @@ struct perf_evsel_config_term { bool overwrite; char *branch; } val; + bool weak; }; struct perf_stat_evsel; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index a7fcd95961ef..170316795a18 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -1116,6 +1116,7 @@ do { \ INIT_LIST_HEAD(&__t->list); \ __t->type = PERF_EVSEL__CONFIG_TERM_ ## __type; \ __t->val.__name = __val; \ + __t->weak = term->weak; \ list_add_tail(&__t->list, head_terms); \ } while (0) @@ -2410,6 +2411,7 @@ static int new_term(struct parse_events_term **_term, *term = *temp; INIT_LIST_HEAD(&term->list); + term->weak = false; switch (term->type_val) { case PARSE_EVENTS__TERM_TYPE_NUM: diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index be337c266697..88108cd11b4c 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -101,6 +101,9 @@ struct parse_events_term { /* error string indexes for within parsed string */ int err_term; int err_val; + + /* Coming from implicit alias */ + bool weak; }; struct parse_events_error { diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 07cb2ac041d7..80fb1593913a 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -405,6 +405,11 @@ static int pmu_alias_terms(struct perf_pmu_alias *alias, parse_events_terms__purge(&list); return ret; } + /* + * Weak terms don't override command line options, + * which we don't want for implicit terms in aliases. + */ + cloned->weak = true; list_add_tail(&cloned->list, &list); } list_splice(&list, terms); -- cgit v1.2.3-70-g09d2 From d0565132605f454179699a1b8a3276fc0f8cc87b Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 20 Oct 2017 13:27:54 -0700 Subject: perf evsel: Enable type checking for perf_evsel_config_term types Use a typed enum for the perf_evsel_config_term type enum. This allows gcc to do much stronger type checks, and also check for missing case statements. I removed the unused _MAX member from the number. It found one missing case. I'm not sure it's a real problem, so I just turned it into a BUG_ON for now. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171020202755.21410-1-andi@firstfloor.org [ Renamed the enum name to term_type as per jolsa's request ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 ++ tools/perf/util/evsel.h | 5 ++--- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index bfde6a7a80f2..4376cdfaea49 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -779,6 +779,8 @@ static void apply_config_terms(struct perf_evsel *evsel, case PERF_EVSEL__CONFIG_TERM_OVERWRITE: attr->write_backward = term->val.overwrite ? 1 : 0; break; + case PERF_EVSEL__CONFIG_TERM_DRV_CFG: + BUG_ON(1); default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 157f49e8a772..0688880227e1 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -38,7 +38,7 @@ struct cgroup_sel; * It is allocated within event parsing and attached to * perf_evsel::config_terms list head. */ -enum { +enum term_type { PERF_EVSEL__CONFIG_TERM_PERIOD, PERF_EVSEL__CONFIG_TERM_FREQ, PERF_EVSEL__CONFIG_TERM_TIME, @@ -49,12 +49,11 @@ enum { PERF_EVSEL__CONFIG_TERM_OVERWRITE, PERF_EVSEL__CONFIG_TERM_DRV_CFG, PERF_EVSEL__CONFIG_TERM_BRANCH, - PERF_EVSEL__CONFIG_TERM_MAX, }; struct perf_evsel_config_term { struct list_head list; - int type; + enum term_type type; union { u64 period; u64 freq; -- cgit v1.2.3-70-g09d2 From cbd5c1787bab4643e5959522275b46de94eba5ac Mon Sep 17 00:00:00 2001 From: Andrei Vagin Date: Tue, 7 Nov 2017 16:22:46 -0800 Subject: perf trace: Fix an exit code of trace__symbols_init Currently if trace_event__register_resolver() fails, we return -errno, but we can't be sure that errno isn't zero in this case. Signed-off-by: Andrei Vagin Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vasily Averin Link: http://lkml.kernel.org/r/20171108002246.8924-2-avagin@openvz.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index f2757d38c7d7..84debdbad327 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1152,12 +1152,14 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) if (trace->host == NULL) return -ENOMEM; - if (trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr) < 0) - return -errno; + err = trace_event__register_resolver(trace->host, trace__machine__resolve_kernel_addr); + if (err < 0) + goto out; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, evlist->threads, trace__tool_process, false, trace->opts.proc_map_timeout, 1); +out: if (err) symbol__exit(); -- cgit v1.2.3-70-g09d2 From 86f5fe01cf3ad42e99e7655dec93e0e36ef65614 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 8 Nov 2017 11:27:37 +0100 Subject: perf tools: Use shell function for perl cflags retrieval Using the shell function for perl CFLAGS retrieval instead of back quotes (``). Both execute shell with the command, but the latter is more explicit and seems to be the preferred way. Also we don't have any other use of the back quotes in perf Makefiles. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171108102739.30338-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 63f534a0902f..f6786fa2419f 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -579,7 +579,7 @@ else PERL_EMBED_LDOPTS = $(shell perl -MExtUtils::Embed -e ldopts 2>/dev/null) PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) - PERL_EMBED_CCOPTS = `perl -MExtUtils::Embed -e ccopts 2>/dev/null` + PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) -- cgit v1.2.3-70-g09d2 From 82806c3aae7496d6974429f3bd43ebeeef18b9b2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2017 12:03:40 -0300 Subject: perf evsel: Fix up leftover perf_evsel_stat usage via evsel->priv I forgot one conversion, which got noticed by Thomas when running: $ perf stat -e '{cpu-clock,instructions}' kill kill: not enough arguments Segmentation fault (core dumped) $ Fix it, those stats are in evsel->stats, not anymore in evsel->priv. Reported-by: Thomas-Mich Richter Tested-by: Thomas-Mich Richter Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Fixes: e669e833da8d ("perf evsel: Restore evsel->priv as a tool private area") Link: http://lkml.kernel.org/r/20171109150046.GN4333@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4376cdfaea49..cb9bcdb065ea 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1377,7 +1377,7 @@ perf_evsel__process_group_data(struct perf_evsel *leader, static int perf_evsel__read_group(struct perf_evsel *leader, int cpu, int thread) { - struct perf_stat_evsel *ps = leader->priv; + struct perf_stat_evsel *ps = leader->stats; u64 read_format = leader->attr.read_format; int size = perf_evsel__read_size(leader); u64 *data = ps->group_data; -- cgit v1.2.3-70-g09d2 From 60dbcd2532dd7eec2f1e23a37b80ff85d8fb2953 Mon Sep 17 00:00:00 2001 From: Seonghyun Park Date: Thu, 9 Nov 2017 23:07:04 +0900 Subject: perf tests: Add missing WRITE_ASS for new fields of perf_event_attr Include newly added fields 'mmap2', 'comm_exec', 'use_clockid', 'namespaces', 'write_backward' and 'context_switch' from perf_event_attr to store_event(). Signed-off-by: Seonghyun Park Cc: Jiri Olsa Cc: Namhyung Kim Cc: Seonghyun Park Link: http://lkml.kernel.org/n/tip-vltn7pqhcv8h5fmo9cthk87q@git.kernel.org [ Fix log message to add 'write_backward', fix the patch to add 'use_clock_id' ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/attr.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/tests/attr.c b/tools/perf/tests/attr.c index 0e1367f90af5..97f64ad7fa08 100644 --- a/tools/perf/tests/attr.c +++ b/tools/perf/tests/attr.c @@ -124,6 +124,12 @@ static int store_event(struct perf_event_attr *attr, pid_t pid, int cpu, WRITE_ASS(exclude_guest, "d"); WRITE_ASS(exclude_callchain_kernel, "d"); WRITE_ASS(exclude_callchain_user, "d"); + WRITE_ASS(mmap2, "d"); + WRITE_ASS(comm_exec, "d"); + WRITE_ASS(context_switch, "d"); + WRITE_ASS(write_backward, "d"); + WRITE_ASS(namespaces, "d"); + WRITE_ASS(use_clockid, "d"); WRITE_ASS(wakeup_events, PRIu32); WRITE_ASS(bp_type, PRIu32); WRITE_ASS(config1, "llu"); -- cgit v1.2.3-70-g09d2 From fa48c892645dfd3159e5aa6eb9cefd00d5cb347a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 9 Nov 2017 16:04:26 -0300 Subject: perf script: Fix --per-event-dump for auxtrace synth evsels When processing PERF_RECORD_AUXTRACE_INFO several perf_evsel entries will be synthesized and inserted into session->evlist, eventually ending in perf_script.tool.sample(), which ends up calling builtin-script.c's process_event(), that expects evsel->priv to be a perf_evsel_script object with a valid FILE pointer in fp. So we need to intercept the processing of PERF_RECORD_AUXTRACE_INFO and then setup evsel->priv for these newly created perf_evsel instances, do it to fix the segfault in process_event() trying to use a NULL for that FILE pointer. Reported-by: Alexander Shishkin Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Ravi Bangoria Cc: Wang Nan Cc: yuzhoujian Fixes: a14390fde64e ("perf script: Allow creating per-event dump files") Link: http://lkml.kernel.org/n/tip-bthnur8r8de01gxvn2qayx6e@git.kernel.org [ Merge fix by Ravi Bangoria before pushing upstream to preserv bisectability ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 68f36dc0344f..9b43bda45a41 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1955,6 +1955,16 @@ static int perf_script__fopen_per_event_dump(struct perf_script *script) struct perf_evsel *evsel; evlist__for_each_entry(script->session->evlist, evsel) { + /* + * Already setup? I.e. we may be called twice in cases like + * Intel PT, one for the intel_pt// and dummy events, then + * for the evsels syntheized from the auxtrace info. + * + * Ses perf_script__process_auxtrace_info. + */ + if (evsel->priv != NULL) + continue; + evsel->priv = perf_evsel_script__new(evsel, script->session->data); if (evsel->priv == NULL) goto out_err_fclose; @@ -2838,6 +2848,25 @@ int process_cpu_map_event(struct perf_tool *tool __maybe_unused, return set_maps(script); } +#ifdef HAVE_AUXTRACE_SUPPORT +static int perf_script__process_auxtrace_info(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + int ret = perf_event__process_auxtrace_info(tool, event, session); + + if (ret == 0) { + struct perf_script *script = container_of(tool, struct perf_script, tool); + + ret = perf_script__setup_per_event_dump(script); + } + + return ret; +} +#else +#define perf_script__process_auxtrace_info 0 +#endif + int cmd_script(int argc, const char **argv) { bool show_full_info = false; @@ -2866,7 +2895,7 @@ int cmd_script(int argc, const char **argv) .feature = perf_event__process_feature, .build_id = perf_event__process_build_id, .id_index = perf_event__process_id_index, - .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace_info = perf_script__process_auxtrace_info, .auxtrace = perf_event__process_auxtrace, .auxtrace_error = perf_event__process_auxtrace_error, .stat = perf_event__process_stat_event, -- cgit v1.2.3-70-g09d2 From e795dd42b716ff36ebaa5384fd1be8458d6c9c34 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Wed, 8 Nov 2017 18:42:03 -0500 Subject: perf vendor events powerpc: Update POWER9 events The POWER9 hardware has dropped support for several events, added a few new events and changed the category for a couple of events. Update the POWER9 events in Linux to reflect these changes. Signed-off-by: Sukadev Bhattiprolu Cc: Jiri Olsa Cc: Michael Ellerman Cc: Madhavan Srinivasan Link: http://lkml.kernel.org/r/20171108201938.GA10985@us.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/powerpc/power9/cache.json | 5 - .../pmu-events/arch/powerpc/power9/frontend.json | 7 +- .../pmu-events/arch/powerpc/power9/marked.json | 27 +- .../perf/pmu-events/arch/powerpc/power9/other.json | 276 ++++++--------------- .../pmu-events/arch/powerpc/power9/pipeline.json | 14 +- tools/perf/pmu-events/arch/powerpc/power9/pmc.json | 2 +- .../arch/powerpc/power9/translation.json | 5 - 7 files changed, 88 insertions(+), 248 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/powerpc/power9/cache.json b/tools/perf/pmu-events/arch/powerpc/power9/cache.json index 18f6645f2897..7945c5196c43 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/cache.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/cache.json @@ -124,11 +124,6 @@ "EventName": "PM_CMPLU_STALL_LARX", "BriefDescription": "Finish stall because the NTF instruction was a larx waiting to be satisfied" }, - {, - "EventCode": "0x3006C", - "EventName": "PM_RUN_CYC_SMT2_MODE", - "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT2 mode" - }, {, "EventCode": "0x1C058", "EventName": "PM_DTLB_MISS_16G", diff --git a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json index c63a919eda98..bd8361b5fd6a 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/frontend.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/frontend.json @@ -1,9 +1,4 @@ [ - {, - "EventCode": "0x3E15C", - "EventName": "PM_MRK_L2_TM_ST_ABORT_SISTER", - "BriefDescription": "TM marked store abort for this thread" - }, {, "EventCode": "0x25044", "EventName": "PM_IPTEG_FROM_L31_MOD", @@ -369,4 +364,4 @@ "EventName": "PM_IPTEG_FROM_L31_ECO_MOD", "BriefDescription": "A Page Table Entry was loaded into the TLB with Modified (M) data from another core's ECO L3 on the same chip due to a instruction side request" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/marked.json b/tools/perf/pmu-events/arch/powerpc/power9/marked.json index b9df54fb37e3..22f9f32060a8 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/marked.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/marked.json @@ -1,9 +1,4 @@ [ - {, - "EventCode": "0x3C052", - "EventName": "PM_DATA_SYS_PUMP_MPRED", - "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load" - }, {, "EventCode": "0x3013E", "EventName": "PM_MRK_STALL_CMPLU_CYC", @@ -254,6 +249,11 @@ "EventName": "PM_RADIX_PWC_L1_PDE_FROM_L3", "BriefDescription": "A Page Directory Entry was reloaded to a level 1 page walk cache from the core's L3 data cache" }, + {, + "EventCode": "0x3C052", + "EventName": "PM_DATA_SYS_PUMP_MPRED", + "BriefDescription": "Final Pump Scope (system) mispredicted. Either the original scope was too small (Chip/Group) or the original scope was System and it should have been smaller. Counts for a demand load" + }, {, "EventCode": "0x4D142", "EventName": "PM_MRK_DATA_FROM_L3", @@ -434,21 +434,6 @@ "EventName": "PM_ITLB_MISS", "BriefDescription": "ITLB Reloaded. Counts 1 per ITLB miss for HPT but multiple for radix depending on number of levels traveresed" }, - {, - "EventCode": "0x2D024", - "EventName": "PM_RADIX_PWC_L2_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on both the first and second levels of page walk cache." - }, - {, - "EventCode": "0x3F056", - "EventName": "PM_RADIX_PWC_L3_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB but hit on the first, second, and third levels of page walk cache." - }, - {, - "EventCode": "0x4E014", - "EventName": "PM_TM_TX_PASS_RUN_INST", - "BriefDescription": "Run instructions spent in successful transactions" - }, {, "EventCode": "0x1E044", "EventName": "PM_DPTEG_FROM_L3_NO_CONFLICT", @@ -644,4 +629,4 @@ "EventName": "PM_MRK_BR_MPRED_CMPL", "BriefDescription": "Marked Branch Mispredicted" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/other.json b/tools/perf/pmu-events/arch/powerpc/power9/other.json index 54cc3be00fc2..5ce312973f1e 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/other.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/other.json @@ -79,6 +79,11 @@ "EventName": "PM_RADIX_PWC_MISS", "BriefDescription": "A radix translation attempt missed in the TLB and all levels of page walk cache." }, + {, + "EventCode": "0x26882", + "EventName": "PM_L2_DC_INV", + "BriefDescription": "D-cache invalidates sent over the reload bus to the core" + }, {, "EventCode": "0x24048", "EventName": "PM_INST_FROM_LMEM", @@ -94,11 +99,6 @@ "EventName": "PM_TM_PASSED", "BriefDescription": "Number of TM transactions that passed" }, - {, - "EventCode": "0xD1A0", - "EventName": "PM_MRK_LSU_FLUSH_LHS", - "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" - }, {, "EventCode": "0xF088", "EventName": "PM_LSU0_STORE_REJECT", @@ -127,7 +127,7 @@ {, "EventCode": "0xD08C", "EventName": "PM_LSU2_LDMX_FIN", - "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])" + "BriefDescription": "New P9 instruction LDMX. The definition of this new PMU event is (from the ldmx RFC02491): The thread has executed an ldmx instruction that accessed a doubleword that contains an effective address within an enabled section of the Load Monitored region. This event, therefore, should not occur if the FSCR has disabled the load monitored facility (FSCR[52]) or disabled the EBB facility (FSCR[56])." }, {, "EventCode": "0x300F8", @@ -204,11 +204,6 @@ "EventName": "PM_MRK_DATA_FROM_L31_ECO_MOD_CYC", "BriefDescription": "Duration in cycles to reload with Modified (M) data from another core's ECO L3 on the same chip due to a marked load" }, - {, - "EventCode": "0xF0B4", - "EventName": "PM_DC_PREF_CONS_ALLOC", - "BriefDescription": "Prefetch stream allocated in the conservative phase by either the hardware prefetch mechanism or software prefetch" - }, {, "EventCode": "0xF894", "EventName": "PM_LSU3_L1_CAM_CANCEL", @@ -219,21 +214,11 @@ "EventName": "PM_FLUSH_DISP_TLBIE", "BriefDescription": "Dispatch Flush: TLBIE" }, - {, - "EventCode": "0xD1A4", - "EventName": "PM_MRK_LSU_FLUSH_SAO", - "BriefDescription": "A load-hit-load condition with Strong Address Ordering will have address compare disabled and flush" - }, {, "EventCode": "0x4E11E", "EventName": "PM_MRK_DATA_FROM_DMEM_CYC", "BriefDescription": "Duration in cycles to reload from another chip's memory on the same Node or Group (Distant) due to a marked load" }, - {, - "EventCode": "0x5894", - "EventName": "PM_LWSYNC", - "BriefDescription": "Lwsync instruction decoded and transferred" - }, {, "EventCode": "0x14156", "EventName": "PM_MRK_DATA_FROM_L2_CYC", @@ -244,11 +229,6 @@ "EventName": "PM_RD_CLEARING_SC", "BriefDescription": "Read clearing SC" }, - {, - "EventCode": "0x50A0", - "EventName": "PM_HWSYNC", - "BriefDescription": "Hwsync instruction decoded and transferred" - }, {, "EventCode": "0x168B0", "EventName": "PM_L3_P1_NODE_PUMP", @@ -264,6 +244,11 @@ "EventName": "PM_MRK_DATA_FROM_L2_DISP_CONFLICT_LDHITST", "BriefDescription": "The processor's data cache was reloaded from local core's L2 with load hit store conflict due to a marked load" }, + {, + "EventCode": "0x468AE", + "EventName": "PM_L3_P3_CO_RTY", + "BriefDescription": "L3 CO received retry port 3 (memory only), every retry counted" + }, {, "EventCode": "0x460A8", "EventName": "PM_SN_HIT", @@ -279,11 +264,6 @@ "EventName": "PM_DC_PREF_HW_ALLOC", "BriefDescription": "Prefetch stream allocated by the hardware prefetch mechanism" }, - {, - "EventCode": "0xF0BC", - "EventName": "PM_LS2_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0xD0AC", "EventName": "PM_SRQ_SYNC_CYC", @@ -379,26 +359,11 @@ "EventName": "PM_RUN_CYC_SMT4_MODE", "BriefDescription": "Cycles in which this thread's run latch is set and the core is in SMT4 mode" }, - {, - "EventCode": "0x5088", - "EventName": "PM_DECODE_FUSION_OP_PRESERV", - "BriefDescription": "Destructive op operand preservation" - }, {, "EventCode": "0x1D14E", "EventName": "PM_MRK_DATA_FROM_OFF_CHIP_CACHE_CYC", "BriefDescription": "Duration in cycles to reload either shared or modified data from another core's L2/L3 on a different chip (remote or distant) due to a marked load" }, - {, - "EventCode": "0x509C", - "EventName": "PM_FORCED_NOP", - "BriefDescription": "Instruction was forced to execute as a nop because it was found to behave like a nop (have no effect) at decode time" - }, - {, - "EventCode": "0xC098", - "EventName": "PM_LS2_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x20058", "EventName": "PM_DARQ1_10_12_ENTRIES", @@ -434,11 +399,6 @@ "EventName": "PM_LSU1_STORE_REJECT", "BriefDescription": "All internal store rejects cause the instruction to go back to the SRQ and go to sleep until woken up to try again after the condition has been met" }, - {, - "EventCode": "0x4505E", - "EventName": "PM_FLOP_CMPL", - "BriefDescription": "Floating Point Operation Finished" - }, {, "EventCode": "0x1D144", "EventName": "PM_MRK_DATA_FROM_L3_DISP_CONFLICT", @@ -480,14 +440,9 @@ "BriefDescription": "XL-form branch was mispredicted due to the predicted target address missing from EAT. The EAT forces a mispredict in this case since there is no predicated target to validate. This is a rare case that may occur when the EAT is full and a branch is issued" }, {, - "EventCode": "0xC094", - "EventName": "PM_LS0_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, - {, - "EventCode": "0xF8BC", - "EventName": "PM_LS3_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + "EventCode": "0x460AE", + "EventName": "PM_L3_P2_CO_RTY", + "BriefDescription": "L3 CO received retry port 2 (memory only), every retry counted" }, {, "EventCode": "0x58B0", @@ -504,11 +459,6 @@ "EventName": "PM_TM_ST_CONF", "BriefDescription": "TM Store (fav or non-fav) ran into conflict (failed)" }, - {, - "EventCode": "0xD998", - "EventName": "PM_MRK_LSU_FLUSH_EMSH", - "BriefDescription": "An ERAT miss was detected after a set-p hit. Erat tracker indicates fail due to tlbmiss and the instruction gets flushed because the instruction was working on the wrong address" - }, {, "EventCode": "0xF8A0", "EventName": "PM_NON_DATA_STORE", @@ -524,11 +474,6 @@ "EventName": "PM_BR_UNCOND", "BriefDescription": "Unconditional Branch Completed. HW branch prediction was not used for this branch. This can be an I-form branch, a B-form branch with BO-field set to branch always, or a B-form branch which was covenrted to a Resolve." }, - {, - "EventCode": "0x1F056", - "EventName": "PM_RADIX_PWC_L1_HIT", - "BriefDescription": "A radix translation attempt missed in the TLB and only the first level page walk cache was a hit." - }, {, "EventCode": "0xF8A8", "EventName": "PM_DC_PREF_FUZZY_CONF", @@ -544,6 +489,11 @@ "EventName": "PM_LSU2_TM_L1_MISS", "BriefDescription": "Load tm L1 miss" }, + {, + "EventCode": "0xC880", + "EventName": "PM_LS1_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x2894", "EventName": "PM_TM_OUTER_TEND", @@ -564,21 +514,11 @@ "EventName": "PM_MRK_LSU_DERAT_MISS", "BriefDescription": "Marked derat reload (miss) for any page size" }, - {, - "EventCode": "0x160A0", - "EventName": "PM_L3_PF_MISS_L3", - "BriefDescription": "L3 PF missed in L3" - }, {, "EventCode": "0x1C04A", "EventName": "PM_DATA_FROM_RL2L3_SHR", "BriefDescription": "The processor's data cache was reloaded with Shared (S) data from another chip's L2 or L3 on the same Node or Group (Remote), as this chip due to a demand load" }, - {, - "EventCode": "0xD99C", - "EventName": "PM_MRK_LSU_FLUSH_UE", - "BriefDescription": "Correctable ECC error on reload data, reported at critical data forward time" - }, {, "EventCode": "0x268B0", "EventName": "PM_L3_P1_GRP_PUMP", @@ -629,11 +569,6 @@ "EventName": "PM_TMA_REQ_L2", "BriefDescription": "addrs only req to L2 only on the first one,Indication that Load footprint is not expanding" }, - {, - "EventCode": "0x5884", - "EventName": "PM_DECODE_LANES_NOT_AVAIL", - "BriefDescription": "Decode has something to transmit but dispatch lanes are not available" - }, {, "EventCode": "0x3C042", "EventName": "PM_DATA_FROM_L3_DISP_CONFLICT", @@ -690,9 +625,9 @@ "BriefDescription": "False LHS match detected" }, {, - "EventCode": "0xD9A4", - "EventName": "PM_MRK_LSU_FLUSH_LARX_STCX", - "BriefDescription": "A larx is flushed because an older larx has an LMQ reservation for the same thread. A stcx is flushed because an older stcx is in the LMQ. The flush happens when the older larx/stcx relaunches" + "EventCode": "0xF0B0", + "EventName": "PM_L3_LD_PREF", + "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest" }, {, "EventCode": "0x4D012", @@ -715,9 +650,9 @@ "BriefDescription": "All successful Ld/St dispatches for this thread that were an L2 miss (excludes i_l2mru_tch_reqs)" }, {, - "EventCode": "0xF8B8", - "EventName": "PM_LS1_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" + "EventCode": "0x160A0", + "EventName": "PM_L3_PF_MISS_L3", + "BriefDescription": "L3 PF missed in L3" }, {, "EventCode": "0x408C", @@ -764,11 +699,6 @@ "EventName": "PM_TM_NESTED_TEND", "BriefDescription": "Completion time nested tend" }, - {, - "EventCode": "0x36084", - "EventName": "PM_L2_RCST_DISP", - "BriefDescription": "All D-side store dispatch attempts for this thread" - }, {, "EventCode": "0x368A0", "EventName": "PM_L3_PF_OFF_CHIP_CACHE", @@ -829,11 +759,6 @@ "EventName": "PM_L3_SN_USAGE", "BriefDescription": "Rotating sample of 16 snoop valids" }, - {, - "EventCode": "0x16084", - "EventName": "PM_L2_RCLD_DISP", - "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" - }, {, "EventCode": "0x1608C", "EventName": "PM_RC0_BUSY", @@ -842,7 +767,7 @@ {, "EventCode": "0x36082", "EventName": "PM_L2_LD_DISP", - "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)." + "BriefDescription": "All successful I-or-D side load dispatches for this thread (excludes i_l2mru_tch_reqs)" }, {, "EventCode": "0xF8B0", @@ -904,11 +829,6 @@ "EventName": "PM_IC_PREF_REQ", "BriefDescription": "Instruction prefetch requests" }, - {, - "EventCode": "0xC898", - "EventName": "PM_LS3_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x488C", "EventName": "PM_IC_PREF_WRITE", @@ -1017,7 +937,7 @@ {, "EventCode": "0x3E05E", "EventName": "PM_L3_CO_MEPF", - "BriefDescription": "L3 castouts in Mepf state for this thread" + "BriefDescription": "L3 CO of line in Mep state (includes casthrough to memory). The Mepf state indicates that a line was brought in to satisfy an L3 prefetch request" }, {, "EventCode": "0x460A2", @@ -1204,11 +1124,6 @@ "EventName": "PM_TM_FAIL_NON_TX_CONFLICT", "BriefDescription": "Non transactional conflict from LSU, gets reported to TEXASR" }, - {, - "EventCode": "0xD198", - "EventName": "PM_MRK_LSU_FLUSH_ATOMIC", - "BriefDescription": "Quad-word loads (lq) are considered atomic because they always span at least 2 slices. If a snoop or store from another thread changes the data the load is accessing between the 2 or 3 pieces of the lq instruction, the lq will be flushed" - }, {, "EventCode": "0x201E0", "EventName": "PM_MRK_DATA_FROM_MEMORY", @@ -1294,11 +1209,6 @@ "EventName": "PM_ICT_NOSLOT_DISP_HELD_HB_FULL", "BriefDescription": "Ict empty for this thread due to dispatch holds because the History Buffer was full. Could be GPR/VSR/VMR/FPR/CR/XVF; CR; XVF (XER/VSCR/FPSCR)" }, - {, - "EventCode": "0xC894", - "EventName": "PM_LS1_UNALIGNED_LD", - "BriefDescription": "Load instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the load of that size. If the load wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x360A2", "EventName": "PM_L3_L2_CO_HIT", @@ -1324,11 +1234,6 @@ "EventName": "PM_L2_CASTOUT_SHR", "BriefDescription": "L2 Castouts - Shared (Tx,Sx)" }, - {, - "EventCode": "0xD884", - "EventName": "PM_LSU3_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x26092", "EventName": "PM_L2_LD_MISS_64B", @@ -1362,12 +1267,12 @@ {, "EventCode": "0xD8A8", "EventName": "PM_ISLB_MISS", - "BriefDescription": "Instruction SLB miss - Total of all segment sizes" + "BriefDescription": "Instruction SLB Miss - Total of all segment sizes" }, {, - "EventCode": "0xD19C", - "EventName": "PM_MRK_LSU_FLUSH_RELAUNCH_MISS", - "BriefDescription": "If a load that has already returned data and has to relaunch for any reason then gets a miss (erat, setp, data cache), it will often be flushed at relaunch time because the data might be inconsistent" + "EventCode": "0x368AE", + "EventName": "PM_L3_P1_CO_RTY", + "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" }, {, "EventCode": "0x260A2", @@ -1384,6 +1289,11 @@ "EventName": "PM_CMPLU_STALL_NESTED_TBEGIN", "BriefDescription": "Completion stall because the ISU is updating the TEXASR to keep track of the nested tbegin. This is a short delay, and it includes ROT" }, + {, + "EventCode": "0xC084", + "EventName": "PM_LS2_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x1608E", "EventName": "PM_ST_CAUSED_FAIL", @@ -1409,11 +1319,6 @@ "EventName": "PM_CO_USAGE", "BriefDescription": "Continuous 16 cycle (2to1) window where this signals rotates thru sampling each CO machine busy. PMU uses this wave to then do 16 cyc count to sample total number of machs running" }, - {, - "EventCode": "0xD084", - "EventName": "PM_LSU2_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x48B8", "EventName": "PM_BR_MPRED_TAKEN_TA", @@ -1449,30 +1354,25 @@ "EventName": "PM_DC_PREF_STRIDED_CONF", "BriefDescription": "A demand load referenced a line in an active strided prefetch stream. The stream could have been allocated through the hardware prefetch mechanism or through software." }, + {, + "EventCode": "0x36084", + "EventName": "PM_L2_RCST_DISP", + "BriefDescription": "All D-side store dispatch attempts for this thread" + }, {, "EventCode": "0x45054", "EventName": "PM_FMA_CMPL", "BriefDescription": "two flops operation completed (fmadd, fnmadd, fmsub, fnmsub) Scalar instructions only. " }, - {, - "EventCode": "0x5090", - "EventName": "PM_SHL_ST_DISABLE", - "BriefDescription": "Store-Hit-Load Table Read Hit with entry Disabled (entry was disabled due to the entry shown to not prevent the flush)" - }, {, "EventCode": "0x201E8", "EventName": "PM_THRESH_EXC_512", "BriefDescription": "Threshold counter exceeded a value of 512" }, - {, - "EventCode": "0x5084", - "EventName": "PM_DECODE_FUSION_EXT_ADD", - "BriefDescription": "32-bit extended addition" - }, {, "EventCode": "0x36080", "EventName": "PM_L2_INST", - "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)." + "BriefDescription": "All successful I-side dispatches for this thread (excludes i_l2mru_tch reqs)" }, {, "EventCode": "0x3504C", @@ -1554,21 +1454,11 @@ "EventName": "PM_MEM_RWITM", "BriefDescription": "Memory Read With Intent to Modify for this thread" }, - {, - "EventCode": "0x26882", - "EventName": "PM_L2_DC_INV", - "BriefDescription": "D-cache invalidates sent over the reload bus to the core" - }, {, "EventCode": "0xC090", "EventName": "PM_LSU_STCX", "BriefDescription": "STCX sent to nest, i.e. total" }, - {, - "EventCode": "0xD080", - "EventName": "PM_LSU0_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0x2C120", "EventName": "PM_MRK_DATA_FROM_L2_NO_CONFLICT", @@ -1609,11 +1499,6 @@ "EventName": "PM_IPTEG_FROM_L2_NO_CONFLICT", "BriefDescription": "A Page Table Entry was loaded into the TLB from local core's L2 without conflict due to a instruction side request" }, - {, - "EventCode": "0xD9A0", - "EventName": "PM_MRK_LSU_FLUSH_LHL_SHL", - "BriefDescription": "The instruction was flushed because of a sequential load/store consistency. If a load or store hits on an older load that has either been snooped (for loads) or has stale data (for stores)." - }, {, "EventCode": "0x35042", "EventName": "PM_IPTEG_FROM_L3_DISP_CONFLICT", @@ -1692,7 +1577,7 @@ {, "EventCode": "0x2001A", "EventName": "PM_NTC_ALL_FIN", - "BriefDescription": "Cycles after all instructions have finished to group completed" + "BriefDescription": "Cycles after instruction finished to instruction completed." }, {, "EventCode": "0x3005A", @@ -1709,6 +1594,11 @@ "EventName": "PM_LSU1_L1_CAM_CANCEL", "BriefDescription": "ls1 l1 tm cam cancel" }, + {, + "EventCode": "0x268AE", + "EventName": "PM_L3_P3_PF_RTY", + "BriefDescription": "L3 PF received retry port 3, every retry counted" + }, {, "EventCode": "0xE884", "EventName": "PM_LS1_ERAT_MISS_PREF", @@ -1742,7 +1632,7 @@ {, "EventCode": "0x160B6", "EventName": "PM_L3_WI0_BUSY", - "BriefDescription": "Rotating sample of 8 WI valid" + "BriefDescription": "Rotating sample of 8 WI valid (duplicate)" }, {, "EventCode": "0x368AC", @@ -1790,9 +1680,9 @@ "BriefDescription": "L2 guess system (VGS or RNS) and guess was correct (ie data beyond-group)" }, {, - "EventCode": "0x589C", - "EventName": "PM_PTESYNC", - "BriefDescription": "ptesync instruction counted when the instruction is decoded and transmitted" + "EventCode": "0x260AE", + "EventName": "PM_L3_P2_PF_RTY", + "BriefDescription": "L3 PF received retry port 2, every retry counted" }, {, "EventCode": "0x26086", @@ -1824,6 +1714,11 @@ "EventName": "PM_SHL_ST_DEP_CREATED", "BriefDescription": "Store-Hit-Load Table Read Hit with entry Enabled" }, + {, + "EventCode": "0x46882", + "EventName": "PM_L2_ST_HIT", + "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" + }, {, "EventCode": "0x360AC", "EventName": "PM_L3_SN0_BUSY", @@ -1844,11 +1739,6 @@ "EventName": "PM_L2_ST_MISS", "BriefDescription": "All successful D-Side Store dispatches that were an L2 miss for this thread" }, - {, - "EventCode": "0xF8B4", - "EventName": "PM_DC_PREF_XCONS_ALLOC", - "BriefDescription": "Prefetch stream allocated in the Ultra conservative phase by either the hardware prefetch mechanism or software prefetch" - }, {, "EventCode": "0x35048", "EventName": "PM_IPTEG_FROM_DL2L3_SHR", @@ -1969,11 +1859,6 @@ "EventName": "PM_THRD_PRIO_2_3_CYC", "BriefDescription": "Cycles thread running at priority level 2 or 3" }, - {, - "EventCode": "0x10134", - "EventName": "PM_MRK_ST_DONE_L2", - "BriefDescription": "marked store completed in L2 ( RC machine done)" - }, {, "EventCode": "0x368B2", "EventName": "PM_L3_GRP_GUESS_WRONG_HIGH", @@ -2004,11 +1889,6 @@ "EventName": "PM_L2_GRP_GUESS_WRONG", "BriefDescription": "L2 guess grp (GS or NNS) and guess was not correct (ie data on-chip OR beyond-group)" }, - {, - "EventCode": "0x368AE", - "EventName": "PM_L3_P1_CO_RTY", - "BriefDescription": "L3 CO received retry port 1 (memory only), every retry counted" - }, {, "EventCode": "0xC0AC", "EventName": "PM_LSU_FLUSH_EMSH", @@ -2034,11 +1914,6 @@ "EventName": "PM_L2_GROUP_PUMP", "BriefDescription": "RC requests that were on group (aka nodel) pump attempts" }, - {, - "EventCode": "0xF0B0", - "EventName": "PM_L3_LD_PREF", - "BriefDescription": "L3 load prefetch, sourced from a hardware or software stream, was sent to the nest" - }, {, "EventCode": "0x16080", "EventName": "PM_L2_LD", @@ -2049,6 +1924,11 @@ "EventName": "PM_MATH_FLOP_CMPL", "BriefDescription": "Math flop instruction completed" }, + {, + "EventCode": "0xC080", + "EventName": "PM_LS0_LD_VECTOR_FIN", + "BriefDescription": "" + }, {, "EventCode": "0x368B0", "EventName": "PM_L3_P1_SYS_PUMP", @@ -2119,11 +1999,6 @@ "EventName": "PM_BR_CORECT_PRED_TAKEN_CMPL", "BriefDescription": "Conditional Branch Completed in which the HW correctly predicted the direction as taken. Counted at completion time" }, - {, - "EventCode": "0xF0B8", - "EventName": "PM_LS0_UNALIGNED_ST", - "BriefDescription": "Store instructions whose data crosses a double-word boundary, which causes it to require an additional slice than than what normally would be required of the Store of that size. If the Store wraps from slice 3 to slice 0, thee is an additional 3-cycle penalty" - }, {, "EventCode": "0x20132", "EventName": "PM_MRK_DFU_FIN", @@ -2139,6 +2014,11 @@ "EventName": "PM_LSU_FLUSH_LHS", "BriefDescription": "Effective Address alias flush : no EA match but Real Address match. If the data has not yet been returned for this load, the instruction will just be rejected, but if it has returned data, it will be flushed" }, + {, + "EventCode": "0x16084", + "EventName": "PM_L2_RCLD_DISP", + "BriefDescription": "All I-or-D side load dispatch attempts for this thread (excludes i_l2mru_tch_reqs)" + }, {, "EventCode": "0x3F150", "EventName": "PM_MRK_ST_DRAIN_TO_L2DISP_CYC", @@ -2224,11 +2104,6 @@ "EventName": "PM_IC_PREF_CANCEL_PAGE", "BriefDescription": "Prefetch Canceled due to page boundary" }, - {, - "EventCode": "0xF09C", - "EventName": "PM_SLB_TABLEWALK_CYC", - "BriefDescription": "Cycles when a tablewalk is pending on this thread on the SLB table" - }, {, "EventCode": "0x460AA", "EventName": "PM_L3_P0_CO_L31", @@ -2247,10 +2122,10 @@ {, "EventCode": "0x46082", "EventName": "PM_L2_ST_DISP", - "BriefDescription": "All successful D-side store dispatches for this thread " + "BriefDescription": "All successful D-side store dispatches for this thread (L2 miss + L2 hits)" }, {, - "EventCode": "0x4609E", + "EventCode": "0x36880", "EventName": "PM_L2_INST_MISS", "BriefDescription": "All successful I-side dispatches that were an L2 miss for this thread (excludes i_l2mru_tch reqs)" }, @@ -2340,9 +2215,9 @@ "BriefDescription": "All ISU rejects" }, {, - "EventCode": "0x46882", - "EventName": "PM_L2_ST_HIT", - "BriefDescription": "All successful D-side store dispatches for this thread that were L2 hits" + "EventCode": "0xC884", + "EventName": "PM_LS3_LD_VECTOR_FIN", + "BriefDescription": "" }, {, "EventCode": "0x360A8", @@ -2359,11 +2234,6 @@ "EventName": "PM_LSU_NCST", "BriefDescription": "Asserts when a i=1 store op is sent to the nest. No record of issue pipe (LS0/LS1) is maintained so this is for both pipes. Probably don't need separate LS0 and LS1" }, - {, - "EventCode": "0xD880", - "EventName": "PM_LSU1_SET_MPRED", - "BriefDescription": "Set prediction(set-p) miss. The entry was not found in the Set prediction table" - }, {, "EventCode": "0xD0B8", "EventName": "PM_LSU_LMQ_FULL_CYC", @@ -2389,4 +2259,4 @@ "EventName": "PM_L3_PF_USAGE", "BriefDescription": "Rotating sample of 32 PF actives" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json index bc2db636dabf..5af1abbe82c4 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pipeline.json @@ -124,6 +124,11 @@ "EventName": "PM_PMC5_OVERFLOW", "BriefDescription": "Overflow from counter 5" }, + {, + "EventCode": "0x4505E", + "EventName": "PM_FLOP_CMPL", + "BriefDescription": "Floating Point Operation Finished" + }, {, "EventCode": "0x2C018", "EventName": "PM_CMPLU_STALL_DMISS_L21_L31", @@ -389,11 +394,6 @@ "EventName": "PM_ICT_NOSLOT_BR_MPRED", "BriefDescription": "Ict empty for this thread due to branch mispred" }, - {, - "EventCode": "0x3405E", - "EventName": "PM_IFETCH_THROTTLE", - "BriefDescription": "Cycles in which Instruction fetch throttle was active." - }, {, "EventCode": "0x1F148", "EventName": "PM_MRK_DPTEG_FROM_ON_CHIP_CACHE", @@ -422,7 +422,7 @@ {, "EventCode": "0xD0A8", "EventName": "PM_DSLB_MISS", - "BriefDescription": "Data SLB Miss - Total of all segment sizes" + "BriefDescription": "gate_and(sd_pc_c0_comp_valid AND sd_pc_c0_comp_thread(0:1)=tid,sd_pc_c0_comp_ppc_count(0:3)) + gate_and(sd_pc_c1_comp_valid AND sd_pc_c1_comp_thread(0:1)=tid,sd_pc_c1_comp_ppc_count(0:3))" }, {, "EventCode": "0x4C058", @@ -549,4 +549,4 @@ "EventName": "PM_MRK_DATA_FROM_L21_SHR_CYC", "BriefDescription": "Duration in cycles to reload with Shared (S) data from another core's L2 on the same chip due to a marked load" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json index 3ef8a10aac86..d0b89f930567 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/pmc.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/pmc.json @@ -119,4 +119,4 @@ "EventName": "PM_1FLOP_CMPL", "BriefDescription": "one flop (fadd, fmul, fsub, fcmp, fsel, fabs, fnabs, fres, fsqrte, fneg) operation completed" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/powerpc/power9/translation.json b/tools/perf/pmu-events/arch/powerpc/power9/translation.json index 8c0f12024afa..bc8e03d7a6b0 100644 --- a/tools/perf/pmu-events/arch/powerpc/power9/translation.json +++ b/tools/perf/pmu-events/arch/powerpc/power9/translation.json @@ -89,11 +89,6 @@ "EventName": "PM_STCX_FAIL", "BriefDescription": "stcx failed" }, - {, - "EventCode": "0x20112", - "EventName": "PM_MRK_NTF_FIN", - "BriefDescription": "Marked next to finish instruction finished" - }, {, "EventCode": "0x300F0", "EventName": "PM_ST_MISS_L1", -- cgit v1.2.3-70-g09d2 From 4359dd88afb7cac8a98e32f6bdfe0b46c79bc3cd Mon Sep 17 00:00:00 2001 From: Thomas-Mich Richter Date: Tue, 7 Nov 2017 15:48:53 +0100 Subject: perf buildid-cache: Update help text for purge command Clarify the perf buildid-cache help text for the purge operation. The purge subcommand takes a list of files (binaries) as option parameter. Make the wording the same as for the add and remove operation. Signed-off-by: Thomas-Mich Richter Reviewed-by: Hendrik Brueckner Acked-by: Masami Hiramatsu Cc: Martin Schwidefsky LPU-Reference: 20171107144853.12925-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-buildid-cache.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 3d354ba6e9c5..41db2cba77eb 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -325,8 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv) "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), - OPT_STRING('p', "purge", &purge_name_list_str, "path list", - "path(s) to remove (remove old caches too)"), + OPT_STRING('p', "purge", &purge_name_list_str, "file list", + "file(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), -- cgit v1.2.3-70-g09d2 From 35c0a81a97692cc0afe3d005c9a737bbde06e784 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Nov 2017 06:55:24 -0800 Subject: perf tools: Document some missing perf.data headers Document STAT and CACHE header entries. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171109145528.23371-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 23 ++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index e90c59c6d815..15e8b48077ba 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -238,6 +238,29 @@ struct auxtrace_index { struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT]; }; + HEADER_STAT = 19, + +This is merely a flag signifying that the data section contains data +recorded from perf stat record. + + HEADER_CACHE = 20, + +Description of the cache hierarchy. Based on the Linux sysfs format +in /sys/devices/system/cpu/cpu*/cache/ + + u32 version Currently always 1 + u32 number_of_cache_levels + +struct { + u32 level; + u32 line_size; + u32 sets; + u32 ways; + struct perf_header_string type; + struct perf_header_string size; + struct perf_header_string map; +}[number_of_cache_levels]; + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, -- cgit v1.2.3-70-g09d2 From 5039c8a28fa97b8dce7b363a5ecd4bee2b87bf03 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 9 Nov 2017 06:55:26 -0800 Subject: perf script: Allow printing period for non freq mode groups When using leader sampling the values of the not sampled but counted events are shown by perf script in "period". Currently printing period is only allowed when the main event has a period, that is it is in frequency mode. This implies that we cannot dump the values of counted events when the leader event is not in frequency mode. Just remove the check that the period must be set on all events. It will just be printed as 0 instead if it's not available. This fixes the following: $ perf record -c 100000 -e '{cycles,branches}:S' $ perf script -F event,period Further commentary by Jiri Olsa: The period will be the value of configured period, not 0: int perf_evsel__parse_sample(struct ... ... data->period = evsel->attr.sample_period; $ perf record -c 100000 $ perf script -F event,period | head -3 Failed to open /tmp/perf-2048.map, continuing without symbols 100000 cycles:ppp: 100000 cycles:ppp: other than that I think we can remove that check, because we will have always sane number in period Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171109145528.23371-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 9b43bda45a41..ee7c7aaaae72 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -423,11 +423,6 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, PERF_OUTPUT_CPU, allow_user_set)) return -EINVAL; - if (PRINT_FIELD(PERIOD) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_PERIOD, "PERIOD", - PERF_OUTPUT_PERIOD)) - return -EINVAL; - if (PRINT_FIELD(IREGS) && perf_evsel__check_stype(evsel, PERF_SAMPLE_REGS_INTR, "IREGS", PERF_OUTPUT_IREGS)) -- cgit v1.2.3-70-g09d2 From 958964f803b27baffd238708842b527a1d30e110 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Sun, 12 Nov 2017 10:10:46 +0900 Subject: perf top: Document missing options Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510449047-12941-2-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-top.txt | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 4353262bc462..8a32cc77bead 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -268,6 +268,12 @@ INTERACTIVE PROMPTING KEYS [S]:: Stop annotation, return to full profile display. +[K]:: + Hide kernel symbols. + +[U]:: + Hide user symbols. + [z]:: Toggle event count zeroing across display updates. -- cgit v1.2.3-70-g09d2 From 8fce3743cea47db86dd13ab4c479158a872271e8 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Sun, 12 Nov 2017 10:10:47 +0900 Subject: perf top: Remove a duplicate word Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510449047-12941-3-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0789f95ca2f3..68320ac5e9b0 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -412,7 +412,7 @@ static void perf_top__print_mapped_keys(struct perf_top *top) fprintf(stdout, "\t[S] stop annotation.\n"); fprintf(stdout, - "\t[K] hide kernel_symbols symbols. \t(%s)\n", + "\t[K] hide kernel symbols. \t(%s)\n", top->hide_kernel_symbols ? "yes" : "no"); fprintf(stdout, "\t[U] hide user symbols. \t(%s)\n", -- cgit v1.2.3-70-g09d2 From d492326f160e44e08fcf132a63163b36dd8e8839 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:04 +0000 Subject: perf tests: Set evlist of test__backward_ring_buffer() to !overwrite Setting overwrite in perf_evlist__mmap() is meaningless because the event in this evlist is already have 'overwrite' postfix and goes to backward ring buffer automatically. Pass 'false' to perf_evlist__mmap() to make it similar to others. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/backward-ring-buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 71b9a0b613d2..43a8c6ac4070 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -59,7 +59,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages, int err; char sbuf[STRERR_BUFSIZE]; - err = perf_evlist__mmap(evlist, mmap_pages, true); + err = perf_evlist__mmap(evlist, mmap_pages, false); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); -- cgit v1.2.3-70-g09d2 From 677b0601768881934f658bebb1713c3c843893fa Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:05 +0000 Subject: perf tests: Set evlist of test__sw_clock_freq() to !overwrite Unsetting overwrite when calling perf_evlist__mmap is harmless. This commit passes false to it, makes following commits eliminate the overwrite argument easier. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/sw-clock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 725a196991a8..c6937ed12e6b 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, 128, true); + err = perf_evlist__mmap(evlist, 128, false); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); -- cgit v1.2.3-70-g09d2 From 301d724aa19add1c0cf3ec8cad0d10151d30393f Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:06 +0000 Subject: perf tests: Set evlist of test__basic_mmap() to !overwrite In this test, a large ring buffer is required so all events can feed into, so overwrite or not is meaningless. Change to !overwrite so following commits can remove this argument. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-5-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/mmap-basic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 5a8bf318f8a7..91f10d6d9ae2 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -94,7 +94,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[i] = 1 + rand() % 127; } - if (perf_evlist__mmap(evlist, 128, true) < 0) { + if (perf_evlist__mmap(evlist, 128, false) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; -- cgit v1.2.3-70-g09d2 From a0e3dd79cdd8ad838cbcefeff530a15193f8336e Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 13 Nov 2017 01:38:07 +0000 Subject: perf tests: Set evlist of test__task_exit() to !overwrite Changing ringbuffer to !overwrite in this task is harmless because this test uses a very low frequency (1) and using a very simple program (true). There should have only 3 events in the whole test. Overwriting is impossible to happen. Signed-off-by: Wang Nan Cc: Jiri Olsa Cc: Kan Liang Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171113013809.212417-6-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/task-exit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index bc4a7344e274..98c098475e71 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -97,7 +97,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, 128, true) < 0) { + if (perf_evlist__mmap(evlist, 128, false) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; -- cgit v1.2.3-70-g09d2 From 19993b82a571893e661afd90f1d77fa698785cee Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 13 Nov 2017 16:06:29 -0300 Subject: perf machine: Guard against NULL in machine__exit() A recent fix for 'perf trace' introduced a bug where machine__exit(trace->host) could be called while trace->host was still NULL, so make this more robust by guarding against NULL, just like free() does. The problem happens, for instance, when !root users try to run 'perf trace': [acme@jouet linux]$ trace Error: No permissions to read /sys/kernel/debug/tracing/events/raw_syscalls/sys_(enter|exit) Hint: Try 'sudo mount -o remount,mode=755 /sys/kernel/debug/tracing' perf: Segmentation fault Obtained 7 stack frames. [0x4f1b2e] /lib64/libc.so.6(+0x3671f) [0x7f43a1dd971f] [0x4f3fec] [0x47468b] [0x42a2db] /lib64/libc.so.6(__libc_start_main+0xe9) [0x7f43a1dc3509] [0x42a6c9] Segmentation fault (core dumped) [acme@jouet linux]$ Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andrei Vagin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Vasily Averin Cc: Wang Nan Fixes: 33974a414ce2 ("perf trace: Call machine__exit() at exit") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 6a8d03c3d9b7..270f3223c6df 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -172,6 +172,9 @@ void machine__exit(struct machine *machine) { int i; + if (machine == NULL) + return; + machine__destroy_kernel_maps(machine); map_groups__exit(&machine->kmaps); dsos__exit(&machine->dsos); -- cgit v1.2.3-70-g09d2 From 2f0af8600e82e9f950fc32908386b9c639f88d48 Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Tue, 14 Nov 2017 09:15:42 +0900 Subject: perf help: Fix a bug during strstart() conversion The commit 8e99b6d4533c changed prefixcmp() to strstart() but missed to change the return value in some place. It makes perf help print annoying output even for sane config items like below: $ perf help '.root': unsupported man viewer sub key. ... Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Namhyung Kim Tested-by: Arnaldo Carvalho de Melo Tested-by: Taeung Song Cc: Jiri Olsa Cc: Sihyeon Jang Cc: kernel-team@lge.com Link: http://lkml.kernel.org/r/20171114001542.GA16464@sejong Fixes: 8e99b6d4533c ("tools include: Adopt strstarts() from the kernel") Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-help.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index bd1fedef3d1c..a0f7ed2b869b 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -284,7 +284,7 @@ static int perf_help_config(const char *var, const char *value, void *cb) add_man_viewer(value); return 0; } - if (!strstarts(var, "man.")) + if (strstarts(var, "man.")) return add_man_viewer_info(var, value); return 0; @@ -314,7 +314,7 @@ static const char *cmd_to_page(const char *perf_cmd) if (!perf_cmd) return "perf"; - else if (!strstarts(perf_cmd, "perf")) + else if (strstarts(perf_cmd, "perf")) return perf_cmd; return asprintf(&s, "perf-%s", perf_cmd) < 0 ? NULL : s; -- cgit v1.2.3-70-g09d2 From 648388ae68e953b312e28eaf869fe6c01e2f70cc Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Nov 2017 08:55:40 +0530 Subject: perf annotate: Do not truncate instruction names at 6 chars There are many instructions, esp on PowerPC, whose mnemonics are longer than 6 characters. Using precision limit causes truncation of such mnemonics. Fix this by removing precision limit. Note that, 'width' is still 6, so alignment won't get affected for length <= 6. Before: li r11,-1 xscvdp vs1,vs1 add. r10,r10,r11 After: li r11,-1 xscvdpsxds vs1,vs1 add. r10,r10,r11 Reported-by: Donald Stence Signed-off-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Taeung Song Link: http://lkml.kernel.org/r/20171114032540.4564-1-ravi.bangoria@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index eab4a8e3c679..30d74dabdc42 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -165,7 +165,7 @@ static void ins__delete(struct ins_operands *ops) static int ins__raw_scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->raw); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->raw); } int ins__scnprintf(struct ins *ins, char *bf, size_t size, @@ -230,12 +230,12 @@ static int call__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { if (ops->target.name) - return scnprintf(bf, size, "%-6.6s %s", ins->name, ops->target.name); + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name); if (ops->target.addr == 0) return ins__raw_scnprintf(ins, bf, size, ops); - return scnprintf(bf, size, "%-6.6s *%" PRIx64, ins->name, ops->target.addr); + return scnprintf(bf, size, "%-6s *%" PRIx64, ins->name, ops->target.addr); } static struct ins_ops call_ops = { @@ -299,7 +299,7 @@ static int jump__scnprintf(struct ins *ins, char *bf, size_t size, c++; } - return scnprintf(bf, size, "%-6.6s %.*s%" PRIx64, + return scnprintf(bf, size, "%-6s %.*s%" PRIx64, ins->name, c ? c - ops->raw : 0, ops->raw, ops->target.offset); } @@ -372,7 +372,7 @@ static int lock__scnprintf(struct ins *ins, char *bf, size_t size, if (ops->locked.ins.ops == NULL) return ins__raw_scnprintf(ins, bf, size, ops); - printed = scnprintf(bf, size, "%-6.6s ", ins->name); + printed = scnprintf(bf, size, "%-6s ", ins->name); return printed + ins__scnprintf(&ops->locked.ins, bf + printed, size - printed, ops->locked.ops); } @@ -448,7 +448,7 @@ out_free_source: static int mov__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s,%s", ins->name, + return scnprintf(bf, size, "%-6s %s,%s", ins->name, ops->source.name ?: ops->source.raw, ops->target.name ?: ops->target.raw); } @@ -488,7 +488,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops static int dec__scnprintf(struct ins *ins, char *bf, size_t size, struct ins_operands *ops) { - return scnprintf(bf, size, "%-6.6s %s", ins->name, + return scnprintf(bf, size, "%-6s %s", ins->name, ops->target.name ?: ops->target.raw); } @@ -500,7 +500,7 @@ static struct ins_ops dec_ops = { static int nop__scnprintf(struct ins *ins __maybe_unused, char *bf, size_t size, struct ins_operands *ops __maybe_unused) { - return scnprintf(bf, size, "%-6.6s", "nop"); + return scnprintf(bf, size, "%-6s", "nop"); } static struct ins_ops nop_ops = { @@ -990,7 +990,7 @@ void disasm_line__free(struct disasm_line *dl) int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw) { if (raw || !dl->ins.ops) - return scnprintf(bf, size, "%-6.6s %s", dl->ins.name, dl->ops.raw); + return scnprintf(bf, size, "%-6s %s", dl->ins.name, dl->ops.raw); return ins__scnprintf(&dl->ins, bf, size, &dl->ops); } -- cgit v1.2.3-70-g09d2 From f231af789b11a2f1a3795acc3228a3e178a80c21 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 14 Nov 2017 08:18:46 +0100 Subject: perf test shell: Fix check open filename arg using 'perf trace' on s390x This 'perf test' case fails on s390x. The 'touch' command on s390x uses the 'openat' system call to open the file named on the command line: [root@s35lp76 perf]# perf probe -l probe:vfs_getname (on getname_flags:72@fs/namei.c with pathname) [root@s35lp76 perf]# perf trace -e open touch /tmp/abc 0.400 ( 0.015 ms): touch/27542 open(filename: /usr/lib/locale/locale-archive, flags: CLOEXEC) = 3 [root@s35lp76 perf]# There is no 'open' system call for file '/tmp/abc'. Instead the 'openat' system call is used: [root@s35lp76 perf]# strace touch /tmp/abc execve("/usr/bin/touch", ["touch", "/tmp/abc"], 0x3ffd547ec98 /* 30 vars */) = 0 [...] openat(AT_FDCWD, "/tmp/abc", O_WRONLY|O_CREAT|O_NOCTTY|O_NONBLOCK, 0666) = 3 [...] On s390x the 'egrep' command does not find a matching pattern and returns an error. Fix this for s390x create a platform dependent command line to enable the 'perf probe' call to listen to the 'openat' system call and get the expected output. Signed-off-by: Thomas-Mich Richter Tested-by: Arnaldo Carvalho de Melo Cc: Hendrik Brueckner Cc: Thomas-Mich Richter LPU-Reference: 20171114071847.2381-1-tmricht@linux.vnet.ibm.com Link: http://lkml.kernel.org/n/tip-3qf38jk0prz54rhmhyu871my@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 2e68c5f120da..2a9ef080efd0 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,8 +17,10 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - perf trace -e open touch $file 2>&1 | \ - egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open\(filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; } + + perf trace -e ${svc:-open} touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3-70-g09d2 From 0879e5e5f33c8a1eb01281ad920173664e68b266 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 14 Nov 2017 08:18:47 +0100 Subject: perf test shell: Fix test case probe libc's inet_pton on s390x MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'perf test' case "probe libc's inet_pton & backtrace it with ping" fails on s390x. The reason is the 'realpath /lib64/ld*.so.* | uniq' line which returns 2 libraries: root@s35lp76 shell]# realpath /lib64/ld*.so.* | uniq /usr/lib64/ld-2.26.so /usr/lib64/ld_pre_smc.so.1.0.1 [root@s35lp76 shell] This output makes the "perf probe" command lines invalid. Use ldd tool to find out the libraries required by "bash" and check if symbol "inet_pton" is part of the "libc" library. Some distros do not have a /lib64 directory. I have also added a check for the existence of an IPv6 network interface before it is being used. Committer changes: We can't really use ldd for libc, as in some systems, such as x86_64, it has hardlinks and then ldd sees one and the kernel the other, so grep for libc in /proc/self/maps to get the one we'll receive from PERF_RECORD_MMAP. Thomas checked this change and acked it. Signed-off-by: Thomas-Mich Richter Tested-by: Arnaldo Carvalho de Melo Suggested-by: Hendrik Brückner Reviewed-by: Hendrik Brückner Link: http://lkml.kernel.org/r/20171114133409.GN8836@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_libc_inet_pton.sh | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh index 7a84d73324e3..8b3da21a08f1 100755 --- a/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh +++ b/tools/perf/tests/shell/trace+probe_libc_inet_pton.sh @@ -10,8 +10,8 @@ . $(dirname $0)/lib/probe.sh -ld=$(realpath /lib64/ld*.so.* | uniq) -libc=$(echo $ld | sed 's/ld/libc/g') +libc=$(grep -w libc /proc/self/maps | head -1 | sed -r 's/.*[[:space:]](\/.*)/\1/g') +nm -g $libc 2>/dev/null | fgrep -q inet_pton || exit 254 trace_libc_inet_pton_backtrace() { idx=0 @@ -37,6 +37,9 @@ trace_libc_inet_pton_backtrace() { done } +# Check for IPv6 interface existence +ip a sh lo | fgrep -q inet6 || exit 2 + skip_if_no_perf_probe && \ perf probe -q $libc inet_pton && \ trace_libc_inet_pton_backtrace -- cgit v1.2.3-70-g09d2 From 07d6f446a9e45b7e6e7438f8560e40d4dcfa0321 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:01:06 -0300 Subject: perf evlist: Add helper to check if attr.exclude_kernel is set in all evsels The warning about kptr_restrict needs to be emitted only when it is set and we ask for kernel space samples, so add a helper to help with that. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-fh7drty6yljei9gxxzer6eup@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 12 ++++++++++++ tools/perf/util/evlist.h | 2 ++ 2 files changed, 14 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index ccb749f9a83f..b62e523a7035 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1786,3 +1786,15 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, state_err: return; } + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (!evsel->attr.exclude_kernel) + return false; + } + + return true; +} diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e72ae64c11ac..491f69542920 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -312,4 +312,6 @@ perf_evlist__find_evsel_by_str(struct perf_evlist *evlist, const char *str); struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *event); + +bool perf_evlist__exclude_kernel(struct perf_evlist *evlist); #endif /* __PERF_EVLIST_H */ -- cgit v1.2.3-70-g09d2 From 9c39ed90153d95d362004ed0d5e259ec46af3803 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:12:11 -0300 Subject: perf report: Ignore kptr_restrict when not sampling the kernel If none of the evsels has attr.exclude_kernel set to zero, no kernel samples, so no point in warning the user about problems in processing kernel samples, as there will be none. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-7dn926v3at8txxkky92aesz2@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 1394cd8d96f7..af5dd038195e 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -441,6 +441,9 @@ static void report__warn_kptr_restrict(const struct report *rep) struct map *kernel_map = machine__kernel_map(&rep->session->machines.host); struct kmap *kernel_kmap = kernel_map ? map__kmap(kernel_map) : NULL; + if (perf_evlist__exclude_kernel(rep->session->evlist)) + return; + if (kernel_map == NULL || (kernel_map->dso->hit && (kernel_kmap->ref_reloc_sym == NULL || -- cgit v1.2.3-70-g09d2 From 6c4439545517c9d6155e85f1a508be38408fb0b4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 11:03:19 -0300 Subject: perf record: Ignore kptr_restrict when not sampling the kernel If we're not sampling the kernel, we shouldn't care about kptr_restrict neither synthesize anything for assisting in resolving kernel samples, like the reference relocation symbol or kernel modules information. Before: $ cat /proc/sys/kernel/kptr_restrict /proc/sys/kernel/perf_event_paranoid 2 2 $ perf record sleep 1 WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted, check /proc/sys/kernel/kptr_restrict. Samples in kernel functions may not be resolved if a suitable vmlinux file is not found in the buildid cache or in the vmlinux path. Samples in kernel modules won't be resolved at all. If some relocation was applied (e.g. kexec) symbols may be misresolved even with a suitable vmlinux or kallsyms file. Couldn't record kernel reference relocation symbol Symbol resolution may be skewed if relocation was used (e.g. kexec). Check /proc/kallsyms permission or run as root. [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf evlist -v cycles:uppp: size: 112, { sample_period, sample_freq }: 4000, sample_type: IP|TID|TIME|PERIOD, disabled: 1, inherit: 1, exclude_kernel: 1, mmap: 1, comm: 1, freq: 1, enable_on_exec: 1, task: 1, precise_ip: 3, sample_id_all: 1, exclude_guest: 1, mmap2: 1, comm_exec: 1 $ After: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (10 samples) ] $ Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-t025e9zftbx2b8cq2w01g5e5@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 5f78ce943407..003255910c05 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -765,17 +765,19 @@ static int record__synthesize(struct record *rec, bool tail) goto out; } - err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/kallsyms permission or run as root.\n"); - - err = perf_event__synthesize_modules(tool, process_synthesized_event, - machine); - WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" - "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" - "Check /proc/modules permission or run as root.\n"); + if (!perf_evlist__exclude_kernel(rec->evlist)) { + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel reference relocation symbol\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/kallsyms permission or run as root.\n"); + + err = perf_event__synthesize_modules(tool, process_synthesized_event, + machine); + WARN_ONCE(err < 0, "Couldn't record kernel module information.\n" + "Symbol resolution may be skewed if relocation was used (e.g. kexec).\n" + "Check /proc/modules permission or run as root.\n"); + } if (perf_guest) { machines__process_guests(&session->machines, @@ -1709,7 +1711,7 @@ int cmd_record(int argc, const char **argv) err = -ENOMEM; - if (symbol_conf.kptr_restrict) + if (symbol_conf.kptr_restrict && !perf_evlist__exclude_kernel(rec->evlist)) pr_warning( "WARNING: Kernel address maps (/proc/{kallsyms,modules}) are restricted,\n" "check /proc/sys/kernel/kptr_restrict.\n\n" -- cgit v1.2.3-70-g09d2 From b89a5124d2089eec8f090dcd05dd88abaec0cbd2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 14 Nov 2017 13:30:19 -0300 Subject: perf top: Ignore kptr_restrict when not sampling the kernel If all events have attr.exclude_kernel set, no need to look at kptr_restrict. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Linus Torvalds Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-yegpzg5bf2im69g0tfizqaqz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 68320ac5e9b0..865191281591 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -735,14 +735,16 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine->kptr_restrict_warned && symbol_conf.kptr_restrict && al.cpumode == PERF_RECORD_MISC_KERNEL) { - ui__warning( + if (!perf_evlist__exclude_kernel(top->session->evlist)) { + ui__warning( "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? " modules" : ""); - if (use_browser <= 0) - sleep(5); + if (use_browser <= 0) + sleep(5); + } machine->kptr_restrict_warned = true; } -- cgit v1.2.3-70-g09d2 From 239fb4fed6c49110ebebe7378a84d96e3f0cf55d Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Nov 2017 15:04:47 -0600 Subject: perf c2c: Fix spelling mistakes in browser help text Togle -> Toggle, lenght -> length. Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171114150447.f4b63bc5d97c83cdaa8bf7dc@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index 17855c4626a0..f1da9b0833c0 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2224,9 +2224,9 @@ static int perf_c2c__browse_cacheline(struct hist_entry *he) struct hist_browser *browser; int key = -1; const char help[] = - " ENTER Togle callchains (if present) \n" - " n Togle Node details info \n" - " s Togle full lenght of symbol and source line columns \n" + " ENTER Toggle callchains (if present) \n" + " n Toggle Node details info \n" + " s Toggle full length of symbol and source line columns \n" " q Return back to cacheline list \n"; /* Display compact version first. */ @@ -2303,7 +2303,7 @@ static int perf_c2c__hists_browse(struct hists *hists) int key = -1; const char help[] = " d Display cacheline details \n" - " ENTER Togle callchains (if present) \n" + " ENTER Toggle callchains (if present) \n" " q Quit \n"; browser = perf_c2c_browser__new(hists); -- cgit v1.2.3-70-g09d2 From 114bc191c37028d87a540251d93e7b328f4de3fe Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Tue, 14 Nov 2017 15:04:52 -0600 Subject: perf evsel: Say which PMU Hardware event doesn't support sampling/overflow-interrupts Help identify to the user the event with the unsupported sampling error. Also suggest a corrective action. BEFORE: $ sudo ./oldperf record -e armv8_pmuv3/mem_access/,ccn/cycles/,armv8_pmuv3/l2d_cache/ true Error: PMU Hardware doesn't support sampling/overflow-interrupts. AFTER: $ sudo ./newperf record -e armv8_pmuv3/mem_access/,ccn/cycles/,armv8_pmuv3/l2d_cache/ true Error: ccn/cycles/: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat' Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171114150452.e846f2e23684c7d7d8ee706f@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index cb9bcdb065ea..b8e9def77f44 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2745,8 +2745,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, break; case EOPNOTSUPP: if (evsel->attr.sample_period != 0) - return scnprintf(msg, size, "%s", - "PMU Hardware doesn't support sampling/overflow-interrupts."); + return scnprintf(msg, size, + "%s: PMU Hardware doesn't support sampling/overflow-interrupts. Try 'perf stat'", + perf_evsel__name(evsel)); if (evsel->attr.precise_ip) return scnprintf(msg, size, "%s", "\'precise\' request may not be supported. Try removing 'p' modifier."); -- cgit v1.2.3-70-g09d2 From 38ba1daf8164d43d48b45c8e8deee4b20c21484d Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 22:06:49 +0900 Subject: perf lock: Document missing options Add man page entry for --force. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510837609-6277-1-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-lock.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index ab25be28c9dc..74d774592196 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -42,6 +42,10 @@ COMMON OPTIONS --dump-raw-trace:: Dump raw trace in ASCII. +-f:: +--force:: + Don't complan, do it. + REPORT OPTIONS -------------- -- cgit v1.2.3-70-g09d2 From 52186b8aa40f06350b33f8e4031879d389e2b9f2 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:02 +0900 Subject: perf inject: Document missing options Add the missing --force option to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-1-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-inject.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index 87b2588d1cbd..a64d6588470e 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -60,6 +60,10 @@ include::itrace.txt[] found in the jitdumps files captured in the input perf.data file. Use this option if you are monitoring environment using JIT runtimes, such as Java, DART or V8. +-f:: +--force:: + Don't complain, do it. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] -- cgit v1.2.3-70-g09d2 From 9b9d28a0087608052b39e7d9ee2f07b4e0fd6dca Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:03 +0900 Subject: perf trace: Document missing option, colons Add missing --force option to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-2-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index d53bea6bd571..6909cf1e0eea 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -86,18 +86,18 @@ comma-separated list with no space: 0,1. Ranges of CPUs are specified with -: 0- In per-thread mode with inheritance mode on (default), Events are captured only when the thread executes on the designated CPUs. Default is to monitor all CPUs. ---duration: +--duration:: Show only events that had a duration greater than N.M ms. ---sched: +--sched:: Accrue thread runtime and provide a summary at the end of the session. --i ---input +-i:: +--input:: Process events from a given perf data file. --T ---time +-T:: +--time:: Print full timestamp rather time relative to first sample. --comm:: @@ -117,6 +117,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Show tool stats such as number of times fd->pathname was discovered thru hooking the open syscall return + vfs_getname or via reading /proc/pid/fd, etc. +-f:: +--force:: + Don't complain, do it. + -F=[all|min|maj]:: --pf=[all|min|maj]:: Trace pagefaults. Optionally, you can specify whether you want minor, -- cgit v1.2.3-70-g09d2 From f4a30d2bee25b92f25086c81e33c80d767500097 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:04 +0900 Subject: perf timechart: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-3-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-timechart.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-timechart.txt b/tools/perf/Documentation/perf-timechart.txt index df98d1c82688..ef0c7565bd5c 100644 --- a/tools/perf/Documentation/perf-timechart.txt +++ b/tools/perf/Documentation/perf-timechart.txt @@ -50,7 +50,9 @@ TIMECHART OPTIONS -p:: --process:: Select the processes to display, by name or PID - +-f:: +--force:: + Don't complain, do it. --symfs=:: Look for files with symbols relative to this directory. -n:: -- cgit v1.2.3-70-g09d2 From e9b61e52c384f4af13404ad95161af58af08c908 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:05 +0900 Subject: perf sched: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-4-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-sched.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-sched.txt b/tools/perf/Documentation/perf-sched.txt index 55b67338548e..c7e50f263887 100644 --- a/tools/perf/Documentation/perf-sched.txt +++ b/tools/perf/Documentation/perf-sched.txt @@ -74,6 +74,10 @@ OPTIONS --dump-raw-trace=:: Display verbose dump of the sched data. +-f:: +--force:: + Don't complain, do it. + OPTIONS for 'perf sched map' ---------------------------- -- cgit v1.2.3-70-g09d2 From deb368acf1731bf89c34b171094c4f8eff66ebd9 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:06 +0900 Subject: perf evlist: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-5-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-evlist.txt | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-evlist.txt b/tools/perf/Documentation/perf-evlist.txt index 6f7200fb85cf..c0a66400a960 100644 --- a/tools/perf/Documentation/perf-evlist.txt +++ b/tools/perf/Documentation/perf-evlist.txt @@ -20,6 +20,10 @@ OPTIONS --input=:: Input file name. (default: perf.data unless stdin is a fifo) +-f:: +--force:: + Don't complain, do it. + -F:: --freq=:: Show just the sample frequency used for each event. -- cgit v1.2.3-70-g09d2 From 5a79eef4eccf0571e856eb13c0ffe19083d27474 Mon Sep 17 00:00:00 2001 From: Sihyeon Jang Date: Thu, 16 Nov 2017 23:26:07 +0900 Subject: perf buildid-cache: Document missing --force option Add --force to the man page. Signed-off-by: Sihyeon Jang Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1510842367-11011-6-git-send-email-uneedsihyeon@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-buildid-cache.txt | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 84681007f80f..73c2650bd0db 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -24,6 +24,9 @@ OPTIONS -a:: --add=:: Add specified file to the cache. +-f:: +--force:: + Don't complain, do it. -k:: --kcore:: Add specified kcore file to the cache. For the current host that is -- cgit v1.2.3-70-g09d2 From 914eb9ca51117776d83e6761a1c555fb76f0ded2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 6 Aug 2017 16:39:39 +0200 Subject: perf callchain: Reset cursor arg instead of callchain_cursor We already pass cursor into thread__resolve_callchain function, so there's no point in resetting the global instance. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-puk015qvuppao9m1xtdy9v7j@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/machine.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 270f3223c6df..64d255f6a537 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -2204,7 +2204,7 @@ int thread__resolve_callchain(struct thread *thread, { int ret = 0; - callchain_cursor_reset(&callchain_cursor); + callchain_cursor_reset(cursor); if (callchain_param.order == ORDER_CALLEE) { ret = thread__resolve_callchain_sample(thread, cursor, -- cgit v1.2.3-70-g09d2 From 3ad31d8a0df257c3f18c989119359c1f25cd009d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 16:07:05 +0200 Subject: perf evsel: Centralize perf_sample initialization Move the initialization bits into common place at the beginning of the function. Also removing some superfluous zero initialization for addr and transaction, because we zero all the data at the top. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-1gv5t6fvv735t1rt3mxpy1h9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index b8e9def77f44..03d7abcdc6b7 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1983,6 +1983,8 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->id = -1ULL; + data->data_src = PERF_MEM_DATA_SRC_NONE; if (event->header.type != PERF_RECORD_SAMPLE) { if (!evsel->attr.sample_id_all) @@ -2000,7 +2002,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (evsel->sample_size + sizeof(event->header) > event->header.size) return -EFAULT; - data->id = -1ULL; if (type & PERF_SAMPLE_IDENTIFIER) { data->id = *array; array++; @@ -2030,7 +2031,6 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->addr = 0; if (type & PERF_SAMPLE_ADDR) { data->addr = *array; array++; @@ -2194,14 +2194,12 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array++; } - data->data_src = PERF_MEM_DATA_SRC_NONE; if (type & PERF_SAMPLE_DATA_SRC) { OVERFLOW_CHECK_u64(array); data->data_src = *array; array++; } - data->transaction = 0; if (type & PERF_SAMPLE_TRANSACTION) { OVERFLOW_CHECK_u64(array); data->transaction = *array; -- cgit v1.2.3-70-g09d2 From 014681208ea0d1a7e5ea2f014242e7d196d04c34 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:10:28 +0200 Subject: perf evlist: Add perf_evlist__parse_sample_timestamp function Add perf_evlist__parse_sample_timestamp to retrieve the timestamp of the sample. The idea is to use this function instead of the full sample parsing before we queue the sample. At that time only the timestamp is needed and we parse the sample once again later on delivery. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-o7syqo8lipj4or7renpu8e8y@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 11 ++++++++ tools/perf/util/evlist.h | 4 +++ tools/perf/util/evsel.c | 65 +++++++++++++++++++++++++++++++++++++++++++----- tools/perf/util/evsel.h | 4 +++ 4 files changed, 78 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b62e523a7035..199bb82efbcd 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1582,6 +1582,17 @@ int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *even return perf_evsel__parse_sample(evsel, event, sample); } +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp) +{ + struct perf_evsel *evsel = perf_evlist__event2evsel(evlist, event); + + if (!evsel) + return -EFAULT; + return perf_evsel__parse_sample_timestamp(evsel, event, timestamp); +} + size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp) { struct perf_evsel *evsel; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 491f69542920..4e8131dacbd7 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -205,6 +205,10 @@ u16 perf_evlist__id_hdr_size(struct perf_evlist *evlist); int perf_evlist__parse_sample(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample); +int perf_evlist__parse_sample_timestamp(struct perf_evlist *evlist, + union perf_event *event, + u64 *timestamp); + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist); bool perf_evlist__valid_sample_id_all(struct perf_evlist *evlist); bool perf_evlist__valid_read_format(struct perf_evlist *evlist); diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 03d7abcdc6b7..95853c51c0ca 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1962,6 +1962,20 @@ static inline bool overflow(const void *endp, u16 max_size, const void *offset, #define OVERFLOW_CHECK_u64(offset) \ OVERFLOW_CHECK(offset, sizeof(u64), sizeof(u64)) +static int +perf_event__check_size(union perf_event *event, unsigned int sample_size) +{ + /* + * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes + * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to + * check the format does not go past the end of the event. + */ + if (sample_size + sizeof(event->header) > event->header.size) + return -EFAULT; + + return 0; +} + int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *data) { @@ -1994,12 +2008,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, array = event->sample.array; - /* - * The evsel's sample_size is based on PERF_SAMPLE_MASK which includes - * up to PERF_SAMPLE_PERIOD. After that overflow() must be used to - * check the format does not go past the end of the event. - */ - if (evsel->sample_size + sizeof(event->header) > event->header.size) + if (perf_event__check_size(event, evsel->sample_size)) return -EFAULT; if (type & PERF_SAMPLE_IDENTIFIER) { @@ -2232,6 +2241,50 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, return 0; } +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp) +{ + u64 type = evsel->attr.sample_type; + const u64 *array; + + if (!(type & PERF_SAMPLE_TIME)) + return -1; + + if (event->header.type != PERF_RECORD_SAMPLE) { + struct perf_sample data = { + .time = -1ULL, + }; + + if (!evsel->attr.sample_id_all) + return -1; + if (perf_evsel__parse_id_sample(evsel, event, &data)) + return -1; + + *timestamp = data.time; + return 0; + } + + array = event->sample.array; + + if (perf_event__check_size(event, evsel->sample_size)) + return -EFAULT; + + if (type & PERF_SAMPLE_IDENTIFIER) + array++; + + if (type & PERF_SAMPLE_IP) + array++; + + if (type & PERF_SAMPLE_TID) + array++; + + if (type & PERF_SAMPLE_TIME) + *timestamp = *array; + + return 0; +} + size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format) { diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0688880227e1..c3663a70c9b9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -338,6 +338,10 @@ static inline int perf_evsel__read_on_cpu_scaled(struct perf_evsel *evsel, int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, struct perf_sample *sample); +int perf_evsel__parse_sample_timestamp(struct perf_evsel *evsel, + union perf_event *event, + u64 *timestamp); + static inline struct perf_evsel *perf_evsel__next(struct perf_evsel *evsel) { return list_entry(evsel->node.next, struct perf_evsel, node); -- cgit v1.2.3-70-g09d2 From dc83e1394083d6e12625a3158bf88396dfaec633 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:24:33 +0200 Subject: perf ordered_events: Pass timestamp arg in perf_session__queue_event There's no need to pass whole sample data, because it's only timestamp that is used. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-xd1hpoze3kgb1rb639o3vehb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 2 +- tools/perf/util/ordered-events.c | 3 +-- tools/perf/util/ordered-events.h | 2 +- tools/perf/util/session.c | 6 +++--- tools/perf/util/session.h | 2 +- 5 files changed, 7 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0c36f2ac6a0e..cd253db6917f 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -754,7 +754,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session__queue_event(kvm->session, event, &sample, 0); + err = perf_session__queue_event(kvm->session, event, sample.time, 0); /* * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index 8e09fd2d842f..bad9e0296e9a 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -157,9 +157,8 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve } int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - u64 timestamp = sample->time; struct ordered_event *oevent; if (!timestamp || timestamp == ~0ULL) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 96e5292d88e2..8c7a2948593e 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -45,7 +45,7 @@ struct ordered_events { }; int ordered_events__queue(struct ordered_events *oe, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 5c412310f266..8976e417eab2 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -873,9 +873,9 @@ static int process_finished_round(struct perf_tool *tool __maybe_unused, } int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset) + u64 timestamp, u64 file_offset) { - return ordered_events__queue(&s->ordered_events, event, sample, file_offset); + return ordered_events__queue(&s->ordered_events, event, timestamp, file_offset); } static void callchain__lbr_callstack_printf(struct perf_sample *sample) @@ -1517,7 +1517,7 @@ static s64 perf_session__process_event(struct perf_session *session, return ret; if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, &sample, file_offset); + ret = perf_session__queue_event(session, event, sample.time, file_offset); if (ret != -ETIME) return ret; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 80bc80de8362..5b1c32b3694a 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -53,7 +53,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, int perf_session__process_events(struct perf_session *session); int perf_session__queue_event(struct perf_session *s, union perf_event *event, - struct perf_sample *sample, u64 file_offset); + u64 timestamp, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); -- cgit v1.2.3-70-g09d2 From 93d10af26bb7159349158b721ba2e258291d53c3 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Thu, 3 Aug 2017 13:21:14 +0200 Subject: perf tools: Optimize sample parsing for ordered events Currently when using ordered events we parse the sample twice (the perf_evlist__parse_sample function). Once before we queue the sample for sorting: perf_session__process_event perf_evlist__parse_sample(sample) perf_session__queue_event(sample.time) And then when we deliver the sorted sample: ordered_events__deliver_event perf_evlist__parse_sample perf_session__deliver_event We can skip the initial full sample parsing by using perf_evlist__parse_sample_timestamp function, which got introduced earlier. The new path looks like: perf_session__process_event perf_evlist__parse_sample_timestamp perf_session__queue_event ordered_events__deliver_event perf_session__deliver_event perf_evlist__parse_sample It saves some instructions and is slightly faster: Before: Performance counter stats for './perf.old report --stdio' (5 runs): 64,396,007,225 cycles:u ( +- 0.97% ) 105,882,112,735 instructions:u # 1.64 insn per cycle ( +- 0.00% ) 21.618103465 seconds time elapsed ( +- 1.12% ) After: Performance counter stats for './perf report --stdio' (5 runs): 60,567,807,182 cycles:u ( +- 0.40% ) 104,853,333,514 instructions:u # 1.73 insn per cycle ( +- 0.00% ) 20.168895243 seconds time elapsed ( +- 0.32% ) Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-cjp2tuk0qkjs9dxzlpmm34ua@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 8 ++++---- tools/perf/util/session.c | 41 ++++++++++++++++++----------------------- 2 files changed, 22 insertions(+), 27 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index cd253db6917f..597c7de9bec9 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -741,20 +741,20 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, u64 *mmap_time) { union perf_event *event; - struct perf_sample sample; + u64 timestamp; s64 n = 0; int err; *mmap_time = ULLONG_MAX; while ((event = perf_evlist__mmap_read(kvm->evlist, idx)) != NULL) { - err = perf_evlist__parse_sample(kvm->evlist, event, &sample); + err = perf_evlist__parse_sample_timestamp(kvm->evlist, event, ×tamp); if (err) { perf_evlist__mmap_consume(kvm->evlist, idx); pr_err("Failed to parse sample\n"); return -1; } - err = perf_session__queue_event(kvm->session, event, sample.time, 0); + err = perf_session__queue_event(kvm->session, event, timestamp, 0); /* * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. @@ -768,7 +768,7 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, /* save time stamp of our first sample for this mmap */ if (n == 0) - *mmap_time = sample.time; + *mmap_time = timestamp; /* limit events per mmap handled all at once */ n++; diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8976e417eab2..df2857137908 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -27,7 +27,6 @@ static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset); @@ -107,17 +106,10 @@ static void perf_session__set_comm_exec(struct perf_session *session) static int ordered_events__deliver_event(struct ordered_events *oe, struct ordered_event *event) { - struct perf_sample sample; struct perf_session *session = container_of(oe, struct perf_session, ordered_events); - int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); - - if (ret) { - pr_err("Can't parse sample, err = %d\n", ret); - return ret; - } - return perf_session__deliver_event(session, event->event, &sample, + return perf_session__deliver_event(session, event->event, session->tool, event->file_offset); } @@ -1328,20 +1320,26 @@ static int machines__deliver_event(struct machines *machines, static int perf_session__deliver_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, struct perf_tool *tool, u64 file_offset) { + struct perf_sample sample; int ret; - ret = auxtrace__process_event(session, event, sample, tool); + ret = perf_evlist__parse_sample(session->evlist, event, &sample); + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + ret = auxtrace__process_event(session, event, &sample, tool); if (ret < 0) return ret; if (ret > 0) return 0; return machines__deliver_event(&session->machines, session->evlist, - event, sample, tool, file_offset); + event, &sample, tool, file_offset); } static s64 perf_session__process_user_event(struct perf_session *session, @@ -1495,7 +1493,6 @@ static s64 perf_session__process_event(struct perf_session *session, { struct perf_evlist *evlist = session->evlist; struct perf_tool *tool = session->tool; - struct perf_sample sample; int ret; if (session->header.needs_swap) @@ -1509,21 +1506,19 @@ static s64 perf_session__process_event(struct perf_session *session, if (event->header.type >= PERF_RECORD_USER_TYPE_START) return perf_session__process_user_event(session, event, file_offset); - /* - * For all kernel events we get the sample data - */ - ret = perf_evlist__parse_sample(evlist, event, &sample); - if (ret) - return ret; - if (tool->ordered_events) { - ret = perf_session__queue_event(session, event, sample.time, file_offset); + u64 timestamp; + + ret = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); + if (ret) + return ret; + + ret = perf_session__queue_event(session, event, timestamp, file_offset); if (ret != -ETIME) return ret; } - return perf_session__deliver_event(session, event, &sample, tool, - file_offset); + return perf_session__deliver_event(session, event, tool, file_offset); } void perf_event_header__bswap(struct perf_event_header *hdr) -- cgit v1.2.3-70-g09d2 From b135e5ee1a0e325166c30b16cf5493fea44ede45 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Nov 2017 10:23:39 +0100 Subject: perf top: Fix window dimensions change handling The stdio perf top crashes when we change the terminal window size. The reason is that we assumed we get the perf_top pointer as a signal handler argument which is not the case. Changing the SIGWINCH handler logic to change global resize variable, which is checked in the main thread loop. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Tested-by: Ravi Bangoria Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-ysuzwz77oev1ftgvdscn9bpu@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 865191281591..4cbd3dd14a33 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -77,6 +77,7 @@ #include "sane_ctype.h" static volatile int done; +static volatile int resize; #define HEADER_LINE_NR 5 @@ -86,10 +87,13 @@ static void perf_top__update_print_entries(struct perf_top *top) } static void perf_top__sig_winch(int sig __maybe_unused, - siginfo_t *info __maybe_unused, void *arg) + siginfo_t *info __maybe_unused, void *arg __maybe_unused) { - struct perf_top *top = arg; + resize = 1; +} +static void perf_top__resize(struct perf_top *top) +{ get_term_dimensions(&top->winsize); perf_top__update_print_entries(top); } @@ -480,7 +484,7 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) .sa_sigaction = perf_top__sig_winch, .sa_flags = SA_SIGINFO, }; - perf_top__sig_winch(SIGWINCH, NULL, top); + perf_top__resize(top); sigaction(SIGWINCH, &act, NULL); } else { signal(SIGWINCH, SIG_DFL); @@ -1035,6 +1039,11 @@ static int __cmd_top(struct perf_top *top) if (hits == top->samples) ret = perf_evlist__poll(top->evlist, 100); + + if (resize) { + perf_top__resize(top); + resize = 0; + } } ret = 0; -- cgit v1.2.3-70-g09d2 From 244a1086aba97a6b673162fd6684c5c024b724db Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 14:30:57 +0100 Subject: perf top: Use signal interface for SIGWINCH handler There's no need for SA_SIGINFO data in SIGWINCH handler, switching it to register the handler via signal interface as we do for the rest of the signals in perf top. Signed-off-by: Jiri Olsa Tested-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-elxp1vdnaog1scaj13cx7cu0@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4cbd3dd14a33..a29a98334f33 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -86,8 +86,7 @@ static void perf_top__update_print_entries(struct perf_top *top) top->print_entries = top->winsize.ws_row - HEADER_LINE_NR; } -static void perf_top__sig_winch(int sig __maybe_unused, - siginfo_t *info __maybe_unused, void *arg __maybe_unused) +static void winch_sig(int sig __maybe_unused) { resize = 1; } @@ -480,12 +479,8 @@ static bool perf_top__handle_keypress(struct perf_top *top, int c) case 'e': prompt_integer(&top->print_entries, "Enter display entries (lines)"); if (top->print_entries == 0) { - struct sigaction act = { - .sa_sigaction = perf_top__sig_winch, - .sa_flags = SA_SIGINFO, - }; perf_top__resize(top); - sigaction(SIGWINCH, &act, NULL); + signal(SIGWINCH, winch_sig); } else { signal(SIGWINCH, SIG_DFL); } @@ -1366,12 +1361,8 @@ int cmd_top(int argc, const char **argv) get_term_dimensions(&top.winsize); if (top.print_entries == 0) { - struct sigaction act = { - .sa_sigaction = perf_top__sig_winch, - .sa_flags = SA_SIGINFO, - }; perf_top__update_print_entries(&top); - sigaction(SIGWINCH, &act, NULL); + signal(SIGWINCH, winch_sig); } status = __cmd_top(&top); -- cgit v1.2.3-70-g09d2 From a7eec4c677fe60c8760fa9054b578c743ff6a3ec Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 11:53:21 +0100 Subject: perf top: Fix crash when annotating symbol Ravi reported crash in perf top --stdio when annotating a function [1]. The issue was, that we don't pass evsel pointer into symbol__annotate() function, which got over looked in the last annotation changes. [1] https://marc.info/?l=linux-kernel&m=151060884412702&w=2 Committer note: This fixes the crash, but makes it stumble into another bug, double locking the annotation data structures, that is in turn fixed by the next patch in this series. Signed-off-by: Jiri Olsa Tested-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-6eol035redpoqvxqnuiqudtc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index a29a98334f33..0077724fb24f 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -99,6 +99,7 @@ static void perf_top__resize(struct perf_top *top) static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) { + struct perf_evsel *evsel = hists_to_evsel(he->hists); struct symbol *sym; struct annotation *notes; struct map *map; @@ -137,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, NULL, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; -- cgit v1.2.3-70-g09d2 From 9e4e0a9d2ef37c7bc60c32e2a3189bd1f04067a5 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 12:05:59 +0100 Subject: perf tools: Change (symbol|annotation)__calc_percent return type to void There's no need for symbol__calc_percent and annotation__calc_percent functions to return any value, since it's always zero. Changing both function to return void. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-z0gs28hh24m4gia1t1ctraye@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- tools/perf/util/annotate.h | 2 +- 2 files changed, 9 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 30d74dabdc42..846abb4955ac 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1584,8 +1584,8 @@ static void calc_percent(struct sym_hist *hist, } } -static int annotation__calc_percent(struct annotation *notes, - struct perf_evsel *evsel, s64 len) +static void annotation__calc_percent(struct annotation *notes, + struct perf_evsel *evsel, s64 len) { struct annotation_line *al, *next; @@ -1609,15 +1609,13 @@ static int annotation__calc_percent(struct annotation *notes, calc_percent(hist, sample, al->offset, end); } } - - return 0; } -int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) { struct annotation *notes = symbol__annotation(sym); - return annotation__calc_percent(notes, evsel, symbol__size(sym)); + annotation__calc_percent(notes, evsel, symbol__size(sym)); } int symbol__annotate(struct symbol *sym, struct map *map, @@ -1656,10 +1654,11 @@ int symbol__annotate(struct symbol *sym, struct map *map, } err = symbol__disassemble(sym, &args); - if (err) - return err; + if (!err) + symbol__calc_percent(sym, evsel); + + return err; - return symbol__calc_percent(sym, evsel); } static void insert_source_line(struct rb_root *root, struct annotation_line *al) diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 4fc805a271d2..6d7289e88fa3 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -107,7 +107,7 @@ struct annotation_line * annotation_line__next(struct annotation_line *pos, struct list_head *head); int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); -int symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); +void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel); struct sym_hist { u64 nr_samples; -- cgit v1.2.3-70-g09d2 From 05d3f1a1d5a3d37ca4b591d5524f5a5b159d0564 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 15 Nov 2017 12:20:08 +0100 Subject: perf tools: Move symbol__calc_percent() call to outside symbol__disassemble() We need to call symbol__calc_percent() periodicaly for top, so it's no longer convenient to keep it in symbol__disassemble(). Let's separate the symbol__disassemble() to allocate and init the symbol annotation structs and symbol__calc_percent() to compute the lines percentages based on symbol hists data. Signed-off-by: Jiri Olsa Cc: Adrian Hunter Cc: David Ahern Cc: Namhyung Kim Cc: Wang Nan Link: http://lkml.kernel.org/n/tip-gtnp8t4tb00q6lag07psn5nq@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/ui/browsers/annotate.c | 2 ++ tools/perf/ui/gtk/annotate.c | 2 ++ tools/perf/util/annotate.c | 9 +++------ 3 files changed, 7 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 5a2f37a91feb..03b7363a49c9 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1126,6 +1126,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, goto out_free_offsets; } + symbol__calc_percent(sym, evsel); + ui_helpline__push("Press ESC to exit"); notes = symbol__annotation(sym); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index 5e0a56df0b4c..cdb5ecf91666 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -177,6 +177,8 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, return -1; } + symbol__calc_percent(sym, evsel); + if (perf_gtk__is_active_context(pgctx)) { window = pgctx->main_window; notebook = pgctx->notebook; diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 846abb4955ac..22ea7936d92f 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1653,12 +1653,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, } } - err = symbol__disassemble(sym, &args); - if (!err) - symbol__calc_percent(sym, evsel); - - return err; - + return symbol__disassemble(sym, &args); } static void insert_source_line(struct rb_root *root, struct annotation_line *al) @@ -2005,6 +2000,8 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) return -1; + symbol__calc_percent(sym, evsel); + if (print_lines) { srcline_full_filename = full_paths; symbol__calc_lines(sym, map, &source_line); -- cgit v1.2.3-70-g09d2 From 3f27bb5f00dc10609c2704cd39a130c8155a8510 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 21 Nov 2017 09:41:11 +0100 Subject: tools headers: Follow the upstream UAPI header version 100% differ from the kernel Remove this from check-headers.sh: opts="--ignore-blank-lines --ignore-space-change" as the easiest policy is to just follow the upstream UAPI header version 100%. Pure space-only changes are comparatively rare. Signed-off-by: Ingo Molnar Cc: Adrian Hunter Link: http://lkml.kernel.org/r/20171121084111.y6p5zwqso2cbms5s@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/check-headers.sh | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 77406d25e521..e66a8a7bcced 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -45,7 +45,6 @@ include/uapi/asm-generic/mman-common.h check () { file=$1 - opts="--ignore-blank-lines --ignore-space-change" shift while [ -n "$*" ]; do -- cgit v1.2.3-70-g09d2 From 4ca69ca9db3ae51ac7cc0bd1af7961b7a3ba5b87 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Thu, 23 Nov 2017 08:46:23 +0100 Subject: perf test: Disable test cases 19 and 20 on s390x The s390x CPU sampling and measurement facilities do not support perf events of type PERF_TYPE_BREAKPOINT. The test cases are executed and fail with -ENOENT due to missing hardware support. Disable the execution of both test cases based on a platform check. This is the same approach as done for PowerPC. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Cc: Martin Schwidefsky LPU-Reference: 20171123074623.20817-1-tmricht@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-uqvoy6a1tsu8jddo5jjg4h85@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bp_signal.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/bp_signal.c b/tools/perf/tests/bp_signal.c index 335b695f4970..a467615c5a0e 100644 --- a/tools/perf/tests/bp_signal.c +++ b/tools/perf/tests/bp_signal.c @@ -296,7 +296,7 @@ bool test__bp_signal_is_supported(void) * instruction breakpoint using the perf event interface. * Once it's there we can release this. */ -#ifdef __powerpc__ +#if defined(__powerpc__) || defined(__s390x__) return false; #else return true; -- cgit v1.2.3-70-g09d2 From bfd8f72c2778f5bd63dc9eb6d23bd7a0d99cff6d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 13:42:58 -0800 Subject: perf record: Synthesize unit/scale/... in event update Move the code to synthesize event updates for scale/unit/cpus to a common utility file, and use it both from stat and record. This allows to access scale and other extra qualifiers from perf script. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171117214300.32746-2-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 9 ++++++ tools/perf/builtin-stat.c | 62 +++-------------------------------------- tools/perf/util/header.c | 68 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/header.h | 5 ++++ 4 files changed, 86 insertions(+), 58 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 003255910c05..b92d6d67bca8 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -372,6 +372,8 @@ try_again: ui__error("%s\n", msg); goto out; } + + pos->supported = true; } if (perf_evlist__apply_filters(evlist, &pos)) { @@ -784,6 +786,13 @@ static int record__synthesize(struct record *rec, bool tail) perf_event__synthesize_guest_os, tool); } + err = perf_event__synthesize_extra_attr(&rec->tool, + rec->evlist, + process_synthesized_event, + data->is_pipe); + if (err) + goto out; + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, opts->proc_map_timeout, 1); diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 59af5a8419e2..a027b4712e48 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -458,19 +458,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused, siginfo_t *inf workload_exec_errno = info->si_value.sival_int; } -static bool has_unit(struct perf_evsel *counter) -{ - return counter->unit && *counter->unit; -} - -static bool has_scale(struct perf_evsel *counter) -{ - return counter->scale != 1; -} - static int perf_stat_synthesize_config(bool is_pipe) { - struct perf_evsel *counter; int err; if (is_pipe) { @@ -482,53 +471,10 @@ static int perf_stat_synthesize_config(bool is_pipe) } } - /* - * Synthesize other events stuff not carried within - * attr event - unit, scale, name - */ - evlist__for_each_entry(evsel_list, counter) { - if (!counter->supported) - continue; - - /* - * Synthesize unit and scale only if it's defined. - */ - if (has_unit(counter)) { - err = perf_event__synthesize_event_update_unit(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel unit.\n"); - return err; - } - } - - if (has_scale(counter)) { - err = perf_event__synthesize_event_update_scale(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel scale.\n"); - return err; - } - } - - if (counter->own_cpus) { - err = perf_event__synthesize_event_update_cpus(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel scale.\n"); - return err; - } - } - - /* - * Name is needed only for pipe output, - * perf.data carries event names. - */ - if (is_pipe) { - err = perf_event__synthesize_event_update_name(NULL, counter, process_synthesized_event); - if (err < 0) { - pr_err("Couldn't synthesize evsel name.\n"); - return err; - } - } - } + err = perf_event__synthesize_extra_attr(NULL, + evsel_list, + process_synthesized_event, + is_pipe); err = perf_event__synthesize_thread_map2(NULL, evsel_list->threads, process_synthesized_event, diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 7c0e9d587bfa..5890e08e0754 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -3258,6 +3258,74 @@ int perf_event__synthesize_attrs(struct perf_tool *tool, return err; } +static bool has_unit(struct perf_evsel *counter) +{ + return counter->unit && *counter->unit; +} + +static bool has_scale(struct perf_evsel *counter) +{ + return counter->scale != 1; +} + +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe) +{ + struct perf_evsel *counter; + int err; + + /* + * Synthesize other events stuff not carried within + * attr event - unit, scale, name + */ + evlist__for_each_entry(evsel_list, counter) { + if (!counter->supported) + continue; + + /* + * Synthesize unit and scale only if it's defined. + */ + if (has_unit(counter)) { + err = perf_event__synthesize_event_update_unit(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel unit.\n"); + return err; + } + } + + if (has_scale(counter)) { + err = perf_event__synthesize_event_update_scale(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel counter.\n"); + return err; + } + } + + if (counter->own_cpus) { + err = perf_event__synthesize_event_update_cpus(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel cpus.\n"); + return err; + } + } + + /* + * Name is needed only for pipe output, + * perf.data carries event names. + */ + if (is_pipe) { + err = perf_event__synthesize_event_update_name(tool, counter, process); + if (err < 0) { + pr_err("Couldn't synthesize evsel name.\n"); + return err; + } + } + } + return 0; +} + int perf_event__process_attr(struct perf_tool *tool __maybe_unused, union perf_event *event, struct perf_evlist **pevlist) diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 29ccbfdf8724..91befc3b550d 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -107,6 +107,11 @@ int perf_event__synthesize_features(struct perf_tool *tool, struct perf_evlist *evlist, perf_event__handler_t process); +int perf_event__synthesize_extra_attr(struct perf_tool *tool, + struct perf_evlist *evsel_list, + perf_event__handler_t process, + bool is_pipe); + int perf_event__process_feature(struct perf_tool *tool, union perf_event *event, struct perf_session *session); -- cgit v1.2.3-70-g09d2 From 373565d285e8d2113f1b6c0a2e461b9c8d0da1c9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 13:42:59 -0800 Subject: perf record: Synthesize thread map and cpu map Synthesize the per attr thread maps and cpu maps in 'perf record'. This allows code from 'perf stat' called from 'perf script' to access this information. Committer testing: Please see the PERF_RECORD_THREAD_MAP and PERF_RECORD_CPU_MAP records, added by this patch: $ perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.001 MB perf.data (8 samples) ] $ perf report -D | grep PERF_RECORD_ | head 0xe8 [0x20]: PERF_RECORD_TIME_CONV: unhandled! 0x108 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 23568 0x130 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x148 [0x28]: PERF_RECORD_COMM: perf:23568/23568 0x570 [0x8]: PERF_RECORD_FINISHED_ROUND 445342677837144 0x170 [0x28]: PERF_RECORD_COMM exec: sleep:23568/23568 445342677847339 0x198 [0x68]: PERF_RECORD_MMAP2 23568/23568: [0x564c943a4000(0x208000) @ 0 fd:00 3147174 2566255743]: r-xp /usr/bin/sleep 445342677862450 0x200 [0x70]: PERF_RECORD_MMAP2 23568/23568: [0x7f25968a8000(0x229000) @ 0 fd:00 3151761 2566238119]: r-xp /usr/lib64/ld-2.25.so 445342677873174 0x270 [0x60]: PERF_RECORD_MMAP2 23568/23568: [0x7ffc98176000(0x2000) @ 0 00:00 0 0]: r-xp [vdso] 445342677891928 0x2d0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4002): 23568/23568: 0xffffffff8f84c7e7 period: 1 addr: 0 $ Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Link: http://lkml.kernel.org/r/20171117214300.32746-3-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index b92d6d67bca8..e304bc47fe9b 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -793,6 +793,21 @@ static int record__synthesize(struct record *rec, bool tail) if (err) goto out; + err = perf_event__synthesize_thread_map2(&rec->tool, rec->evlist->threads, + process_synthesized_event, + NULL); + if (err < 0) { + pr_err("Couldn't synthesize thread map.\n"); + return err; + } + + err = perf_event__synthesize_cpu_map(&rec->tool, rec->evlist->cpus, + process_synthesized_event, NULL); + if (err < 0) { + pr_err("Couldn't synthesize cpu map.\n"); + return err; + } + err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, process_synthesized_event, opts->sample_address, opts->proc_map_timeout, 1); -- cgit v1.2.3-70-g09d2 From 4bd1bef8bba2f99ff472ae3617864dda301f81bd Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 13:43:00 -0800 Subject: perf script: Allow computing 'perf stat' style metrics Add support for computing 'perf stat' style metrics in 'perf script'. When using leader sampling we can get metrics for each sampling period by computing formulas over the values of the different group members. This allows things like fine grained IPC tracking through sampling, much more fine grained than with 'perf stat'. The metric is still averaged over the sampling period, it is not just for the sampling point. This patch adds a new metric output field for 'perf script' that uses the existing 'perf stat' metrics infrastructure to compute any metrics supported by 'perf stat'. For example to sample IPC: $ perf record -e '{ref-cycles,cycles,instructions}:S' -a sleep 1 $ perf script -F metric,ip,sym,time,cpu,comm ... alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: 7fd65937d6cc [unknown] alsa-sink-ALC32 [000] 42815.856074: metric: 0.13 insn per cycle swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: ffffffff81655df0 __schedule swapper [000] 42815.857961: metric: 0.23 insn per cycle qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: ffffffff8165ad0e _raw_spin_unlock_irqrestore qemu-system-x86 [000] 42815.858130: metric: 0.46 insn per cycle :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: ffffffffa080e5f2 vmx_vcpu_run :4972 [000] 42815.858312: metric: 0.45 insn per cycle TopDown: This requires disabling SMT if you have it enabled, because SMT would require sampling per core, which is not supported. $ perf record -e '{ref-cycles,topdown-fetch-bubbles,\ topdown-recovery-bubbles,\ topdown-slots-retired,topdown-total-slots,\ topdown-slots-issued}:S' -a sleep 1 $ perf script --header -I -F cpu,ip,sym,event,metric,period ... [000] 121108 ref-cycles: ffffffff8165222e copy_user_enhanced_fast_string [000] 190350 topdown-fetch-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 2055 topdown-recovery-bubbles: ffffffff8165222e copy_user_enhanced_fast_string [000] 148729 topdown-slots-retired: ffffffff8165222e copy_user_enhanced_fast_string [000] 144324 topdown-total-slots: ffffffff8165222e copy_user_enhanced_fast_string [000] 160852 topdown-slots-issued: ffffffff8165222e copy_user_enhanced_fast_string [000] metric: 33.0% frontend bound [000] metric: 3.5% bad speculation [000] metric: 25.8% retiring [000] metric: 37.7% backend bound [000] 112112 ref-cycles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 357222 topdown-fetch-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 3325 topdown-recovery-bubbles: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 323553 topdown-slots-retired: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 270507 topdown-total-slots: ffffffff8165aec8 _raw_spin_lock_irqsave [000] 341226 topdown-slots-issued: ffffffff8165aec8 _raw_spin_lock_irqsave [000] metric: 33.0% frontend bound [000] metric: 2.9% bad speculation [000] metric: 29.9% retiring [000] metric: 34.2% backend bound ... v2: Use evsel->priv for new fields Port to new base line, support fp output. Handle stats in ->stats, not ->priv Minor cleanups Extra explanation about the use of the term 'averaging', from Andi in the thread in the Link: tag below: The current samples contains the sum of event counts for a sampling period. EventA-1 EventA-2 EventA-3 EventA-4 EventB-1 EventB-2 EventC-3 gap with no events overflow |-----------------------------------------------------------------| period-start period-end ^ ^ | | previous sample current sample So EventA = 4 and EventB = 3 at the sample point I generate a metric, let's say EventA / EventB. It applies to the whole period. But the metric is over a longer time which does not have the same behavior. For example the gap above doesn't have any events, while they are clustered at the beginning and end of the sample period. But we're summing everything together. The metric doesn't know that the gap is different than the busy period. That's what I'm trying to express with averaging. Signed-off-by: Andi Kleen Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/20171117214300.32746-4-andi@firstfloor.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 10 +++- tools/perf/builtin-script.c | 97 +++++++++++++++++++++++++++++++- tools/perf/util/metricgroup.c | 4 ++ 3 files changed, 108 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 2811fcf684cb..974ceb12c7f3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -217,6 +217,14 @@ OPTIONS The brstackoff field will print an offset into a specific dso/binary. + With the metric option perf script can compute metrics for + sampling periods, similar to perf stat. This requires + specifying a group with multiple metrics with the :S option + for perf record. perf will sample on the first event, and + compute metrics for all the events in the group. Please note + that the metric computed is averaged over the whole sampling + period, not just for the sample point. + -k:: --vmlinux=:: vmlinux pathname diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ee7c7aaaae72..39d8b55f0db3 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -22,6 +22,7 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/stat.h" +#include "util/color.h" #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" @@ -90,6 +91,7 @@ enum perf_output_field { PERF_OUTPUT_SYNTH = 1U << 25, PERF_OUTPUT_PHYS_ADDR = 1U << 26, PERF_OUTPUT_UREGS = 1U << 27, + PERF_OUTPUT_METRIC = 1U << 28, }; struct output_option { @@ -124,6 +126,7 @@ struct output_option { {.str = "brstackoff", .field = PERF_OUTPUT_BRSTACKOFF}, {.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, + {.str = "metric", .field = PERF_OUTPUT_METRIC}, }; enum { @@ -215,12 +218,20 @@ struct perf_evsel_script { char *filename; FILE *fp; u64 samples; + /* For metric output */ + u64 val; + int gnum; }; +static inline struct perf_evsel_script *evsel_script(struct perf_evsel *evsel) +{ + return (struct perf_evsel_script *)evsel->priv; +} + static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel, struct perf_data *data) { - struct perf_evsel_script *es = malloc(sizeof(*es)); + struct perf_evsel_script *es = zalloc(sizeof(*es)); if (es != NULL) { if (asprintf(&es->filename, "%s.%s.dump", data->file.path, perf_evsel__name(evsel)) < 0) @@ -228,7 +239,6 @@ static struct perf_evsel_script *perf_evsel_script__new(struct perf_evsel *evsel es->fp = fopen(es->filename, "w"); if (es->fp == NULL) goto out_free_filename; - es->samples = 0; } return es; @@ -1472,6 +1482,86 @@ static int data_src__fprintf(u64 data_src, FILE *fp) return fprintf(fp, "%-*s", maxlen, out); } +struct metric_ctx { + struct perf_sample *sample; + struct thread *thread; + struct perf_evsel *evsel; + FILE *fp; +}; + +static void script_print_metric(void *ctx, const char *color, + const char *fmt, + const char *unit, double val) +{ + struct metric_ctx *mctx = ctx; + + if (!fmt) + return; + perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + mctx->fp); + fputs("\tmetric: ", mctx->fp); + if (color) + color_fprintf(mctx->fp, color, fmt, val); + else + printf(fmt, val); + fprintf(mctx->fp, " %s\n", unit); +} + +static void script_new_line(void *ctx) +{ + struct metric_ctx *mctx = ctx; + + perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, + mctx->fp); + fputs("\tmetric: ", mctx->fp); +} + +static void perf_sample__fprint_metric(struct perf_script *script, + struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + FILE *fp) +{ + struct perf_stat_output_ctx ctx = { + .print_metric = script_print_metric, + .new_line = script_new_line, + .ctx = &(struct metric_ctx) { + .sample = sample, + .thread = thread, + .evsel = evsel, + .fp = fp, + }, + .force_header = false, + }; + struct perf_evsel *ev2; + static bool init; + u64 val; + + if (!init) { + perf_stat__init_shadow_stats(); + init = true; + } + if (!evsel->stats) + perf_evlist__alloc_stats(script->session->evlist, false); + if (evsel_script(evsel->leader)->gnum++ == 0) + perf_stat__reset_shadow_stats(); + val = sample->period * evsel->scale; + perf_stat__update_shadow_stats(evsel, + val, + sample->cpu); + evsel_script(evsel)->val = val; + if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { + for_each_group_member (ev2, evsel->leader) { + perf_stat__print_shadow_stats(ev2, + evsel_script(ev2)->val, + sample->cpu, + &ctx, + NULL); + } + evsel_script(evsel->leader)->gnum = 0; + } +} + static void process_event(struct perf_script *script, struct perf_sample *sample, struct perf_evsel *evsel, struct addr_location *al, @@ -1559,6 +1649,9 @@ static void process_event(struct perf_script *script, if (PRINT_FIELD(PHYS_ADDR)) fprintf(fp, "%16" PRIx64, sample->phys_addr); fprintf(fp, "\n"); + + if (PRINT_FIELD(METRIC)) + perf_sample__fprint_metric(script, thread, evsel, sample, fp); } static struct scripting_ops *scripting_ops; diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 0ddd9c199227..6fd709017bbc 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -38,6 +38,10 @@ struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct metric_event me = { .evsel = evsel }; + + if (!metric_events) + return NULL; + nd = rblist__find(metric_events, &me); if (nd) return container_of(nd, struct metric_event, nd); -- cgit v1.2.3-70-g09d2 From 2e38e661f00603584fa5a64acdf580b400bad570 Mon Sep 17 00:00:00 2001 From: Hansuk Hong Date: Fri, 24 Nov 2017 01:05:46 +0900 Subject: perf buildid-cache: Document for Node.js USDT Add a tip for Node.js USDT(User-Level Statically Defined Tracing) probes in tips.txt Signed-off-by: Hansuk Hong Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20171123160546.9722-1-flavono123@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/tips.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index db0ca3063eae..3dd1dbe28407 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -32,3 +32,4 @@ Order by the overhead of source file name and line number: perf report -s srclin System-wide collection from all CPUs: perf record -a Show current config key-value pairs: perf config --list Show user configuration overrides: perf config --user --list +To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node` -- cgit v1.2.3-70-g09d2 From f250b09c779550e4a7a412dae6d3ad34d5201019 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 23 Nov 2017 15:35:04 -0300 Subject: perf report: Fix -D output for user metadata events The PERF_RECORD_USER_ events are synthesized by the tool to assist in processing the PERF_RECORD_ ones generated by the kernel, the printing of that information doesn't come with a perf_sample structure, so, when dumping the event fields using 'perf report -D' there were columns that end up not being printed. To tidy up a bit this, fake a perf_sample structure with zeroes to have the missing columns printed and avoid the occasional surprise with that. Before: 0 0x45b8 [0x68]: PERF_RECORD_MMAP -1/0: [0xffffffffc12ec000(0x4000) @ 0]: x /lib/modules/4.14.0+/kernel/fs/nls/nls_utf8.ko 0x4620 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 27820 0x4648 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x4660 [0x28]: PERF_RECORD_COMM: perf:27820/27820 0x4a58 [0x8]: PERF_RECORD_FINISHED_ROUND 447723433020976 0x4688 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 27820/27820: 0xffffffff8f1b6d7a period: 1 addr: 0 After: $ perf report -D | grep PERF_RECORD_ | head 0 0xe8 [0x20]: PERF_RECORD_TIME_CONV: unhandled! 0 0x108 [0x28]: PERF_RECORD_THREAD_MAP nr: 1 thread: 32555 0 0x130 [0x18]: PERF_RECORD_CPU_MAP: 0-3 0 0x148 [0x28]: PERF_RECORD_COMM: perf:32555/32555 0 0x4e8 [0x8]: PERF_RECORD_FINISHED_ROUND 448743409421205 0x170 [0x28]: PERF_RECORD_COMM exec: sleep:32555/32555 448743409431883 0x198 [0x68]: PERF_RECORD_MMAP2 32555/32555: [0x55e11d75a000(0x208000) @ 0 fd:00 3147174 2566255743]: r-xp /usr/bin/sleep 448743409443873 0x200 [0x70]: PERF_RECORD_MMAP2 32555/32555: [0x7f0ced316000(0x229000) @ 0 fd:00 3151761 2566238119]: r-xp /usr/lib64/ld-2.25.so 448743409454790 0x270 [0x60]: PERF_RECORD_MMAP2 32555/32555: [0x7ffe84f6d000(0x2000) @ 0 00:00 0 0]: r-xp [vdso] 448743409479500 0x2d0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4002): 32555/32555: 0xffffffff8f84c7e7 period: 1 addr: 0 $ Cc: Adrian Hunter Cc: Andi Kleen Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Thomas Gleixner Fixes: 9aefcab0de47 ("perf session: Consolidate the dump code") Link: https://lkml.kernel.org/n/tip-todcu15x0cwgppkh1gi6uhru@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index df2857137908..54e30f1bcbd7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1348,10 +1348,11 @@ static s64 perf_session__process_user_event(struct perf_session *session, { struct ordered_events *oe = &session->ordered_events; struct perf_tool *tool = session->tool; + struct perf_sample sample = { .time = 0, }; int fd = perf_data__fd(session->data); int err; - dump_event(session->evlist, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, &sample); /* These events are processed right away */ switch (event->header.type) { -- cgit v1.2.3-70-g09d2 From c2653297311612b0ead3b72b3629bfd963af2273 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 21 Nov 2017 10:35:25 +0200 Subject: perf intel-pt: Improve build messages for files that differ from the kernel Print file names of files that differ. For example, instead of: Warning: Intel PT: x86 instruction decoder differs from kernel print: Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h' Reported-by: Ingo Molnar Tested-by: Arnaldo Carvalho de Melo Signed-off-by: Adrian Hunter Link: http://lkml.kernel.org/r/1511253326-22308-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-pt-decoder/Build | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/intel-pt-decoder/Build b/tools/perf/util/intel-pt-decoder/Build index 10e0814bb8d2..1b704fbea9de 100644 --- a/tools/perf/util/intel-pt-decoder/Build +++ b/tools/perf/util/intel-pt-decoder/Build @@ -11,15 +11,21 @@ $(OUTPUT)util/intel-pt-decoder/inat-tables.c: $(inat_tables_script) $(inat_table $(OUTPUT)util/intel-pt-decoder/intel-pt-insn-decoder.o: util/intel-pt-decoder/intel-pt-insn-decoder.c util/intel-pt-decoder/inat.c $(OUTPUT)util/intel-pt-decoder/inat-tables.c @(diff -I 2>&1 | grep -q 'option requires an argument' && \ - test -d ../../kernel -a -d ../../tools -a -d ../perf && (( \ - diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null && \ - diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null && \ - diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null && \ - diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) \ - || echo "Warning: Intel PT: x86 instruction decoder differs from kernel" >&2 )) || true + test -d ../../kernel -a -d ../../tools -a -d ../perf && ( \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.c ../../arch/x86/lib/insn.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/insn.c' differs from latest version at 'arch/x86/lib/insn.c'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.c ../../arch/x86/lib/inat.c >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder C file at 'tools/perf/util/intel-pt-decoder/inat.c' differs from latest version at 'arch/x86/lib/inat.c'" >&2)) && \ + ((diff -B util/intel-pt-decoder/x86-opcode-map.txt ../../arch/x86/lib/x86-opcode-map.txt >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder map file at 'tools/perf/util/intel-pt-decoder/x86-opcode-map.txt' differs from latest version at 'arch/x86/lib/x86-opcode-map.txt'" >&2)) && \ + ((diff -B util/intel-pt-decoder/gen-insn-attr-x86.awk ../../arch/x86/tools/gen-insn-attr-x86.awk >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder script at 'tools/perf/util/intel-pt-decoder/gen-insn-attr-x86.awk' differs from latest version at 'arch/x86/tools/gen-insn-attr-x86.awk'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/insn.h ../../arch/x86/include/asm/insn.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/insn.h' differs from latest version at 'arch/x86/include/asm/insn.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat.h ../../arch/x86/include/asm/inat.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat.h' differs from latest version at 'arch/x86/include/asm/inat.h'" >&2)) && \ + ((diff -B -I'^#include' util/intel-pt-decoder/inat_types.h ../../arch/x86/include/asm/inat_types.h >/dev/null) || \ + (echo "Warning: Intel PT: x86 instruction decoder header at 'tools/perf/util/intel-pt-decoder/inat_types.h' differs from latest version at 'arch/x86/include/asm/inat_types.h'" >&2)))) || true $(call rule_mkdir) $(call if_changed_dep,cc_o_c) -- cgit v1.2.3-70-g09d2 From 3b2323c2c1c4acf8961cfcdddcee9889daaa21e3 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Sun, 26 Nov 2017 20:20:59 -0800 Subject: perf bench futex: Use cpumaps It was reported that the whole futex bench breaks when dealing with non-contiguously numbered cpus. $ echo 0 | sudo tee /sys/devices/system/cpu/cpu3/online $ ./perf bench futex all perf: pthread_create: Operation not permitted Run summary [PID 14934]: 7 threads, each .... James had implemented an approach with cpumaps that use an in house flavor. Instead of re-inventing the wheel, I've redone the patch such that we use the perf's util/cpumap.c interface instead. Applies to all futex benchmarks. Suggested-by: Arnaldo Carvalho de Melo Originally-from: James Yang Signed-off-by: Davidlohr Bueso Cc: Davidlohr Bueso Cc: Kim Phillips Link: http://lkml.kernel.org/r/20171127042101.3659-2-dave@stgolabs.net Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 19 ++++++++++++------- tools/perf/bench/futex-lock-pi.c | 23 ++++++++++++++--------- tools/perf/bench/futex-requeue.c | 22 +++++++++++++--------- tools/perf/bench/futex-wake-parallel.c | 24 +++++++++++++++--------- tools/perf/bench/futex-wake.c | 18 +++++++++++------- 5 files changed, 65 insertions(+), 41 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 58ae6ed8f38b..2defb6df7fd0 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -24,6 +24,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -118,11 +119,12 @@ static void print_summary(void) int bench_futex_hash(int argc, const char **argv) { int ret = 0; - cpu_set_t cpu; + cpu_set_t cpuset; struct sigaction act; - unsigned int i, ncpus; + unsigned int i; pthread_attr_t thread_attr; struct worker *worker = NULL; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_hash_usage, 0); if (argc) { @@ -130,14 +132,16 @@ int bench_futex_hash(int argc, const char **argv) exit(EXIT_FAILURE); } - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + goto errmem; sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); if (!nthreads) /* default to the number of CPUs */ - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) @@ -163,10 +167,10 @@ int bench_futex_hash(int argc, const char **argv) if (!worker[i].futex) goto errmem; - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu); + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset); if (ret) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); @@ -217,6 +221,7 @@ int bench_futex_hash(int argc, const char **argv) print_summary(); free(worker); + free(cpu); return ret; errmem: err(EXIT_FAILURE, "calloc"); diff --git a/tools/perf/bench/futex-lock-pi.c b/tools/perf/bench/futex-lock-pi.c index 08653ae8a8c4..8e9c4753e304 100644 --- a/tools/perf/bench/futex-lock-pi.c +++ b/tools/perf/bench/futex-lock-pi.c @@ -15,6 +15,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -32,7 +33,7 @@ static struct worker *worker; static unsigned int nsecs = 10; static bool silent = false, multi = false; static bool done = false, fshared = false; -static unsigned int ncpus, nthreads = 0; +static unsigned int nthreads = 0; static int futex_flag = 0; struct timeval start, end, runtime; static pthread_mutex_t thread_lock; @@ -113,9 +114,10 @@ static void *workerfn(void *arg) return NULL; } -static void create_threads(struct worker *w, pthread_attr_t thread_attr) +static void create_threads(struct worker *w, pthread_attr_t thread_attr, + struct cpu_map *cpu) { - cpu_set_t cpu; + cpu_set_t cpuset; unsigned int i; threads_starting = nthreads; @@ -130,10 +132,10 @@ static void create_threads(struct worker *w, pthread_attr_t thread_attr) } else worker[i].futex = &global_futex; - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); if (pthread_create(&w[i].thread, &thread_attr, workerfn, &worker[i])) @@ -147,19 +149,22 @@ int bench_futex_lock_pi(int argc, const char **argv) unsigned int i; struct sigaction act; pthread_attr_t thread_attr; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_lock_pi_usage, 0); if (argc) goto err; - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "calloc"); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); if (!nthreads) - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) @@ -180,7 +185,7 @@ int bench_futex_lock_pi(int argc, const char **argv) pthread_attr_init(&thread_attr); gettimeofday(&start, NULL); - create_threads(worker, thread_attr); + create_threads(worker, thread_attr, cpu); pthread_attr_destroy(&thread_attr); pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index 1058c194608a..fc692efa0c05 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -22,6 +22,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -40,7 +41,7 @@ static bool done = false, silent = false, fshared = false; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats requeuetime_stats, requeued_stats; -static unsigned int ncpus, threads_starting, nthreads = 0; +static unsigned int threads_starting, nthreads = 0; static int futex_flag = 0; static const struct option options[] = { @@ -83,19 +84,19 @@ static void *workerfn(void *arg __maybe_unused) } static void block_threads(pthread_t *w, - pthread_attr_t thread_attr) + pthread_attr_t thread_attr, struct cpu_map *cpu) { - cpu_set_t cpu; + cpu_set_t cpuset; unsigned int i; threads_starting = nthreads; /* create and block all threads */ for (i = 0; i < nthreads; i++) { - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) @@ -116,19 +117,22 @@ int bench_futex_requeue(int argc, const char **argv) unsigned int i, j; struct sigaction act; pthread_attr_t thread_attr; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_requeue_usage, 0); if (argc) goto err; - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "cpu_map__new"); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); if (!nthreads) - nthreads = ncpus; + nthreads = cpu->nr; worker = calloc(nthreads, sizeof(*worker)); if (!worker) @@ -156,7 +160,7 @@ int bench_futex_requeue(int argc, const char **argv) struct timeval start, end, runtime; /* create, launch & block all threads */ - block_threads(worker, thread_attr); + block_threads(worker, thread_attr, cpu); /* make sure all threads are already blocked */ pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index b4732dad9f89..4488c27e8a43 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -21,6 +21,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -43,7 +44,7 @@ static unsigned int nblocked_threads = 0, nwaking_threads = 0; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; static struct stats waketime_stats, wakeup_stats; -static unsigned int ncpus, threads_starting; +static unsigned int threads_starting; static int futex_flag = 0; static const struct option options[] = { @@ -119,19 +120,20 @@ static void *blocked_workerfn(void *arg __maybe_unused) return NULL; } -static void block_threads(pthread_t *w, pthread_attr_t thread_attr) +static void block_threads(pthread_t *w, pthread_attr_t thread_attr, + struct cpu_map *cpu) { - cpu_set_t cpu; + cpu_set_t cpuset; unsigned int i; threads_starting = nblocked_threads; /* create and block all threads */ for (i = 0; i < nblocked_threads; i++) { - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) @@ -205,6 +207,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) struct sigaction act; pthread_attr_t thread_attr; struct thread_data *waking_worker; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_wake_parallel_usage, 0); @@ -217,9 +220,12 @@ int bench_futex_wake_parallel(int argc, const char **argv) act.sa_sigaction = toggle_done; sigaction(SIGINT, &act, NULL); - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "calloc"); + if (!nblocked_threads) - nblocked_threads = ncpus; + nblocked_threads = cpu->nr; /* some sanity checks */ if (nwaking_threads > nblocked_threads || !nwaking_threads) @@ -259,7 +265,7 @@ int bench_futex_wake_parallel(int argc, const char **argv) err(EXIT_FAILURE, "calloc"); /* create, launch & block all threads */ - block_threads(blocked_worker, thread_attr); + block_threads(blocked_worker, thread_attr, cpu); /* make sure all threads are already blocked */ pthread_mutex_lock(&thread_lock); diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 8c5c0b6b5c97..e8181ad7d088 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -22,6 +22,7 @@ #include #include "bench.h" #include "futex.h" +#include "cpumap.h" #include #include @@ -89,19 +90,19 @@ static void print_summary(void) } static void block_threads(pthread_t *w, - pthread_attr_t thread_attr) + pthread_attr_t thread_attr, struct cpu_map *cpu) { - cpu_set_t cpu; + cpu_set_t cpuset; unsigned int i; threads_starting = nthreads; /* create and block all threads */ for (i = 0; i < nthreads; i++) { - CPU_ZERO(&cpu); - CPU_SET(i % ncpus, &cpu); + CPU_ZERO(&cpuset); + CPU_SET(cpu->map[i % cpu->nr], &cpuset); - if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpuset)) err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); if (pthread_create(&w[i], &thread_attr, workerfn, NULL)) @@ -122,6 +123,7 @@ int bench_futex_wake(int argc, const char **argv) unsigned int i, j; struct sigaction act; pthread_attr_t thread_attr; + struct cpu_map *cpu; argc = parse_options(argc, argv, options, bench_futex_wake_usage, 0); if (argc) { @@ -129,7 +131,9 @@ int bench_futex_wake(int argc, const char **argv) exit(EXIT_FAILURE); } - ncpus = sysconf(_SC_NPROCESSORS_ONLN); + cpu = cpu_map__new(NULL); + if (!cpu) + err(EXIT_FAILURE, "calloc"); sigfillset(&act.sa_mask); act.sa_sigaction = toggle_done; @@ -161,7 +165,7 @@ int bench_futex_wake(int argc, const char **argv) struct timeval start, end, runtime; /* create, launch & block all threads */ - block_threads(worker, thread_attr); + block_threads(worker, thread_attr, cpu); /* make sure all threads are already blocked */ pthread_mutex_lock(&thread_lock); -- cgit v1.2.3-70-g09d2 From 25ab5abf5b141d7fd13eed506c7458aa04749c29 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 5 Dec 2017 10:14:42 -0300 Subject: tools build feature: Check if pthread_barrier_t is available As 'perf bench futex wake-parallel" will use this, which is not available in older systems such as versions of the android NDK used in my container build tests (r12b and r15c at the moment). Cc: Adrian Hunter Cc: David Ahern Cc: Davidlohr Bueso Cc: James Yang Cc: Kim Phillips Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-1i7iv54in4wj08lwo55b0pzv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 1 + tools/build/feature/Makefile | 4 ++++ tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-pthread-barrier.c | 12 ++++++++++++ tools/perf/Makefile.config | 4 ++++ 5 files changed, 26 insertions(+) create mode 100644 tools/build/feature/test-pthread-barrier.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index c71a05b9c984..e52fcefee379 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -56,6 +56,7 @@ FEATURE_TESTS_BASIC := \ libunwind-arm \ libunwind-aarch64 \ pthread-attr-setaffinity-np \ + pthread-barrier \ stackprotector-all \ timerfd \ libdw-dwarf-unwind \ diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index 96982640fbf8..cff38f342283 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -37,6 +37,7 @@ FILES= \ test-libunwind-debug-frame-arm.bin \ test-libunwind-debug-frame-aarch64.bin \ test-pthread-attr-setaffinity-np.bin \ + test-pthread-barrier.bin \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ @@ -79,6 +80,9 @@ $(OUTPUT)test-hello.bin: $(OUTPUT)test-pthread-attr-setaffinity-np.bin: $(BUILD) -D_GNU_SOURCE -lpthread +$(OUTPUT)test-pthread-barrier.bin: + $(BUILD) -lpthread + $(OUTPUT)test-stackprotector-all.bin: $(BUILD) -fstack-protector-all diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 4112702e4aed..6fdf83263ab7 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -118,6 +118,10 @@ # include "test-pthread-attr-setaffinity-np.c" #undef main +#define main main_test_pthread_barrier +# include "test-pthread-barrier.c" +#undef main + #define main main_test_sched_getcpu # include "test-sched_getcpu.c" #undef main @@ -187,6 +191,7 @@ int main(int argc, char *argv[]) main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); + main_test_pthread_barrier(); main_test_lzma(); main_test_get_cpuid(); main_test_bpf(); diff --git a/tools/build/feature/test-pthread-barrier.c b/tools/build/feature/test-pthread-barrier.c new file mode 100644 index 000000000000..0558d9334d97 --- /dev/null +++ b/tools/build/feature/test-pthread-barrier.c @@ -0,0 +1,12 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +int main(void) +{ + pthread_barrier_t barrier; + + pthread_barrier_init(&barrier, NULL, 1); + pthread_barrier_wait(&barrier); + return pthread_barrier_destroy(&barrier); +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f6786fa2419f..2c437baf8364 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -263,6 +263,10 @@ ifeq ($(feature-pthread-attr-setaffinity-np), 1) CFLAGS += -DHAVE_PTHREAD_ATTR_SETAFFINITY_NP endif +ifeq ($(feature-pthread-barrier), 1) + CFLAGS += -DHAVE_PTHREAD_BARRIER +endif + ifndef NO_BIONIC $(call feature_check,bionic) ifeq ($(feature-bionic), 1) -- cgit v1.2.3-70-g09d2 From 8085e5ab41dadce558808b4186a6ea9d0862d3c0 Mon Sep 17 00:00:00 2001 From: James Yang Date: Sun, 26 Nov 2017 20:21:01 -0800 Subject: perf bench futex: Sync waker threads Waker threads in the futex wake-parallel benchmark are started by a loop using pthread_create(). However, there is no synchronization for when the waker threads wake the waiting threads. Comparison of the waker threads' measurement timestamps show they are not all running concurrently because older waker threads finish their task before newer waker threads even start. This patch uses a barrier to better synchronize the waker threads. Signed-off-by: James Yang Link: http://lkml.kernel.org/r/20171127042101.3659-4-dave@stgolabs.net Signed-off-by: Davidlohr Bueso [ Disable the wake-parallel test for systems without pthread_barrier_t ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-wake-parallel.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c index 4488c27e8a43..69d8fdc87315 100644 --- a/tools/perf/bench/futex-wake-parallel.c +++ b/tools/perf/bench/futex-wake-parallel.c @@ -7,7 +7,17 @@ * for each individual thread to service its share of work. Ultimately * it can be used to measure futex_wake() changes. */ +#include "bench.h" +#include +#include "../util/debug.h" +#ifndef HAVE_PTHREAD_BARRIER +int bench_futex_wake_parallel(int argc __maybe_unused, const char **argv __maybe_unused) +{ + pr_err("%s: pthread_barrier_t unavailable, disabling this test...\n", __func__); + return 0; +} +#else /* HAVE_PTHREAD_BARRIER */ /* For the CLR_() macros */ #include #include @@ -15,11 +25,9 @@ #include #include "../util/stat.h" #include -#include #include #include #include -#include "bench.h" #include "futex.h" #include "cpumap.h" @@ -43,6 +51,7 @@ static bool done = false, silent = false, fshared = false; static unsigned int nblocked_threads = 0, nwaking_threads = 0; static pthread_mutex_t thread_lock; static pthread_cond_t thread_parent, thread_worker; +static pthread_barrier_t barrier; static struct stats waketime_stats, wakeup_stats; static unsigned int threads_starting; static int futex_flag = 0; @@ -65,6 +74,8 @@ static void *waking_workerfn(void *arg) struct thread_data *waker = (struct thread_data *) arg; struct timeval start, end; + pthread_barrier_wait(&barrier); + gettimeofday(&start, NULL); waker->nwoken = futex_wake(&futex, nwakes, futex_flag); @@ -85,6 +96,8 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); + pthread_barrier_init(&barrier, NULL, nwaking_threads + 1); + /* create and block all threads */ for (i = 0; i < nwaking_threads; i++) { /* @@ -97,9 +110,13 @@ static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) err(EXIT_FAILURE, "pthread_create"); } + pthread_barrier_wait(&barrier); + for (i = 0; i < nwaking_threads; i++) if (pthread_join(td[i].worker, NULL)) err(EXIT_FAILURE, "pthread_join"); + + pthread_barrier_destroy(&barrier); } static void *blocked_workerfn(void *arg __maybe_unused) @@ -303,3 +320,4 @@ int bench_futex_wake_parallel(int argc, const char **argv) free(blocked_worker); return ret; } +#endif /* HAVE_PTHREAD_BARRIER */ -- cgit v1.2.3-70-g09d2 From 36c263607d36c6a3788c09301d9f5fe35404048a Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Fri, 24 Nov 2017 10:46:37 +0100 Subject: perf annotate: Fix unnecessary memory allocation for s390x This patch fixes a bug introduced with commit d9f8dfa9baf9 ("perf annotate s390: Implement jump types for perf annotate"). 'perf annotate' displays annotated assembler output by reading output of command objdump and parsing the disassembled lines. For each shown mnemonic this function sequence is executed: disasm_line__new() | +--> disasm_line__init_ins() | +--> ins__find() | +--> arch->associate_instruction_ops() The s390x specific function assigned to function pointer associate_instruction_ops refers to function s390__associate_ins_ops(). This function checks for supported mnemonics and assigns a NULL pointer to unsupported mnemonics. However even the NULL pointer is added to the architecture dependend instruction array. This leads to an extremely large architecture instruction array (due to array resize logic in function arch__grow_instructions()). Depending on the objdump output being parsed the array can end up with several ten-thousand elements. This patch checks if a mnemonic is supported and only adds supported ones into the architecture instruction array. The array does not contain elements with NULL pointers anymore. Before the patch (With some debug printf output): [root@s35lp76 perf]# time ./perf annotate --stdio > /tmp/xxxbb real 8m49.679s user 7m13.008s sys 0m1.649s [root@s35lp76 perf]# fgrep '__ins__find sorted:1 nr_instructions:' /tmp/xxxbb | tail -1 __ins__find sorted:1 nr_instructions:87433 ins:0x341583c0 [root@s35lp76 perf]# The number of different s390x branch/jump/call/return instructions entered into the array is 87433. After the patch (With some printf debug output:) [root@s35lp76 perf]# time ./perf annotate --stdio > /tmp/xxxaa real 1m24.553s user 0m0.587s sys 0m1.530s [root@s35lp76 perf]# fgrep '__ins__find sorted:1 nr_instructions:' /tmp/xxxaa | tail -1 __ins__find sorted:1 nr_instructions:56 ins:0x3f406570 [root@s35lp76 perf]# The number of different s390x branch/jump/call/return instructions entered into the array is 56 which is sensible. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Acked-by: Ravi Bangoria Cc: Heiko Carstens Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20171124094637.55558-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/annotate/instructions.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/s390/annotate/instructions.c b/tools/perf/arch/s390/annotate/instructions.c index e0e466c650df..8c72b44444cb 100644 --- a/tools/perf/arch/s390/annotate/instructions.c +++ b/tools/perf/arch/s390/annotate/instructions.c @@ -18,7 +18,8 @@ static struct ins_ops *s390__associate_ins_ops(struct arch *arch, const char *na if (!strcmp(name, "br")) ops = &ret_ops; - arch__associate_ins_ops(arch, name, ops); + if (ops) + arch__associate_ins_ops(arch, name, ops); return ops; } -- cgit v1.2.3-70-g09d2 From 35a8a148d8c1ee9e5ae18f9565a880490f816f89 Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Tue, 28 Nov 2017 08:56:32 +0100 Subject: perf annotate: Fix objdump comment parsing for Intel mov dissassembly The command 'perf annotate' parses the output of objdump and also investigates the comments produced by objdump. For example the output of objdump produces (on x86): 23eee: 4c 8b 3d 13 01 21 00 mov 0x210113(%rip),%r15 # 234008 and the function mov__parse() is called to investigate the complete line. Mov__parse() breaks this line into several parts and finally calls function comment__symbol() to parse the data after the comment character '#'. Comment__symbol() expects a hexadecimal address followed by a symbol in '<' and '>' brackets. However the 2nd parameter given to function comment__symbol() always points to the comment character '#'. The address parsing always returns 0 because the character '#' is not a digit and strtoull() fails without being noticed. Fix this by advancing the second parameter to function comment__symbol() by one byte before invocation and add an error check after strtoull() has been called. Signed-off-by: Thomas Richter Reviewed-by: Hendrik Brueckner Acked-by: Ravi Bangoria Cc: Heiko Carstens Cc: Martin Schwidefsky Fixes: 6de783b6f50f ("perf annotate: Resolve symbols using objdump comment") Link: http://lkml.kernel.org/r/20171128075632.72182-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 22ea7936d92f..facad1e279a8 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -322,6 +322,8 @@ static int comment__symbol(char *raw, char *comment, u64 *addrp, char **namep) return 0; *addrp = strtoull(comment, &endptr, 16); + if (endptr == comment) + return 0; name = strchr(endptr, '<'); if (name == NULL) return -1; @@ -435,8 +437,8 @@ static int mov__parse(struct arch *arch, struct ins_operands *ops, struct map *m return 0; comment = ltrim(comment); - comment__symbol(ops->source.raw, comment, &ops->source.addr, &ops->source.name); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->source.raw, comment + 1, &ops->source.addr, &ops->source.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; @@ -480,7 +482,7 @@ static int dec__parse(struct arch *arch __maybe_unused, struct ins_operands *ops return 0; comment = ltrim(comment); - comment__symbol(ops->target.raw, comment, &ops->target.addr, &ops->target.name); + comment__symbol(ops->target.raw, comment + 1, &ops->target.addr, &ops->target.name); return 0; } -- cgit v1.2.3-70-g09d2 From 33fec3e393dc1c55737cfb9c876b5c0da0d6f380 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 1 Dec 2017 18:57:25 +0800 Subject: perf rblist: Create rblist__exit() function Currently we have a rblist__delete() which is used to delete a rblist. While rblist__delete() will free the pointer of rblist at the end. It's an inconvenience for the user to delete a rblist which is not allocated by something like malloc(). For example, the rblist is embedded in a larger data structure. This patch creates a new function rblist__exit() which is similar to rblist__delete() but it will not free the pointer of rblist. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512125856-22056-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/rblist.c | 19 ++++++++++++------- tools/perf/util/rblist.h | 1 + 2 files changed, 13 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/rblist.c b/tools/perf/util/rblist.c index 0dfe27d99458..0efc3258c648 100644 --- a/tools/perf/util/rblist.c +++ b/tools/perf/util/rblist.c @@ -101,16 +101,21 @@ void rblist__init(struct rblist *rblist) return; } +void rblist__exit(struct rblist *rblist) +{ + struct rb_node *pos, *next = rb_first(&rblist->entries); + + while (next) { + pos = next; + next = rb_next(pos); + rblist__remove_node(rblist, pos); + } +} + void rblist__delete(struct rblist *rblist) { if (rblist != NULL) { - struct rb_node *pos, *next = rb_first(&rblist->entries); - - while (next) { - pos = next; - next = rb_next(pos); - rblist__remove_node(rblist, pos); - } + rblist__exit(rblist); free(rblist); } } diff --git a/tools/perf/util/rblist.h b/tools/perf/util/rblist.h index 4c8638a22571..76df15c27f5f 100644 --- a/tools/perf/util/rblist.h +++ b/tools/perf/util/rblist.h @@ -29,6 +29,7 @@ struct rblist { }; void rblist__init(struct rblist *rblist); +void rblist__exit(struct rblist *rblist); void rblist__delete(struct rblist *rblist); int rblist__add_node(struct rblist *rblist, const void *new_entry); void rblist__remove_node(struct rblist *rblist, struct rb_node *rb_node); -- cgit v1.2.3-70-g09d2 From b984aff7811bbac75b3f05931643d815067cf45c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 1 Dec 2017 18:57:28 +0800 Subject: perf stat: Add rbtree node_delete op In current stat-shadow.c, the rbtree deleting is ignored. The patch adds the implementation to node_delete method of rblist. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512125856-22056-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 855e35cbb1dc..57ec22513971 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -87,6 +87,16 @@ static struct rb_node *saved_value_new(struct rblist *rblist __maybe_unused, return &nd->rb_node; } +static void saved_value_delete(struct rblist *rblist __maybe_unused, + struct rb_node *rb_node) +{ + struct saved_value *v; + + BUG_ON(!rb_node); + v = container_of(rb_node, struct saved_value, rb_node); + free(v); +} + static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, bool create) @@ -114,7 +124,7 @@ void perf_stat__init_shadow_stats(void) rblist__init(&runtime_saved_values); runtime_saved_values.node_cmp = saved_value_cmp; runtime_saved_values.node_new = saved_value_new; - /* No delete for now */ + runtime_saved_values.node_delete = saved_value_delete; } static int evsel_context(struct perf_evsel *evsel) -- cgit v1.2.3-70-g09d2 From 8d3cd4c3d3ab5f4f9edd5c593b7743f7fbd3526d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 1 Dec 2017 11:44:30 -0300 Subject: perf thread_map: Add method to map all threads in the system Reusing the thread_map__new_by_uid() proc scanning already in place to return a map with all threads in the system. Based-on-a-patch-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: https://lkml.kernel.org/n/tip-khh28q0wwqbqtrk32bfe07hd@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/thread_map.c | 22 ++++++++++++++++------ tools/perf/util/thread_map.h | 1 + 2 files changed, 17 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index be0d5a736dea..2b653853eec2 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -92,7 +92,7 @@ struct thread_map *thread_map__new_by_tid(pid_t tid) return threads; } -struct thread_map *thread_map__new_by_uid(uid_t uid) +static struct thread_map *__thread_map__new_all_cpus(uid_t uid) { DIR *proc; int max_threads = 32, items, i; @@ -113,7 +113,6 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) while ((dirent = readdir(proc)) != NULL) { char *end; bool grow = false; - struct stat st; pid_t pid = strtol(dirent->d_name, &end, 10); if (*end) /* only interested in proper numerical dirents */ @@ -121,11 +120,12 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) snprintf(path, sizeof(path), "/proc/%s", dirent->d_name); - if (stat(path, &st) != 0) - continue; + if (uid != UINT_MAX) { + struct stat st; - if (st.st_uid != uid) - continue; + if (stat(path, &st) != 0 || st.st_uid != uid) + continue; + } snprintf(path, sizeof(path), "/proc/%d/task", pid); items = scandir(path, &namelist, filter, NULL); @@ -178,6 +178,16 @@ out_free_closedir: goto out_closedir; } +struct thread_map *thread_map__new_all_cpus(void) +{ + return __thread_map__new_all_cpus(UINT_MAX); +} + +struct thread_map *thread_map__new_by_uid(uid_t uid) +{ + return __thread_map__new_all_cpus(uid); +} + struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid) { if (pid != -1) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index f15803985435..07a765fb22bb 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -23,6 +23,7 @@ struct thread_map *thread_map__new_dummy(void); struct thread_map *thread_map__new_by_pid(pid_t pid); struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); +struct thread_map *thread_map__new_all_cpus(void); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); struct thread_map *thread_map__new_event(struct thread_map_event *event); -- cgit v1.2.3-70-g09d2 From 1dc4ddf112a408e607a073d951b962b6c6e2bd6c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 30 Nov 2017 09:49:25 +0100 Subject: perf s390: Always build with -fPIC On s390, object files must be compiled with position-indepedent code in order to be incrementally linked or linked to shared libraries. Therefore, add -fPIC to the CFLAGS for s390 to ensure each object file is built properly. Reported-by: Jonathan Hermann Signed-off-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Thomas Richter Cc: linux s390 list LPU-Reference: 1512031765-9382-1-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-a8wga8hrl0d0r84cal96fmgv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 2c437baf8364..bf86c09ca889 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -41,6 +41,7 @@ ifeq ($(SRCARCH),x86) LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind endif NO_PERF_REGS := 0 + CFLAGS += -fPIC endif ifeq ($(SRCARCH),arm) -- cgit v1.2.3-70-g09d2 From 54e32dc0f89ec7fcb87df6c45b096e57f050f22b Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:18 +0530 Subject: perf pmu: Pass pmu as a parameter to get_cpuid_str() The cpuid string will not be same on all CPUs on heterogeneous platforms like ARM's big.LITTLE, adding provision(using pmu->cpus) to find cpuid string from associated CPUs of PMU CORE device. Also optimise arguments to function pmu_add_cpu_aliases. Signed-off-by: Ganapatrao Kulkarni Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Jayachandran C Cc: Jonathan Cameron Cc: linux-arm-kernel@lists.infradead.org Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Link: http://lkml.kernel.org/r/20171016183222.25750-2-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/header.c | 2 +- tools/perf/arch/x86/util/header.c | 2 +- tools/perf/util/header.h | 3 ++- tools/perf/util/metricgroup.c | 4 ++-- tools/perf/util/pmu.c | 22 +++++++++++----------- tools/perf/util/pmu.h | 2 +- 6 files changed, 18 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/header.c b/tools/perf/arch/powerpc/util/header.c index 7a4cf80c207a..0b242664f5ea 100644 --- a/tools/perf/arch/powerpc/util/header.c +++ b/tools/perf/arch/powerpc/util/header.c @@ -35,7 +35,7 @@ get_cpuid(char *buffer, size_t sz) } char * -get_cpuid_str(void) +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *bufp; diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index 33027c5e6f92..b626d2bad9f1 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -66,7 +66,7 @@ get_cpuid(char *buffer, size_t sz) } char * -get_cpuid_str(void) +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *buf = malloc(128); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 91befc3b550d..317fb901e47f 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -9,6 +9,7 @@ #include #include "event.h" #include "env.h" +#include "pmu.h" enum { HEADER_RESERVED = 0, /* always cleared */ @@ -171,5 +172,5 @@ int write_padded(struct feat_fd *fd, const void *bf, */ int get_cpuid(char *buffer, size_t sz); -char *get_cpuid_str(void); +char *get_cpuid_str(struct perf_pmu *pmu __maybe_unused); #endif /* __PERF_HEADER_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 6fd709017bbc..e48410c99b39 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -274,7 +274,7 @@ static void metricgroup__print_strlist(struct strlist *metrics, bool raw) void metricgroup__print(bool metrics, bool metricgroups, char *filter, bool raw) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int i; struct rblist groups; @@ -372,7 +372,7 @@ void metricgroup__print(bool metrics, bool metricgroups, char *filter, static int metricgroup__add_metric(const char *metric, struct strbuf *events, struct list_head *group_list) { - struct pmu_events_map *map = perf_pmu__find_map(); + struct pmu_events_map *map = perf_pmu__find_map(NULL); struct pmu_event *pe; int ret = -EINVAL; int i, j; diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 80fb1593913a..4e7dd3a0f123 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -542,12 +542,12 @@ static bool pmu_is_uncore(const char *name) * Each architecture should provide a more precise id string that * can be use to match the architecture's "mapfile". */ -char * __weak get_cpuid_str(void) +char * __weak get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { return NULL; } -static char *perf_pmu__getcpuid(void) +static char *perf_pmu__getcpuid(struct perf_pmu *pmu) { char *cpuid; static bool printed; @@ -556,7 +556,7 @@ static char *perf_pmu__getcpuid(void) if (cpuid) cpuid = strdup(cpuid); if (!cpuid) - cpuid = get_cpuid_str(); + cpuid = get_cpuid_str(pmu); if (!cpuid) return NULL; @@ -567,10 +567,10 @@ static char *perf_pmu__getcpuid(void) return cpuid; } -struct pmu_events_map *perf_pmu__find_map(void) +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) { struct pmu_events_map *map; - char *cpuid = perf_pmu__getcpuid(); + char *cpuid = perf_pmu__getcpuid(pmu); int i; i = 0; @@ -593,13 +593,14 @@ struct pmu_events_map *perf_pmu__find_map(void) * to the current running CPU. Then, add all PMU events from that table * as aliases. */ -static void pmu_add_cpu_aliases(struct list_head *head, const char *name) +static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) { int i; struct pmu_events_map *map; struct pmu_event *pe; + const char *name = pmu->name; - map = perf_pmu__find_map(); + map = perf_pmu__find_map(pmu); if (!map) return; @@ -661,21 +662,20 @@ static struct perf_pmu *pmu_lookup(const char *name) if (pmu_aliases(name, &aliases)) return NULL; - pmu_add_cpu_aliases(&aliases, name); pmu = zalloc(sizeof(*pmu)); if (!pmu) return NULL; pmu->cpus = pmu_cpumask(name); - + pmu->name = strdup(name); + pmu->type = type; pmu->is_uncore = pmu_is_uncore(name); + pmu_add_cpu_aliases(&aliases, pmu); INIT_LIST_HEAD(&pmu->format); INIT_LIST_HEAD(&pmu->aliases); list_splice(&format, &pmu->format); list_splice(&aliases, &pmu->aliases); - pmu->name = strdup(name); - pmu->type = type; list_add_tail(&pmu->list, &pmus); pmu->default_config = perf_pmu__get_default_config(pmu); diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 27c75e635866..76fecec7b3f9 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -92,6 +92,6 @@ int perf_pmu__test(void); struct perf_event_attr *perf_pmu__get_default_config(struct perf_pmu *pmu); -struct pmu_events_map *perf_pmu__find_map(void); +struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu); #endif /* __PMU_H */ -- cgit v1.2.3-70-g09d2 From b57df28893543db3466172088786fae39b7fc3ad Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:19 +0530 Subject: perf tools arm64: Add support for get_cpuid_str function. The get_cpuid_str function returns the MIDR string of the first online cpu from the range of cpus associated with the PMU CORE device. Signed-off-by: Ganapatrao Kulkarni Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Jayachandran C Cc: Jonathan Cameron Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171016183222.25750-3-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/header.c | 65 +++++++++++++++++++++++++++++++++++++ 2 files changed, 66 insertions(+) create mode 100644 tools/perf/arch/arm64/util/header.c (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index cef6fb38d17e..b1ab72d2a42e 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,3 +1,4 @@ +libperf-y += header.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/header.c b/tools/perf/arch/arm64/util/header.c new file mode 100644 index 000000000000..534cd2507d83 --- /dev/null +++ b/tools/perf/arch/arm64/util/header.c @@ -0,0 +1,65 @@ +#include +#include +#include +#include "header.h" + +#define MIDR "/regs/identification/midr_el1" +#define MIDR_SIZE 19 +#define MIDR_REVISION_MASK 0xf +#define MIDR_VARIANT_SHIFT 20 +#define MIDR_VARIANT_MASK (0xf << MIDR_VARIANT_SHIFT) + +char *get_cpuid_str(struct perf_pmu *pmu) +{ + char *buf = NULL; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + int cpu; + u64 midr = 0; + struct cpu_map *cpus; + FILE *file; + + if (!sysfs || !pmu || !pmu->cpus) + return NULL; + + buf = malloc(MIDR_SIZE); + if (!buf) + return NULL; + + /* read midr from list of cpus mapped to this pmu */ + cpus = cpu_map__get(pmu->cpus); + for (cpu = 0; cpu < cpus->nr; cpu++) { + scnprintf(path, PATH_MAX, "%s/devices/system/cpu/cpu%d"MIDR, + sysfs, cpus->map[cpu]); + + file = fopen(path, "r"); + if (!file) { + pr_debug("fopen failed for file %s\n", path); + continue; + } + + if (!fgets(buf, MIDR_SIZE, file)) { + fclose(file); + continue; + } + fclose(file); + + /* Ignore/clear Variant[23:20] and + * Revision[3:0] of MIDR + */ + midr = strtoul(buf, NULL, 16); + midr &= (~(MIDR_VARIANT_MASK | MIDR_REVISION_MASK)); + scnprintf(buf, MIDR_SIZE, "0x%016lx", midr); + /* got midr break loop */ + break; + } + + if (!midr) { + pr_err("failed to get cpuid string for PMU %s\n", pmu->name); + free(buf); + buf = NULL; + } + + cpu_map__put(cpus); + return buf; +} -- cgit v1.2.3-70-g09d2 From 14b22ae028de56cca980171db625d1e9925c8fba Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Thu, 24 Aug 2017 16:30:58 +0530 Subject: perf pmu: Add helper function is_pmu_core to detect PMU CORE devices On some platforms, PMU core devices sysfs name is not cpu. Adding function is_pmu_core to detect PMU core devices using core device specific hints in sysfs. For arm64 platforms, all core devices have file "cpus" in sysfs. Signed-off-by: Ganapatrao Kulkarni Tested-by: Shaokun Zhang Tested-by: Jin Yao Acked-by: Will Deacon Link: https://lkml.kernel.org/n/tip-y1woxt1k2pqqwpprhonnft2s@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 39 +++++++++++++++++++++++++++++++++++---- 1 file changed, 35 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 4e7dd3a0f123..732ff579ec65 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -536,6 +536,34 @@ static bool pmu_is_uncore(const char *name) return !!cpus; } +/* + * PMU CORE devices have different name other than cpu in sysfs on some + * platforms. looking for possible sysfs files to identify as core device. + */ +static int is_pmu_core(const char *name) +{ + struct stat st; + char path[PATH_MAX]; + const char *sysfs = sysfs__mountpoint(); + + if (!sysfs) + return 0; + + /* Look for cpu sysfs (x86 and others) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/cpu", sysfs); + if ((stat(path, &st) == 0) && + (strncmp(name, "cpu", strlen("cpu")) == 0)) + return 1; + + /* Look for cpu sysfs (specific to arm) */ + scnprintf(path, PATH_MAX, "%s/bus/event_source/devices/%s/cpus", + sysfs, name); + if (stat(path, &st) == 0) + return 1; + + return 0; +} + /* * Return the CPU id as a raw string. * @@ -609,7 +637,6 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) */ i = 0; while (1) { - const char *pname; pe = &map->table[i++]; if (!pe->name) { @@ -618,9 +645,13 @@ static void pmu_add_cpu_aliases(struct list_head *head, struct perf_pmu *pmu) break; } - pname = pe->pmu ? pe->pmu : "cpu"; - if (strncmp(pname, name, strlen(pname))) - continue; + if (!is_pmu_core(name)) { + /* check for uncore devices */ + if (pe->pmu == NULL) + continue; + if (strncmp(pe->pmu, name, strlen(pe->pmu))) + continue; + } /* need type casts to override 'const' */ __perf_pmu__new_alias(head, NULL, (char *)pe->name, -- cgit v1.2.3-70-g09d2 From d3964221ea14690fe51cb57331b88b5c69e4d2cb Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:21 +0530 Subject: perf vendor events arm64: Add ThunderX2 implementation defined pmu core events This is not a full event list, but a short list of useful events. Signed-off-by: Ganapatrao Kulkarni Acked-by: Will Deacon Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Jayachandran C Cc: Jonathan Cameron Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171016183222.25750-5-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- .../arch/arm64/cavium/thunderx2-imp-def.json | 62 ++++++++++++++++++++++ tools/perf/pmu-events/arch/arm64/mapfile.csv | 15 ++++++ 2 files changed, 77 insertions(+) create mode 100644 tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json create mode 100644 tools/perf/pmu-events/arch/arm64/mapfile.csv (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json new file mode 100644 index 000000000000..2db45c40ebc7 --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/cavium/thunderx2-imp-def.json @@ -0,0 +1,62 @@ +[ + { + "PublicDescription": "Attributable Level 1 data cache access, read", + "EventCode": "0x40", + "EventName": "l1d_cache_rd", + "BriefDescription": "L1D cache read", + }, + { + "PublicDescription": "Attributable Level 1 data cache access, write ", + "EventCode": "0x41", + "EventName": "l1d_cache_wr", + "BriefDescription": "L1D cache write", + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, read", + "EventCode": "0x42", + "EventName": "l1d_cache_refill_rd", + "BriefDescription": "L1D cache refill read", + }, + { + "PublicDescription": "Attributable Level 1 data cache refill, write", + "EventCode": "0x43", + "EventName": "l1d_cache_refill_wr", + "BriefDescription": "L1D refill write", + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, read", + "EventCode": "0x4C", + "EventName": "l1d_tlb_refill_rd", + "BriefDescription": "L1D tlb refill read", + }, + { + "PublicDescription": "Attributable Level 1 data TLB refill, write", + "EventCode": "0x4D", + "EventName": "l1d_tlb_refill_wr", + "BriefDescription": "L1D tlb refill write", + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, read", + "EventCode": "0x4E", + "EventName": "l1d_tlb_rd", + "BriefDescription": "L1D tlb read", + }, + { + "PublicDescription": "Attributable Level 1 data or unified TLB access, write", + "EventCode": "0x4F", + "EventName": "l1d_tlb_wr", + "BriefDescription": "L1D tlb write", + }, + { + "PublicDescription": "Bus access read", + "EventCode": "0x60", + "EventName": "bus_access_rd", + "BriefDescription": "Bus access read", + }, + { + "PublicDescription": "Bus access write", + "EventCode": "0x61", + "EventName": "bus_access_wr", + "BriefDescription": "Bus access write", + } +] diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv new file mode 100644 index 000000000000..219d6756134e --- /dev/null +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -0,0 +1,15 @@ +# Format: +# MIDR,Version,JSON/file/pathname,Type +# +# where +# MIDR Processor version +# Variant[23:20] and Revision [3:0] should be zero. +# Version could be used to track version of of JSON file +# but currently unused. +# JSON/file/pathname is the path to JSON file, relative +# to tools/perf/pmu-events/arch/arm64/. +# Type is core, uncore etc +# +# +#Family-model,Version,Filename,EventType +0x00000000420f5160,v1,cavium,core -- cgit v1.2.3-70-g09d2 From de3d0f12be476271d03f1ddb5a7c241c2f07f126 Mon Sep 17 00:00:00 2001 From: Ganapatrao Kulkarni Date: Tue, 17 Oct 2017 00:02:22 +0530 Subject: perf pmu: Add check for valid cpuid in perf_pmu__find_map() On some platforms(arm/arm64) which uses cpus map to get corresponding cpuid string, cpuid can be NULL for PMUs other than CORE PMUs. Adding check for NULL cpuid in function perf_pmu__find_map to avoid segmentation fault. Signed-off-by: Ganapatrao Kulkarni Cc: Alexander Shishkin Cc: Catalin Marinas Cc: Ganapatrao Kulkarni Cc: Jayachandran C Cc: Jonathan Cameron Cc: Mark Rutland Cc: Peter Zijlstra Cc: Robert Richter Cc: Shaokun Zhang Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171016183222.25750-6-ganapatrao.kulkarni@cavium.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/pmu.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 732ff579ec65..8b7c151579c0 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -601,6 +601,12 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) char *cpuid = perf_pmu__getcpuid(pmu); int i; + /* on some platforms which uses cpus map, cpuid can be NULL for + * PMUs other than CORE PMUs. + */ + if (!cpuid) + return NULL; + i = 0; for (;;) { map = &pmu_events_map[i++]; -- cgit v1.2.3-70-g09d2 From c6707fdef7e2c1eb5458988b49c33497affdebbf Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 4 Dec 2017 12:23:08 -0300 Subject: perf tools: Fix up build in hardnened environments On Fedora systems the perl and python CFLAGS/LDFLAGS include the hardened specs from redhat-rpm-config package. We apply them only for perl/python objects, which makes them not compatible with the rest of the objects and the build fails with: /usr/bin/ld: perf-in.o: relocation R_X86_64_32 against `.rodata.str1.1' can not be used when making a shared object; recompile with -f +PIC /usr/bin/ld: libperf.a(libperf-in.o): relocation R_X86_64_32S against `.text' can not be used when making a shared object; recompile w +ith -fPIC /usr/bin/ld: final link failed: Nonrepresentable section on output collect2: error: ld returned 1 exit status make[2]: *** [Makefile.perf:507: perf] Error 1 make[1]: *** [Makefile.perf:210: sub-make] Error 2 make: *** [Makefile:69: all] Error 2 Mainly it's caused by perl/python objects being compiled with: -specs=/usr/lib/rpm/redhat/redhat-hardened-cc1 which prevent the final link impossible, because it will check for 'proper' objects with following option: -specs=/usr/lib/rpm/redhat/redhat-hardened-ld Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20171204082437.GC30564@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index bf86c09ca889..808066c823f7 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -185,9 +185,7 @@ ifdef PYTHON_CONFIG PYTHON_EMBED_LDFLAGS := $(call strip-libs,$(PYTHON_EMBED_LDOPTS)) PYTHON_EMBED_LIBADD := $(call grep-libs,$(PYTHON_EMBED_LDOPTS)) -lutil PYTHON_EMBED_CCOPTS := $(shell $(PYTHON_CONFIG_SQ) --cflags 2>/dev/null) - ifeq ($(CC_NO_CLANG), 1) - PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) - endif + PYTHON_EMBED_CCOPTS := $(filter-out -specs=%,$(PYTHON_EMBED_CCOPTS)) FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) endif @@ -577,7 +575,6 @@ ifndef NO_GTK2 endif endif - ifdef NO_LIBPERL CFLAGS += -DNO_LIBPERL else @@ -585,6 +582,8 @@ else PERL_EMBED_LDFLAGS = $(call strip-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_LIBADD = $(call grep-libs,$(PERL_EMBED_LDOPTS)) PERL_EMBED_CCOPTS = $(shell perl -MExtUtils::Embed -e ccopts 2>/dev/null) + PERL_EMBED_CCOPTS := $(filter-out -specs=%,$(PERL_EMBED_CCOPTS)) + PERL_EMBED_LDOPTS := $(filter-out -specs=%,$(PERL_EMBED_LDOPTS)) FLAGS_PERL_EMBED=$(PERL_EMBED_CCOPTS) $(PERL_EMBED_LDOPTS) ifneq ($(feature-libperl), 1) -- cgit v1.2.3-70-g09d2 From f74b9d3a1ac2b9c3ae1475f474ca0e6644746fbf Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:37 +0000 Subject: perf evlist: Remove 'overwrite' parameter from perf_evlist__mmap Now all perf_evlist__mmap's users doesn't set 'overwrite'. Remove it from arguments list. Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-2-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/tests/perf-time-to-tsc.c | 2 +- tools/perf/builtin-kvm.c | 2 +- tools/perf/builtin-top.c | 2 +- tools/perf/builtin-trace.c | 2 +- tools/perf/tests/backward-ring-buffer.c | 2 +- tools/perf/tests/bpf.c | 2 +- tools/perf/tests/code-reading.c | 2 +- tools/perf/tests/keep-tracking.c | 2 +- tools/perf/tests/mmap-basic.c | 2 +- tools/perf/tests/openat-syscall-tp-fields.c | 2 +- tools/perf/tests/perf-record.c | 2 +- tools/perf/tests/sw-clock.c | 2 +- tools/perf/tests/switch-tracking.c | 2 +- tools/perf/tests/task-exit.c | 2 +- tools/perf/util/evlist.c | 5 ++--- tools/perf/util/evlist.h | 3 +-- tools/perf/util/python.c | 2 +- 17 files changed, 18 insertions(+), 20 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/tests/perf-time-to-tsc.c b/tools/perf/arch/x86/tests/perf-time-to-tsc.c index b59678e8c1e2..06abe8108b33 100644 --- a/tools/perf/arch/x86/tests/perf-time-to-tsc.c +++ b/tools/perf/arch/x86/tests/perf-time-to-tsc.c @@ -84,7 +84,7 @@ int test__perf_time_to_tsc(struct test *test __maybe_unused, int subtest __maybe CHECK__(perf_evlist__open(evlist)); - CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false)); + CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); pc = evlist->mmap[0].base; ret = perf_read_tsc_conversion(pc, &tc); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 597c7de9bec9..98853162eae9 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -1044,7 +1044,7 @@ static int kvm_live_open_events(struct perf_kvm_stat *kvm) goto out; } - if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages, false) < 0) { + if (perf_evlist__mmap(evlist, kvm->opts.mmap_pages) < 0) { ui__error("Failed to mmap the events: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); perf_evlist__close(evlist); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 0077724fb24f..540461f5e345 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -907,7 +907,7 @@ try_again: } } - if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { + if (perf_evlist__mmap(evlist, opts->mmap_pages) < 0) { ui__error("Failed to mmap with %d (%s)\n", errno, str_error_r(errno, msg, sizeof(msg))); goto out_err; diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 84debdbad327..7c57898095ea 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2437,7 +2437,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_apply_filters; - err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages); if (err < 0) goto out_error_mmap; diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index 43a8c6ac4070..cf37e43c42f3 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -59,7 +59,7 @@ static int do_test(struct perf_evlist *evlist, int mmap_pages, int err; char sbuf[STRERR_BUFSIZE]; - err = perf_evlist__mmap(evlist, mmap_pages, false); + err = perf_evlist__mmap(evlist, mmap_pages); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 34c22cdf4d5d..c433dd30975a 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -167,7 +167,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, opts.mmap_pages, false); + err = perf_evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index fcc8984bc329..3bf7b145b826 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -639,7 +639,7 @@ static int do_test_code_reading(bool try_kcore) break; } - ret = perf_evlist__mmap(evlist, UINT_MAX, false); + ret = perf_evlist__mmap(evlist, UINT_MAX); if (ret < 0) { pr_debug("perf_evlist__mmap failed\n"); goto out_put; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 842d33637a18..c46530918938 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -95,7 +95,7 @@ int test__keep_tracking(struct test *test __maybe_unused, int subtest __maybe_un goto out_err; } - CHECK__(perf_evlist__mmap(evlist, UINT_MAX, false)); + CHECK__(perf_evlist__mmap(evlist, UINT_MAX)); /* * First, test that a 'comm' event can be found when the event is diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 91f10d6d9ae2..c0e971da965c 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -94,7 +94,7 @@ int test__basic_mmap(struct test *test __maybe_unused, int subtest __maybe_unuse expected_nr_events[i] = 1 + rand() % 127; } - if (perf_evlist__mmap(evlist, 128, false) < 0) { + if (perf_evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index d9619d265314..97c9407d02a0 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -64,7 +64,7 @@ int test__syscall_openat_tp_fields(struct test *test __maybe_unused, int subtest goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, UINT_MAX, false); + err = perf_evlist__mmap(evlist, UINT_MAX); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index c34904d37705..0afafab85238 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -141,7 +141,7 @@ int test__PERF_RECORD(struct test *test __maybe_unused, int subtest __maybe_unus * fds in the same CPU to be injected in the same mmap ring buffer * (using ioctl(PERF_EVENT_IOC_SET_OUTPUT)). */ - err = perf_evlist__mmap(evlist, opts.mmap_pages, false); + err = perf_evlist__mmap(evlist, opts.mmap_pages); if (err < 0) { pr_debug("perf_evlist__mmap: %s\n", str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index c6937ed12e6b..f6c72f915d48 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -78,7 +78,7 @@ static int __test__sw_clock_freq(enum perf_sw_ids clock_id) goto out_delete_evlist; } - err = perf_evlist__mmap(evlist, 128, false); + err = perf_evlist__mmap(evlist, 128); if (err < 0) { pr_debug("failed to mmap event: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 7d3f4bf9534f..33e00295a972 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -449,7 +449,7 @@ int test__switch_tracking(struct test *test __maybe_unused, int subtest __maybe_ goto out; } - err = perf_evlist__mmap(evlist, UINT_MAX, false); + err = perf_evlist__mmap(evlist, UINT_MAX); if (err) { pr_debug("perf_evlist__mmap failed!\n"); goto out_err; diff --git a/tools/perf/tests/task-exit.c b/tools/perf/tests/task-exit.c index 5d06ac81f7f1..01b62b81751b 100644 --- a/tools/perf/tests/task-exit.c +++ b/tools/perf/tests/task-exit.c @@ -101,7 +101,7 @@ int test__task_exit(struct test *test __maybe_unused, int subtest __maybe_unused goto out_delete_evlist; } - if (perf_evlist__mmap(evlist, 128, false) < 0) { + if (perf_evlist__mmap(evlist, 128) < 0) { pr_debug("failed to mmap events: %d (%s)\n", errno, str_error_r(errno, sbuf, sizeof(sbuf))); goto out_delete_evlist; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 199bb82efbcd..3c1778b500e0 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1091,10 +1091,9 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_per_cpu(evlist, &mp); } -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite) +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); + return perf_evlist__mmap_ex(evlist, pages, false, 0, false); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 4e8131dacbd7..f0f2c8b2504b 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -171,8 +171,7 @@ unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, bool overwrite, unsigned int auxtrace_pages, bool auxtrace_overwrite); -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite); +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); size_t perf_evlist__mmap_size(unsigned long pages); diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 8e49d9cafcfc..b1e999bd21ef 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -864,7 +864,7 @@ static PyObject *pyrf_evlist__mmap(struct pyrf_evlist *pevlist, &pages, &overwrite)) return NULL; - if (perf_evlist__mmap(evlist, pages, overwrite) < 0) { + if (perf_evlist__mmap(evlist, pages) < 0) { PyErr_SetFromErrno(PyExc_OSError); return NULL; } -- cgit v1.2.3-70-g09d2 From 7a276ff6c3202697c3c15cad757dec3bb07d14bf Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:38 +0000 Subject: perf evlist: Remove 'overwrite' parameter from perf_evlist__mmap_ex All users of perf_evlist__mmap_ex set !overwrite. Remove it from its arguments list. Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/evlist.c | 8 ++++---- tools/perf/util/evlist.h | 2 +- 3 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e304bc47fe9b..08070f87d489 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -301,7 +301,7 @@ static int record__mmap_evlist(struct record *rec, struct record_opts *opts = &rec->opts; char msg[512]; - if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, + if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, opts->auxtrace_mmap_pages, opts->auxtrace_snapshot_mode) < 0) { if (errno == EPERM) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3c1778b500e0..93272d932407 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1052,14 +1052,14 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, * Return: %0 on success, negative error code otherwise. */ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; struct mmap_params mp = { - .prot = PROT_READ | (overwrite ? 0 : PROT_WRITE), + .prot = PROT_READ | PROT_WRITE, }; if (!evlist->mmap) @@ -1070,7 +1070,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->overwrite = overwrite; + evlist->overwrite = false; evlist->mmap_len = perf_evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; @@ -1093,7 +1093,7 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages) { - return perf_evlist__mmap_ex(evlist, pages, false, 0, false); + return perf_evlist__mmap_ex(evlist, pages, 0, false); } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index f0f2c8b2504b..424a3d6015af 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -169,7 +169,7 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, unsigned long perf_event_mlock_kb_in_pages(void); int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, - bool overwrite, unsigned int auxtrace_pages, + unsigned int auxtrace_pages, bool auxtrace_overwrite); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages); void perf_evlist__munmap(struct perf_evlist *evlist); -- cgit v1.2.3-70-g09d2 From 144b9a4fc53039c09007b71a06640560a6e62140 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:39 +0000 Subject: perf evlist: Remove evlist->overwrite evlist->overwrite is set to false in all users. It can be removed. Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/evlist.c | 5 ++--- tools/perf/util/evlist.h | 1 - 3 files changed, 3 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 08070f87d489..3bc6ceeae1f9 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -500,7 +500,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; if (maps[i].base) { - if (perf_mmap__push(&maps[i], evlist->overwrite, backward, rec, record__pushfn) != 0) { + if (perf_mmap__push(&maps[i], false, backward, rec, record__pushfn) != 0) { rc = -1; goto out; } diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 93272d932407..a59134fb141f 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int * No need for read-write ring buffer: kernel stop outputting when * it hit md->prev (perf_mmap__consume()). */ - return perf_mmap__read_forward(md, evlist->overwrite); + return perf_mmap__read_forward(md, false); } union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) @@ -738,7 +738,7 @@ void perf_evlist__mmap_read_catchup(struct perf_evlist *evlist, int idx) void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) { - perf_mmap__consume(&evlist->mmap[idx], evlist->overwrite); + perf_mmap__consume(&evlist->mmap[idx], false); } static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) @@ -1070,7 +1070,6 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, if (evlist->pollfd.entries == NULL && perf_evlist__alloc_pollfd(evlist) < 0) return -ENOMEM; - evlist->overwrite = false; evlist->mmap_len = perf_evlist__mmap_size(pages); pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 424a3d6015af..eec33770b8e6 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -31,7 +31,6 @@ struct perf_evlist { int nr_entries; int nr_groups; int nr_mmaps; - bool overwrite; bool enabled; bool has_user_cpus; size_t mmap_len; -- cgit v1.2.3-70-g09d2 From ca6a9a05391960be5e8161a59a9854b32325d901 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:40 +0000 Subject: perf mmap: Remove overwrite from arguments list of perf_mmap__push 'overwrite' argument is always 'false'. Remove it from arguments list of perf_mmap__push(). Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-5-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 2 +- tools/perf/util/mmap.c | 6 +++--- tools/perf/util/mmap.h | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 3bc6ceeae1f9..26b8571d0fdb 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -500,7 +500,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; if (maps[i].base) { - if (perf_mmap__push(&maps[i], false, backward, rec, record__pushfn) != 0) { + if (perf_mmap__push(&maps[i], backward, rec, record__pushfn) != 0) { rc = -1; goto out; } diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 9fe5f9c7d577..703ed41a9269 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -299,7 +299,7 @@ static int rb_find_range(void *data, int mask, u64 head, u64 old, return backward_rb_find_range(data, mask, head, start, end); } -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); @@ -321,7 +321,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); md->prev = head; - perf_mmap__consume(md, overwrite || backward); + perf_mmap__consume(md, backward); return 0; } @@ -346,7 +346,7 @@ int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, } md->prev = head; - perf_mmap__consume(md, overwrite || backward); + perf_mmap__consume(md, backward); out: return rc; } diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index efd78b827b05..2c3d291785de 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -89,7 +89,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); -int perf_mmap__push(struct perf_mmap *md, bool overwrite, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)); size_t perf_mmap__mmap_len(struct perf_mmap *map); -- cgit v1.2.3-70-g09d2 From 8eb7a1fe31612fd3e8ae8042dd2ebaf7575504cb Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Sun, 3 Dec 2017 02:00:41 +0000 Subject: perf mmap: Remove overwrite and check_messup from mmap read All perf_mmap__read_forward() read from read-write ring buffer, so no need check_messup. Reading from backward ring buffer doesn't require check_messup because it never mess up. Cleanup arguments lists. Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Link: http://lkml.kernel.org/r/20171203020044.81680-6-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- tools/perf/util/mmap.c | 28 ++++------------------------ tools/perf/util/mmap.h | 2 +- 3 files changed, 6 insertions(+), 26 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index a59134fb141f..68c1f9546650 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -711,7 +711,7 @@ union perf_event *perf_evlist__mmap_read_forward(struct perf_evlist *evlist, int * No need for read-write ring buffer: kernel stop outputting when * it hit md->prev (perf_mmap__consume()). */ - return perf_mmap__read_forward(md, false); + return perf_mmap__read_forward(md); } union perf_event *perf_evlist__mmap_read_backward(struct perf_evlist *evlist, int idx) diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 703ed41a9269..3f262e707a41 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -21,33 +21,13 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map) } /* When check_messup is true, 'end' must points to a good entry */ -static union perf_event *perf_mmap__read(struct perf_mmap *map, bool check_messup, +static union perf_event *perf_mmap__read(struct perf_mmap *map, u64 start, u64 end, u64 *prev) { unsigned char *data = map->base + page_size; union perf_event *event = NULL; int diff = end - start; - if (check_messup) { - /* - * If we're further behind than half the buffer, there's a chance - * the writer will bite our tail and mess up the samples under us. - * - * If we somehow ended up ahead of the 'end', we got messed up. - * - * In either case, truncate and restart at 'end'. - */ - if (diff > map->mask / 2 || diff < 0) { - fprintf(stderr, "WARNING: failed to keep up with mmap data.\n"); - - /* - * 'end' points to a known good entry, start there. - */ - start = end; - diff = 0; - } - } - if (diff >= (int)sizeof(event->header)) { size_t size; @@ -89,7 +69,7 @@ broken_event: return event; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup) +union perf_event *perf_mmap__read_forward(struct perf_mmap *map) { u64 head; u64 old = map->prev; @@ -102,7 +82,7 @@ union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_mess head = perf_mmap__read_head(map); - return perf_mmap__read(map, check_messup, old, head, &map->prev); + return perf_mmap__read(map, old, head, &map->prev); } union perf_event *perf_mmap__read_backward(struct perf_mmap *map) @@ -138,7 +118,7 @@ union perf_event *perf_mmap__read_backward(struct perf_mmap *map) else end = head + map->mask + 1; - return perf_mmap__read(map, false, start, end, &map->prev); + return perf_mmap__read(map, start, end, &map->prev); } void perf_mmap__read_catchup(struct perf_mmap *map) diff --git a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index 2c3d291785de..d640273b7762 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -86,7 +86,7 @@ static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) pc->data_tail = tail; } -union perf_event *perf_mmap__read_forward(struct perf_mmap *map, bool check_messup); +union perf_event *perf_mmap__read_forward(struct perf_mmap *map); union perf_event *perf_mmap__read_backward(struct perf_mmap *map); int perf_mmap__push(struct perf_mmap *md, bool backward, -- cgit v1.2.3-70-g09d2 From 0125195268a0f9886b582c7e73da98f89029796f Mon Sep 17 00:00:00 2001 From: Sangwon Hong Date: Sat, 2 Dec 2017 13:16:41 +0900 Subject: perf c2c: Add a tip about cacheline events Signed-off-by: Sangwon Hong Cc: Jiri Olsa Cc: Namhyung Kim Link: http://lkml.kernel.org/r/1512188201-14109-1-git-send-email-qpakzk@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/tips.txt | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 3dd1dbe28407..849599f39c5e 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -33,3 +33,4 @@ System-wide collection from all CPUs: perf record -a Show current config key-value pairs: perf config --list Show user configuration overrides: perf config --user --list To add Node.js USDT(User-Level Statically Defined Tracing): perf buildid-cache --add `which node` +To report cacheline events from previous recording: perf c2c report -- cgit v1.2.3-70-g09d2 From fbc2844e84038ce3687d203ac80b66194e9f21e6 Mon Sep 17 00:00:00 2001 From: William Cohen Date: Mon, 4 Dec 2017 09:57:28 -0500 Subject: perf vendor events: Use more flexible pattern matching for CPU identification for mapfile.csv The powerpc cpuid information includes chip revision information. Changes between chip revisions are usually minor bug fixes and usually do not affect the operation of the performance monitoring hardware. The original mapfile.csv matching requires enumerating every possible cpuid string. When a new minor chip revision is produced a new entry has to be added to the mapfile.csv and the code recompiled to allow perf to have the implementation specific perf events for this new minor revision. For users of various distibutions of Linux having to wait for a new release of the kernel's perf tool to be built with these trivial patches is inconvenient. Using regular expressions rather than exactly string matching of the entire cpuid string allows developers to write mapfile.csv files that do not require patches and recompiles for each of these minor version changes. If special cases need to be made for some particular versions, they can be placed earlier in the mapfile.csv file before the more general matches. Signed-off-by: William Cohen Tested-by: Ravi Bangoria Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Shriya Link: http://lkml.kernel.org/r/20171204145728.16792-1-wcohen@redhat.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/powerpc/mapfile.csv | 12 ++------ tools/perf/pmu-events/arch/x86/mapfile.csv | 5 +--- tools/perf/pmu-events/jevents.c | 39 +++++++++++++++++++++++++- tools/perf/util/pmu.c | 20 ++++++++++++- 4 files changed, 60 insertions(+), 16 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/powerpc/mapfile.csv b/tools/perf/pmu-events/arch/powerpc/mapfile.csv index a0f3a11ca19f..229150e7ab7d 100644 --- a/tools/perf/pmu-events/arch/powerpc/mapfile.csv +++ b/tools/perf/pmu-events/arch/powerpc/mapfile.csv @@ -13,13 +13,5 @@ # # Power8 entries -004b0000,1,power8,core -004b0201,1,power8,core -004c0000,1,power8,core -004d0000,1,power8,core -004d0100,1,power8,core -004d0200,1,power8,core -004c0100,1,power8,core -004e0100,1,power9,core -004e0200,1,power9,core -004e1200,1,power9,core +004[bcd][[:xdigit:]]{4},1,power8,core +004e[[:xdigit:]]{4},1,power9,core diff --git a/tools/perf/pmu-events/arch/x86/mapfile.csv b/tools/perf/pmu-events/arch/x86/mapfile.csv index fe1a2c47cabf..93656f2fd53a 100644 --- a/tools/perf/pmu-events/arch/x86/mapfile.csv +++ b/tools/perf/pmu-events/arch/x86/mapfile.csv @@ -23,10 +23,7 @@ GenuineIntel-6-1E,v2,nehalemep,core GenuineIntel-6-1F,v2,nehalemep,core GenuineIntel-6-1A,v2,nehalemep,core GenuineIntel-6-2E,v2,nehalemex,core -GenuineIntel-6-4E,v24,skylake,core -GenuineIntel-6-5E,v24,skylake,core -GenuineIntel-6-8E,v24,skylake,core -GenuineIntel-6-9E,v24,skylake,core +GenuineIntel-6-[4589]E,v24,skylake,core GenuineIntel-6-37,v13,silvermont,core GenuineIntel-6-4D,v13,silvermont,core GenuineIntel-6-4C,v13,silvermont,core diff --git a/tools/perf/pmu-events/jevents.c b/tools/perf/pmu-events/jevents.c index 9eb7047bafe4..b578aa26e375 100644 --- a/tools/perf/pmu-events/jevents.c +++ b/tools/perf/pmu-events/jevents.c @@ -116,6 +116,43 @@ static void fixdesc(char *s) *e = 0; } +/* Add escapes for '\' so they are proper C strings. */ +static char *fixregex(char *s) +{ + int len = 0; + int esc_count = 0; + char *fixed = NULL; + char *p, *q; + + /* Count the number of '\' in string */ + for (p = s; *p; p++) { + ++len; + if (*p == '\\') + ++esc_count; + } + + if (esc_count == 0) + return s; + + /* allocate space for a new string */ + fixed = (char *) malloc(len + 1); + if (!fixed) + return NULL; + + /* copy over the characters */ + q = fixed; + for (p = s; *p; p++) { + if (*p == '\\') { + *q = '\\'; + ++q; + } + *q = *p; + ++q; + } + *q = '\0'; + return fixed; +} + static struct msrmap { const char *num; const char *pname; @@ -648,7 +685,7 @@ static int process_mapfile(FILE *outfp, char *fpath) } line[strlen(line)-1] = '\0'; - cpuid = strtok_r(p, ",", &save); + cpuid = fixregex(strtok_r(p, ",", &save)); version = strtok_r(NULL, ",", &save); fname = strtok_r(NULL, ",", &save); type = strtok_r(NULL, ",", &save); diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 8b7c151579c0..57e38fdf0b34 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" #include "pmu.h" #include "parse-events.h" @@ -609,14 +610,31 @@ struct pmu_events_map *perf_pmu__find_map(struct perf_pmu *pmu) i = 0; for (;;) { + regex_t re; + regmatch_t pmatch[1]; + int match; + map = &pmu_events_map[i++]; if (!map->table) { map = NULL; break; } - if (!strcmp(map->cpuid, cpuid)) + if (regcomp(&re, map->cpuid, REG_EXTENDED) != 0) { + /* Warn unable to generate match particular string. */ + pr_info("Invalid regular expression %s\n", map->cpuid); break; + } + + match = !regexec(&re, cpuid, 1, pmatch, 0); + regfree(&re); + if (match) { + size_t match_len = (pmatch[0].rm_eo - pmatch[0].rm_so); + + /* Verify the entire string matched. */ + if (match_len == strlen(cpuid)) + break; + } } free(cpuid); return map; -- cgit v1.2.3-70-g09d2 From 712d36db5a5c57eb79e962a0f9b85964640e8415 Mon Sep 17 00:00:00 2001 From: Seokho Song <0xdevssh@gmail.com> Date: Tue, 5 Dec 2017 01:02:44 +0900 Subject: perf report: Set browser mode right before setup_browser() There are codes that print messages to the screen between assignment of the use_browser variable and setup_browser(). But since the GUI browser is not initialized during that period, all messages fail to show if the user passed the --gtk option to perf as GTK is not initialized yet. Reorder the code to assign use_browser variable right before setup_browser() is called. Signed-off-by: Seokho Song <0xdevssh@gmail.com> Acked-by: Namhyung Kim Cc: Jiri Olsa Link: http://lkml.kernel.org/r/20171204160244.6332-1-0xdevssh@gmail.com Signed-off-by: Park Ju Hyung Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index af5dd038195e..eb9ce6327e71 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -921,13 +921,6 @@ int cmd_report(int argc, const char **argv) return -EINVAL; } - if (report.use_stdio) - use_browser = 0; - else if (report.use_tui) - use_browser = 1; - else if (report.use_gtk) - use_browser = 2; - if (report.inverted_callchain) callchain_param.order = ORDER_CALLER; if (symbol_conf.cumulate_callchain && !callchain_param.order_set) @@ -1014,6 +1007,13 @@ repeat: perf_hpp_list.need_collapse = true; } + if (report.use_stdio) + use_browser = 0; + else if (report.use_tui) + use_browser = 1; + else if (report.use_gtk) + use_browser = 2; + /* Force tty output for header output and per-thread stat. */ if (report.header || report.header_only || report.show_threads) use_browser = 0; -- cgit v1.2.3-70-g09d2 From 71f566a34986f4a86a8c546c7a36f70f0132b8a9 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 4 Dec 2017 16:51:05 +0000 Subject: perf mmap: Fix perf backward recording 'perf record' backward recording doesn't work as we expected: it never overwrites when ring buffer gets full. Test: Run a busy python printing task background like this: while True: print 123 send SIGUSR2 to perf to capture snapshot, then: # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101520743 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101521251 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101521692 ] ^C[ perf record: Woken up 1 times to write data ] [ perf record: Dump perf.data.2017110101521936 ] [ perf record: Captured and wrote 0.826 MB perf.data. ] # ./perf script -i ./perf.data.2017110101520743 | head -n3 perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0) perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0) python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4 # ./perf script -i ./perf.data.2017110101521251 | head -n3 perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0) perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0) python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4 # ./perf script -i ./perf.data.2017110101521692 | head -n3 perf 2717 [000] 12449.310785: raw_syscalls:sys_enter: NR 16 (5, 2400, 0, 59, 100, 0) perf 2717 [000] 12449.310790: raw_syscalls:sys_enter: NR 7 (4112340, 2, ffffffff, 3df, 100, 0) python 2545 [000] 12449.310800: raw_syscalls:sys_exit: NR 1 = 4 Timestamps never change, but my background task is a dead loop, can easily overwhelm the ring buffer. This patch fixes it by forcing unsetting PROT_WRITE for a backward ring buffer, so all backward ring buffers become overwrite ring buffers. Test result: # ./perf record --overwrite -e raw_syscalls:sys_enter -e raw_syscalls:sys_exit --exclude-perf -a --switch-output [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101285323 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101290053 ] [ perf record: dump data: Woken up 1 times ] [ perf record: Dump perf.data.2017110101290446 ] ^C[ perf record: Woken up 1 times to write data ] [ perf record: Dump perf.data.2017110101290837 ] [ perf record: Captured and wrote 0.826 MB perf.data. ] # ./perf script -i ./perf.data.2017110101285323 | head -n3 python 2545 [000] 11064.268083: raw_syscalls:sys_exit: NR 1 = 4 python 2545 [000] 11064.268084: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) python 2545 [000] 11064.268086: raw_syscalls:sys_exit: NR 1 = 4 # ./perf script -i ./perf.data.2017110101290 | head -n3 failed to open ./perf.data.2017110101290: No such file or directory # ./perf script -i ./perf.data.2017110101290053 | head -n3 python 2545 [000] 11071.564062: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) python 2545 [000] 11071.564064: raw_syscalls:sys_exit: NR 1 = 4 python 2545 [000] 11071.564066: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) # ./perf script -i ./perf.data.2017110101290 | head -n3 perf.data.2017110101290053 perf.data.2017110101290446 perf.data.2017110101290837 # ./perf script -i ./perf.data.2017110101290446 | head -n3 sshd 1321 [000] 11075.499473: raw_syscalls:sys_exit: NR 14 = 0 sshd 1321 [000] 11075.499474: raw_syscalls:sys_enter: NR 14 (2, 7ffe98899490, 0, 8, 0, 3000) sshd 1321 [000] 11075.499474: raw_syscalls:sys_exit: NR 14 = 0 # ./perf script -i ./perf.data.2017110101290837 | head -n3 python 2545 [000] 11079.280844: raw_syscalls:sys_exit: NR 1 = 4 python 2545 [000] 11079.280847: raw_syscalls:sys_enter: NR 1 (1, 12cc330, 4, 7fc237280370, 7fc2373d0700, 2c7b0) python 2545 [000] 11079.280850: raw_syscalls:sys_exit: NR 1 = 4 Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Cc: Mengting Zhang Link: http://lkml.kernel.org/r/20171204165107.95327-2-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 68c1f9546650..b1cea711232b 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -812,6 +812,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, int fd; int cpu; + mp->prot = PROT_READ | PROT_WRITE; if (evsel->attr.write_backward) { output = _output_backward; maps = evlist->backward_mmap; @@ -824,6 +825,7 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } + mp->prot &= ~PROT_WRITE; } if (evsel->system_wide && thread) @@ -1058,9 +1060,12 @@ int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; const struct thread_map *threads = evlist->threads; - struct mmap_params mp = { - .prot = PROT_READ | PROT_WRITE, - }; + /* + * Delay setting mp.prot: set it before calling perf_mmap__mmap. + * Its value is decided by evsel's write_backward. + * So &mp should not be passed through const pointer. + */ + struct mmap_params mp; if (!evlist->mmap) evlist->mmap = perf_evlist__alloc_mmap(evlist); -- cgit v1.2.3-70-g09d2 From 7fb4b407a1242dbc85ea3ed1be065dca8f9a6f5b Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 4 Dec 2017 16:51:06 +0000 Subject: perf mmap: Don't discard prev in backward mode 'perf record' can switch its output data file. The new output should only store the data after switching. However, in overwrite backward mode, the new output still can have data from before switching. That also brings extra overhead. At the end of mmap_read(), the position of the processed ring buffer is saved in md->prev. Next mmap_read should be end in md->prev if it is not overwriten. That avoids processing duplicate data. However, md->prev is discarded. So next the mmap_read() has to process whole valid ring buffer, which probably includes old processed data. Avoid calling backward_rb_find_range() when md->prev is still available. Signed-off-by: Wang Nan Tested-by: Kan Liang Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Mengting Zhang Link: http://lkml.kernel.org/r/20171204165107.95327-3-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/mmap.c | 33 +++++++++++++++------------------ 1 file changed, 15 insertions(+), 18 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 3f262e707a41..5f8cb1583e53 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -267,18 +267,6 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 return -1; } -static int rb_find_range(void *data, int mask, u64 head, u64 old, - u64 *start, u64 *end, bool backward) -{ - if (!backward) { - *start = old; - *end = head; - return 0; - } - - return backward_rb_find_range(data, mask, head, start, end); -} - int perf_mmap__push(struct perf_mmap *md, bool backward, void *to, int push(void *to, void *buf, size_t size)) { @@ -290,19 +278,28 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, void *buf; int rc = 0; - if (rb_find_range(data, md->mask, head, old, &start, &end, backward)) - return -1; + start = backward ? head : old; + end = backward ? old : head; if (start == end) return 0; size = end - start; if (size > (unsigned long)(md->mask) + 1) { - WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); + if (!backward) { + WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); - md->prev = head; - perf_mmap__consume(md, backward); - return 0; + md->prev = head; + perf_mmap__consume(md, backward); + return 0; + } + + /* + * Backward ring buffer is full. We still have a chance to read + * most of data from it. + */ + if (backward_rb_find_range(data, md->mask, head, &start, &end)) + return -1; } if ((start & md->mask) + size != (end & md->mask)) { -- cgit v1.2.3-70-g09d2 From 0b72d69a542873ee098867deeb37d27ad4629c64 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Mon, 4 Dec 2017 16:51:07 +0000 Subject: perf tools: Rename 'backward' to 'overwrite' in evlist, mmap and record Remove the backward/forward concept to make it uniform with user interface (the '--overwrite' option). Signed-off-by: Wang Nan Acked-by: Namhyung Kim Cc: Jiri Olsa Cc: Kan Liang Cc: Mengting Zhang Link: http://lkml.kernel.org/r/20171204165107.95327-4-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 14 +++++++------- tools/perf/tests/backward-ring-buffer.c | 4 ++-- tools/perf/util/evlist.c | 30 +++++++++++++++--------------- tools/perf/util/evlist.h | 2 +- tools/perf/util/mmap.c | 22 +++++++++++----------- 5 files changed, 36 insertions(+), 36 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 26b8571d0fdb..0a5749ef8b94 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -479,7 +479,7 @@ static struct perf_event_header finished_round_event = { }; static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evlist, - bool backward) + bool overwrite) { u64 bytes_written = rec->bytes_written; int i; @@ -489,18 +489,18 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli if (!evlist) return 0; - maps = backward ? evlist->backward_mmap : evlist->mmap; + maps = overwrite ? evlist->overwrite_mmap : evlist->mmap; if (!maps) return 0; - if (backward && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) + if (overwrite && evlist->bkw_mmap_state != BKW_MMAP_DATA_PENDING) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { struct auxtrace_mmap *mm = &maps[i].auxtrace_mmap; if (maps[i].base) { - if (perf_mmap__push(&maps[i], backward, rec, record__pushfn) != 0) { + if (perf_mmap__push(&maps[i], overwrite, rec, record__pushfn) != 0) { rc = -1; goto out; } @@ -520,7 +520,7 @@ static int record__mmap_read_evlist(struct record *rec, struct perf_evlist *evli if (bytes_written != rec->bytes_written) rc = record__write(rec, &finished_round_event, sizeof(finished_round_event)); - if (backward) + if (overwrite) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_EMPTY); out: return rc; @@ -692,8 +692,8 @@ perf_evlist__pick_pc(struct perf_evlist *evlist) if (evlist) { if (evlist->mmap && evlist->mmap[0].base) return evlist->mmap[0].base; - if (evlist->backward_mmap && evlist->backward_mmap[0].base) - return evlist->backward_mmap[0].base; + if (evlist->overwrite_mmap && evlist->overwrite_mmap[0].base) + return evlist->overwrite_mmap[0].base; } return NULL; } diff --git a/tools/perf/tests/backward-ring-buffer.c b/tools/perf/tests/backward-ring-buffer.c index cf37e43c42f3..4035d43523c3 100644 --- a/tools/perf/tests/backward-ring-buffer.c +++ b/tools/perf/tests/backward-ring-buffer.c @@ -33,8 +33,8 @@ static int count_samples(struct perf_evlist *evlist, int *sample_count, for (i = 0; i < evlist->nr_mmaps; i++) { union perf_event *event; - perf_mmap__read_catchup(&evlist->backward_mmap[i]); - while ((event = perf_mmap__read_backward(&evlist->backward_mmap[i])) != NULL) { + perf_mmap__read_catchup(&evlist->overwrite_mmap[i]); + while ((event = perf_mmap__read_backward(&evlist->overwrite_mmap[i])) != NULL) { const u32 type = event->header.type; switch (type) { diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index b1cea711232b..3570355bcf39 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -125,7 +125,7 @@ static void perf_evlist__purge(struct perf_evlist *evlist) void perf_evlist__exit(struct perf_evlist *evlist) { zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); fdarray__exit(&evlist->pollfd); } @@ -675,11 +675,11 @@ static int perf_evlist__set_paused(struct perf_evlist *evlist, bool value) { int i; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return 0; for (i = 0; i < evlist->nr_mmaps; i++) { - int fd = evlist->backward_mmap[i].fd; + int fd = evlist->overwrite_mmap[i].fd; int err; if (fd < 0) @@ -749,16 +749,16 @@ static void perf_evlist__munmap_nofree(struct perf_evlist *evlist) for (i = 0; i < evlist->nr_mmaps; i++) perf_mmap__munmap(&evlist->mmap[i]); - if (evlist->backward_mmap) + if (evlist->overwrite_mmap) for (i = 0; i < evlist->nr_mmaps; i++) - perf_mmap__munmap(&evlist->backward_mmap[i]); + perf_mmap__munmap(&evlist->overwrite_mmap[i]); } void perf_evlist__munmap(struct perf_evlist *evlist) { perf_evlist__munmap_nofree(evlist); zfree(&evlist->mmap); - zfree(&evlist->backward_mmap); + zfree(&evlist->overwrite_mmap); } static struct perf_mmap *perf_evlist__alloc_mmap(struct perf_evlist *evlist) @@ -800,7 +800,7 @@ perf_evlist__should_poll(struct perf_evlist *evlist __maybe_unused, static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, struct mmap_params *mp, int cpu_idx, - int thread, int *_output, int *_output_backward) + int thread, int *_output, int *_output_overwrite) { struct perf_evsel *evsel; int revent; @@ -814,14 +814,14 @@ static int perf_evlist__mmap_per_evsel(struct perf_evlist *evlist, int idx, mp->prot = PROT_READ | PROT_WRITE; if (evsel->attr.write_backward) { - output = _output_backward; - maps = evlist->backward_mmap; + output = _output_overwrite; + maps = evlist->overwrite_mmap; if (!maps) { maps = perf_evlist__alloc_mmap(evlist); if (!maps) return -1; - evlist->backward_mmap = maps; + evlist->overwrite_mmap = maps; if (evlist->bkw_mmap_state == BKW_MMAP_NOTREADY) perf_evlist__toggle_bkw_mmap(evlist, BKW_MMAP_RUNNING); } @@ -886,14 +886,14 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per cpu\n"); for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, true); for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, - thread, &output, &output_backward)) + thread, &output, &output_overwrite)) goto out_unmap; } } @@ -914,13 +914,13 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, pr_debug2("perf event ring buffer mmapped per thread\n"); for (thread = 0; thread < nr_threads; thread++) { int output = -1; - int output_backward = -1; + int output_overwrite = -1; auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, false); if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, - &output, &output_backward)) + &output, &output_overwrite)) goto out_unmap; } @@ -1753,7 +1753,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, RESUME, } action = NONE; - if (!evlist->backward_mmap) + if (!evlist->overwrite_mmap) return; switch (old_state) { diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index eec33770b8e6..75160666d305 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -44,7 +44,7 @@ struct perf_evlist { } workload; struct fdarray pollfd; struct perf_mmap *mmap; - struct perf_mmap *backward_mmap; + struct perf_mmap *overwrite_mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; diff --git a/tools/perf/util/mmap.c b/tools/perf/util/mmap.c index 5f8cb1583e53..05076e683938 100644 --- a/tools/perf/util/mmap.c +++ b/tools/perf/util/mmap.c @@ -234,18 +234,18 @@ int perf_mmap__mmap(struct perf_mmap *map, struct mmap_params *mp, int fd) return 0; } -static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) +static int overwrite_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 *end) { struct perf_event_header *pheader; u64 evt_head = head; int size = mask + 1; - pr_debug2("backward_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); + pr_debug2("overwrite_rb_find_range: buf=%p, head=%"PRIx64"\n", buf, head); pheader = (struct perf_event_header *)(buf + (head & mask)); *start = head; while (true) { if (evt_head - head >= (unsigned int)size) { - pr_debug("Finished reading backward ring buffer: rewind\n"); + pr_debug("Finished reading overwrite ring buffer: rewind\n"); if (evt_head - head > (unsigned int)size) evt_head -= pheader->size; *end = evt_head; @@ -255,7 +255,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 pheader = (struct perf_event_header *)(buf + (evt_head & mask)); if (pheader->size == 0) { - pr_debug("Finished reading backward ring buffer: get start\n"); + pr_debug("Finished reading overwrite ring buffer: get start\n"); *end = evt_head; return 0; } @@ -267,7 +267,7 @@ static int backward_rb_find_range(void *buf, int mask, u64 head, u64 *start, u64 return -1; } -int perf_mmap__push(struct perf_mmap *md, bool backward, +int perf_mmap__push(struct perf_mmap *md, bool overwrite, void *to, int push(void *to, void *buf, size_t size)) { u64 head = perf_mmap__read_head(md); @@ -278,19 +278,19 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, void *buf; int rc = 0; - start = backward ? head : old; - end = backward ? old : head; + start = overwrite ? head : old; + end = overwrite ? old : head; if (start == end) return 0; size = end - start; if (size > (unsigned long)(md->mask) + 1) { - if (!backward) { + if (!overwrite) { WARN_ONCE(1, "failed to keep up with mmap data. (warn only once)\n"); md->prev = head; - perf_mmap__consume(md, backward); + perf_mmap__consume(md, overwrite); return 0; } @@ -298,7 +298,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, * Backward ring buffer is full. We still have a chance to read * most of data from it. */ - if (backward_rb_find_range(data, md->mask, head, &start, &end)) + if (overwrite_rb_find_range(data, md->mask, head, &start, &end)) return -1; } @@ -323,7 +323,7 @@ int perf_mmap__push(struct perf_mmap *md, bool backward, } md->prev = head; - perf_mmap__consume(md, backward); + perf_mmap__consume(md, overwrite); out: return rc; } -- cgit v1.2.3-70-g09d2 From e5fcc2abc353be94548080d84de3269ef6cc2af6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:01 +0800 Subject: perf stat: Define a structure for per-thread shadow stats Perf has a set of static variables to record the runtime shadow metrics stats. While if we want to record the runtime shadow stats for per-thread, it will be the limitation. This patch creates a structure and the next patches will use this structure to update the runtime shadow stats for per-thread. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 11 ----------- tools/perf/util/stat.h | 43 ++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 42 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 57ec22513971..93aac2788056 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -9,17 +9,6 @@ #include "expr.h" #include "metricgroup.h" -enum { - CTX_BIT_USER = 1 << 0, - CTX_BIT_KERNEL = 1 << 1, - CTX_BIT_HV = 1 << 2, - CTX_BIT_HOST = 1 << 3, - CTX_BIT_IDLE = 1 << 4, - CTX_BIT_MAX = 1 << 5, -}; - -#define NUM_CTX CTX_BIT_MAX - /* * AGGR_GLOBAL: Use CPU 0 * AGGR_SOCKET: Use first CPU of socket diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index eefca5c981fd..c685c41f1fb9 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -5,6 +5,7 @@ #include #include #include "xyarray.h" +#include "rblist.h" struct stats { @@ -43,6 +44,47 @@ enum aggr_mode { AGGR_UNSET, }; +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +enum stat_type { + STAT_NONE = 0, + STAT_NSECS, + STAT_CYCLES, + STAT_STALLED_CYCLES_FRONT, + STAT_STALLED_CYCLES_BACK, + STAT_BRANCHES, + STAT_CACHEREFS, + STAT_L1_DCACHE, + STAT_L1_ICACHE, + STAT_LL_CACHE, + STAT_ITLB_CACHE, + STAT_DTLB_CACHE, + STAT_CYCLES_IN_TX, + STAT_TRANSACTION, + STAT_ELISION, + STAT_TOPDOWN_TOTAL_SLOTS, + STAT_TOPDOWN_SLOTS_ISSUED, + STAT_TOPDOWN_SLOTS_RETIRED, + STAT_TOPDOWN_FETCH_BUBBLES, + STAT_TOPDOWN_RECOVERY_BUBBLES, + STAT_SMI_NUM, + STAT_APERF, + STAT_MAX +}; + +struct runtime_stat { + struct rblist value_list; +}; + struct perf_stat_config { enum aggr_mode aggr_mode; bool scale; @@ -92,7 +134,6 @@ struct perf_stat_output_ctx { bool force_header; }; -struct rblist; void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, -- cgit v1.2.3-70-g09d2 From 49cd456af1dcb13ff3e94cb997c82968ae86722a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:02 +0800 Subject: perf stat: Extend rbtree to support per-thread shadow stats Previously the rbtree was used to link generic metrics. This patches adds new ctx/type/stat into rbtree keys because we will use this rbtree to maintain shadow metrics to replace original a couple of static arrays for supporting per-thread shadow stats. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 93aac2788056..528be3e8d13b 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -45,7 +45,10 @@ struct stats walltime_nsecs_stats; struct saved_value { struct rb_node rb_node; struct perf_evsel *evsel; + enum stat_type type; + int ctx; int cpu; + struct runtime_stat *stat; struct stats stats; }; @@ -58,6 +61,30 @@ static int saved_value_cmp(struct rb_node *rb_node, const void *entry) if (a->cpu != b->cpu) return a->cpu - b->cpu; + + /* + * Previously the rbtree was used to link generic metrics. + * The keys were evsel/cpu. Now the rbtree is extended to support + * per-thread shadow stats. For shadow stats case, the keys + * are cpu/type/ctx/stat (evsel is NULL). For generic metrics + * case, the keys are still evsel/cpu (type/ctx/stat are 0 or NULL). + */ + if (a->type != b->type) + return a->type - b->type; + + if (a->ctx != b->ctx) + return a->ctx - b->ctx; + + if (a->evsel == NULL && b->evsel == NULL) { + if (a->stat == b->stat) + return 0; + + if ((char *)a->stat < (char *)b->stat) + return -1; + + return 1; + } + if (a->evsel == b->evsel) return 0; if ((char *)a->evsel < (char *)b->evsel) -- cgit v1.2.3-70-g09d2 From 8efb2df1288bc1bcc3711a97028620717319f138 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:03 +0800 Subject: perf stat: Create the runtime_stat init/exit function It mainly initializes and releases the rblist which is defined in struct runtime_stat. For the original rblist 'runtime_saved_values', it's still kept there for keeping the patch bisectable. The rblist 'runtime_saved_values' will be removed in later patch at switching time. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-4-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 17 +++++++++++++++++ tools/perf/util/stat.h | 3 +++ 2 files changed, 20 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 528be3e8d13b..07cfbf613bdc 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -40,6 +40,7 @@ static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; static struct rblist runtime_saved_values; static bool have_frontend_stalled; +struct runtime_stat rt_stat; struct stats walltime_nsecs_stats; struct saved_value { @@ -134,6 +135,21 @@ static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, return NULL; } +void runtime_stat__init(struct runtime_stat *st) +{ + struct rblist *rblist = &st->value_list; + + rblist__init(rblist); + rblist->node_cmp = saved_value_cmp; + rblist->node_new = saved_value_new; + rblist->node_delete = saved_value_delete; +} + +void runtime_stat__exit(struct runtime_stat *st) +{ + rblist__exit(&st->value_list); +} + void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); @@ -141,6 +157,7 @@ void perf_stat__init_shadow_stats(void) runtime_saved_values.node_cmp = saved_value_cmp; runtime_saved_values.node_new = saved_value_new; runtime_saved_values.node_delete = saved_value_delete; + runtime_stat__init(&rt_stat); } static int evsel_context(struct perf_evsel *evsel) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index c685c41f1fb9..f20240037377 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -117,12 +117,15 @@ bool __perf_evsel_stat__is(struct perf_evsel *evsel, void perf_stat_evsel_id_init(struct perf_evsel *evsel); +extern struct runtime_stat rt_stat; extern struct stats walltime_nsecs_stats; typedef void (*print_metric_t)(void *ctx, const char *color, const char *unit, const char *fmt, double val); typedef void (*new_line_t )(void *ctx); +void runtime_stat__init(struct runtime_stat *st); +void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, -- cgit v1.2.3-70-g09d2 From 1fcd03946b52b8a57a6692fedd4406b45baedfe6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:04 +0800 Subject: perf stat: Update per-thread shadow stats The functions perf_stat__update_shadow_stats() is called to update the shadow stats on a set of static variables. But the static variables are the limitations to be extended to support per-thread shadow stats. This patch lets the perf_stat__update_shadow_stats() support to update the shadow stats on a input parameter 'st' and uses update_runtime_stat() to update the stats. It will not directly update the static variables as before. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-5-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +- tools/perf/builtin-stat.c | 3 +- tools/perf/util/stat-shadow.c | 86 +++++++++++++++++++++++++++++-------------- tools/perf/util/stat.c | 8 ++-- tools/perf/util/stat.h | 2 +- 5 files changed, 68 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 39d8b55f0db3..81b395040298 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1548,7 +1548,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, val = sample->period * evsel->scale; perf_stat__update_shadow_stats(evsel, val, - sample->cpu); + sample->cpu, + &rt_stat); evsel_script(evsel)->val = val; if (evsel_script(evsel->leader)->gnum == evsel->leader->nr_members) { for_each_group_member (ev2, evsel->leader) { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index a027b4712e48..3f4a2c21b824 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1214,7 +1214,8 @@ static void aggr_update_shadow(void) val += perf_counts(counter->counts, cpu, 0)->val; } perf_stat__update_shadow_stats(counter, val, - first_shadow_cpu(counter, id)); + first_shadow_cpu(counter, id), + &rt_stat); } } } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07cfbf613bdc..4b28c40de927 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -116,19 +116,29 @@ static void saved_value_delete(struct rblist *rblist __maybe_unused, static struct saved_value *saved_value_lookup(struct perf_evsel *evsel, int cpu, - bool create) + bool create, + enum stat_type type, + int ctx, + struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *nd; struct saved_value dm = { .cpu = cpu, .evsel = evsel, + .type = type, + .ctx = ctx, + .stat = st, }; - nd = rblist__find(&runtime_saved_values, &dm); + + rblist = &st->value_list; + + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); if (create) { - rblist__add_node(&runtime_saved_values, &dm); - nd = rblist__find(&runtime_saved_values, &dm); + rblist__add_node(rblist, &dm); + nd = rblist__find(rblist, &dm); if (nd) return container_of(nd, struct saved_value, rb_node); } @@ -217,13 +227,24 @@ void perf_stat__reset_shadow_stats(void) } } +static void update_runtime_stat(struct runtime_stat *st, + enum stat_type type, + int ctx, int cpu, u64 count) +{ + struct saved_value *v = saved_value_lookup(NULL, cpu, true, + type, ctx, st); + + if (v) + update_stats(&v->stats, count); +} + /* * Update various tracking values we maintain to print * more semantic information such as miss/hit ratios, * instruction rates, etc: */ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu) + int cpu, struct runtime_stat *st) { int ctx = evsel_context(counter); @@ -231,50 +252,58 @@ void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK) || perf_evsel__match(counter, SOFTWARE, SW_CPU_CLOCK)) - update_stats(&runtime_nsecs_stats[cpu], count); + update_runtime_stat(st, STAT_NSECS, 0, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES, ctx, cpu, count); else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) - update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CYCLES_IN_TX, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TRANSACTION_START)) - update_stats(&runtime_transaction_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_TRANSACTION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, ELISION_START)) - update_stats(&runtime_elision_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ELISION, ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_TOTAL_SLOTS)) - update_stats(&runtime_topdown_total_slots[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_TOTAL_SLOTS, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_ISSUED)) - update_stats(&runtime_topdown_slots_issued[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_ISSUED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_SLOTS_RETIRED)) - update_stats(&runtime_topdown_slots_retired[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_FETCH_BUBBLES)) - update_stats(&runtime_topdown_fetch_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu, count); else if (perf_stat_evsel__is(counter, TOPDOWN_RECOVERY_BUBBLES)) - update_stats(&runtime_topdown_recovery_bubbles[ctx][cpu], count); + update_runtime_stat(st, STAT_TOPDOWN_RECOVERY_BUBBLES, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_STALLED_CYCLES_BACK, + ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_BRANCHES, ctx, cpu, count); else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_CACHEREFS, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_DCACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_L1_ICACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_LL_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_DTLB_CACHE, ctx, cpu, count); else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_ITLB_CACHE, ctx, cpu, count); else if (perf_stat_evsel__is(counter, SMI_NUM)) - update_stats(&runtime_smi_num_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_SMI_NUM, ctx, cpu, count); else if (perf_stat_evsel__is(counter, APERF)) - update_stats(&runtime_aperf_stats[ctx][cpu], count); + update_runtime_stat(st, STAT_APERF, ctx, cpu, count); if (counter->collect_stat) { - struct saved_value *v = saved_value_lookup(counter, cpu, true); + struct saved_value *v = saved_value_lookup(counter, cpu, true, + STAT_NONE, 0, st); update_stats(&v->stats, count); } } @@ -694,7 +723,8 @@ static void generic_metric(const char *metric_expr, stats = &walltime_nsecs_stats; scale = 1e-9; } else { - v = saved_value_lookup(metric_events[i], cpu, false); + v = saved_value_lookup(metric_events[i], cpu, false, + STAT_NONE, 0, &rt_stat); if (!v) break; stats = &v->stats; diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 151e9efd7286..78abfd40b135 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -278,9 +278,11 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, config->scale, NULL); if (config->aggr_mode == AGGR_NONE) - perf_stat__update_shadow_stats(evsel, count->val, cpu); + perf_stat__update_shadow_stats(evsel, count->val, cpu, + &rt_stat); if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0); + perf_stat__update_shadow_stats(evsel, count->val, 0, + &rt_stat); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -362,7 +364,7 @@ int perf_stat_process_counter(struct perf_stat_config *config, /* * Save the full runtime - to allow normalization during printout: */ - perf_stat__update_shadow_stats(counter, *count, 0); + perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); return 0; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index f20240037377..bb9902ad3a79 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -129,7 +129,7 @@ void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, - int cpu); + int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { void *ctx; print_metric_t print_metric; -- cgit v1.2.3-70-g09d2 From e0128b30dbfb2884530251b4accdffdbf55a6b72 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:05 +0800 Subject: perf stat: Print per-thread shadow stats The function perf_stat__print_shadow_stats() is called to print the shadow stats on a set of static variables. But the static variables are the limitations to support per-thread shadow stats. This patch lets the perf_stat__print_shadow_stats() support to print the shadow stats from a input parameter 'st'. It will not directly get value from static variable. Instead, it now uses runtime_stat_avg() and runtime_stat_n() to get and compute the values. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-6-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 3 +- tools/perf/builtin-stat.c | 23 +++-- tools/perf/util/stat-shadow.c | 209 ++++++++++++++++++++++++++---------------- tools/perf/util/stat.h | 3 +- 4 files changed, 151 insertions(+), 87 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 81b395040298..fac6f053e4da 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1557,7 +1557,8 @@ static void perf_sample__fprint_metric(struct perf_script *script, evsel_script(ev2)->val, sample->cpu, &ctx, - NULL); + NULL, + &rt_stat); } evsel_script(evsel->leader)->gnum = 0; } diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 3f4a2c21b824..097a694d16f2 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1097,7 +1097,8 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) } static void printout(int id, int nr, struct perf_evsel *counter, double uval, - char *prefix, u64 run, u64 ena, double noise) + char *prefix, u64 run, u64 ena, double noise, + struct runtime_stat *st) { struct perf_stat_output_ctx out; struct outstate os = { @@ -1190,7 +1191,7 @@ static void printout(int id, int nr, struct perf_evsel *counter, double uval, perf_stat__print_shadow_stats(counter, uval, first_shadow_cpu(counter, id), - &out, &metric_events); + &out, &metric_events, st); if (!csv_output && !metric_only) { print_noise(counter, noise); print_running(run, ena); @@ -1335,7 +1336,8 @@ static void print_aggr(char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(id, nr, counter, uval, prefix, run, ena, 1.0); + printout(id, nr, counter, uval, prefix, run, ena, 1.0, + &rt_stat); if (!metric_only) fputc('\n', output); } @@ -1365,7 +1367,8 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0); + printout(thread, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } } @@ -1402,7 +1405,8 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = cd.avg * counter->scale; - printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, cd.avg); + printout(-1, 0, counter, uval, prefix, cd.avg_running, cd.avg_enabled, + cd.avg, &rt_stat); if (!metric_only) fprintf(output, "\n"); } @@ -1441,7 +1445,8 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); fputc('\n', output); } @@ -1473,7 +1478,8 @@ static void print_no_aggr_metric(char *prefix) run = perf_counts(counter->counts, cpu, 0)->run; uval = val * counter->scale; - printout(cpu, 0, counter, uval, prefix, run, ena, 1.0); + printout(cpu, 0, counter, uval, prefix, run, ena, 1.0, + &rt_stat); } fputc('\n', stat_config.output); } @@ -1529,7 +1535,8 @@ static void print_metric_headers(const char *prefix, bool no_indent) perf_stat__print_shadow_stats(counter, 0, 0, &out, - &metric_events); + &metric_events, + &rt_stat); } fputc('\n', stat_config.output); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 4b28c40de927..a95c4fe991aa 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -424,15 +424,40 @@ void perf_stat__collect_metric_expr(struct perf_evlist *evsel_list) } } +static double runtime_stat_avg(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return avg_stats(&v->stats); +} + +static double runtime_stat_n(struct runtime_stat *st, + enum stat_type type, int ctx, int cpu) +{ + struct saved_value *v; + + v = saved_value_lookup(NULL, cpu, false, type, ctx, st); + if (!v) + return 0.0; + + return v->stats.n; +} + static void print_stalled_cycles_frontend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -448,13 +473,14 @@ static void print_stalled_cycles_frontend(int cpu, static void print_stalled_cycles_backend(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -467,13 +493,14 @@ static void print_stalled_cycles_backend(int cpu, static void print_branch_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_branches_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_BRANCHES, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -486,13 +513,15 @@ static void print_branch_misses(int cpu, static void print_l1_dcache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_DCACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -505,13 +534,15 @@ static void print_l1_dcache_misses(int cpu, static void print_l1_icache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) + { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_L1_ICACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -523,13 +554,14 @@ static void print_l1_icache_misses(int cpu, static void print_dtlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_DTLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -541,13 +573,14 @@ static void print_dtlb_cache_misses(int cpu, static void print_itlb_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_ITLB_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -559,13 +592,14 @@ static void print_itlb_cache_misses(int cpu, static void print_ll_cache_misses(int cpu, struct perf_evsel *evsel, double avg, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double total, ratio = 0.0; const char *color; int ctx = evsel_context(evsel); - total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_LL_CACHE, ctx, cpu); if (total) ratio = avg / total * 100.0; @@ -623,68 +657,72 @@ static double sanitize_val(double x) return x; } -static double td_total_slots(int ctx, int cpu) +static double td_total_slots(int ctx, int cpu, struct runtime_stat *st) { - return avg_stats(&runtime_topdown_total_slots[ctx][cpu]); + return runtime_stat_avg(st, STAT_TOPDOWN_TOTAL_SLOTS, ctx, cpu); } -static double td_bad_spec(int ctx, int cpu) +static double td_bad_spec(int ctx, int cpu, struct runtime_stat *st) { double bad_spec = 0; double total_slots; double total; - total = avg_stats(&runtime_topdown_slots_issued[ctx][cpu]) - - avg_stats(&runtime_topdown_slots_retired[ctx][cpu]) + - avg_stats(&runtime_topdown_recovery_bubbles[ctx][cpu]); - total_slots = td_total_slots(ctx, cpu); + total = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_ISSUED, ctx, cpu) - + runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, ctx, cpu) + + runtime_stat_avg(st, STAT_TOPDOWN_RECOVERY_BUBBLES, ctx, cpu); + + total_slots = td_total_slots(ctx, cpu, st); if (total_slots) bad_spec = total / total_slots; return sanitize_val(bad_spec); } -static double td_retiring(int ctx, int cpu) +static double td_retiring(int ctx, int cpu, struct runtime_stat *st) { double retiring = 0; - double total_slots = td_total_slots(ctx, cpu); - double ret_slots = avg_stats(&runtime_topdown_slots_retired[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double ret_slots = runtime_stat_avg(st, STAT_TOPDOWN_SLOTS_RETIRED, + ctx, cpu); if (total_slots) retiring = ret_slots / total_slots; return retiring; } -static double td_fe_bound(int ctx, int cpu) +static double td_fe_bound(int ctx, int cpu, struct runtime_stat *st) { double fe_bound = 0; - double total_slots = td_total_slots(ctx, cpu); - double fetch_bub = avg_stats(&runtime_topdown_fetch_bubbles[ctx][cpu]); + double total_slots = td_total_slots(ctx, cpu, st); + double fetch_bub = runtime_stat_avg(st, STAT_TOPDOWN_FETCH_BUBBLES, + ctx, cpu); if (total_slots) fe_bound = fetch_bub / total_slots; return fe_bound; } -static double td_be_bound(int ctx, int cpu) +static double td_be_bound(int ctx, int cpu, struct runtime_stat *st) { - double sum = (td_fe_bound(ctx, cpu) + - td_bad_spec(ctx, cpu) + - td_retiring(ctx, cpu)); + double sum = (td_fe_bound(ctx, cpu, st) + + td_bad_spec(ctx, cpu, st) + + td_retiring(ctx, cpu, st)); if (sum == 0) return 0; return sanitize_val(1.0 - sum); } static void print_smi_cost(int cpu, struct perf_evsel *evsel, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { double smi_num, aperf, cycles, cost = 0.0; int ctx = evsel_context(evsel); const char *color = NULL; - smi_num = avg_stats(&runtime_smi_num_stats[ctx][cpu]); - aperf = avg_stats(&runtime_aperf_stats[ctx][cpu]); - cycles = avg_stats(&runtime_cycles_stats[ctx][cpu]); + smi_num = runtime_stat_avg(st, STAT_SMI_NUM, ctx, cpu); + aperf = runtime_stat_avg(st, STAT_APERF, ctx, cpu); + cycles = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); if ((cycles == 0) || (aperf == 0)) return; @@ -704,7 +742,8 @@ static void generic_metric(const char *metric_expr, const char *metric_name, double avg, int cpu, - struct perf_stat_output_ctx *out) + struct perf_stat_output_ctx *out, + struct runtime_stat *st) { print_metric_t print_metric = out->print_metric; struct parse_ctx pctx; @@ -724,7 +763,7 @@ static void generic_metric(const char *metric_expr, scale = 1e-9; } else { v = saved_value_lookup(metric_events[i], cpu, false, - STAT_NONE, 0, &rt_stat); + STAT_NONE, 0, st); if (!v) break; stats = &v->stats; @@ -752,7 +791,8 @@ static void generic_metric(const char *metric_expr, void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events) + struct rblist *metric_events, + struct runtime_stat *st) { void *ctxp = out->ctx; print_metric_t print_metric = out->print_metric; @@ -763,7 +803,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, int num = 1; if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) { ratio = avg / total; print_metric(ctxp, NULL, "%7.2f ", @@ -771,8 +812,13 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, } else { print_metric(ctxp, NULL, NULL, "insn per cycle", 0); } - total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + total = runtime_stat_avg(st, STAT_STALLED_CYCLES_FRONT, + ctx, cpu); + + total = max(total, runtime_stat_avg(st, + STAT_STALLED_CYCLES_BACK, + ctx, cpu)); if (total && avg) { out->new_line(ctxp); @@ -785,8 +831,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, "stalled cycles per insn", 0); } } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES)) { - if (runtime_branches_stats[ctx][cpu].n != 0) - print_branch_misses(cpu, evsel, avg, out); + if (runtime_stat_n(st, STAT_BRANCHES, ctx, cpu) != 0) + print_branch_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all branches", 0); } else if ( @@ -794,8 +840,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_dcache_stats[ctx][cpu].n != 0) - print_l1_dcache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_DCACHE, ctx, cpu) != 0) + print_l1_dcache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-dcache hits", 0); } else if ( @@ -803,8 +850,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_l1_icache_stats[ctx][cpu].n != 0) - print_l1_icache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_L1_ICACHE, ctx, cpu) != 0) + print_l1_icache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all L1-icache hits", 0); } else if ( @@ -812,8 +860,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_dtlb_cache_stats[ctx][cpu].n != 0) - print_dtlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_DTLB_CACHE, ctx, cpu) != 0) + print_dtlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all dTLB cache hits", 0); } else if ( @@ -821,8 +870,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_itlb_cache_stats[ctx][cpu].n != 0) - print_itlb_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_ITLB_CACHE, ctx, cpu) != 0) + print_itlb_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all iTLB cache hits", 0); } else if ( @@ -830,27 +880,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16))) { - if (runtime_ll_cache_stats[ctx][cpu].n != 0) - print_ll_cache_misses(cpu, evsel, avg, out); + + if (runtime_stat_n(st, STAT_LL_CACHE, ctx, cpu) != 0) + print_ll_cache_misses(cpu, evsel, avg, out, st); else print_metric(ctxp, NULL, NULL, "of all LL-cache hits", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES)) { - total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CACHEREFS, ctx, cpu); if (total) ratio = avg * 100 / total; - if (runtime_cacherefs_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CACHEREFS, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.3f %%", "of all cache refs", ratio); else print_metric(ctxp, NULL, NULL, "of all cache refs", 0); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg, out); + print_stalled_cycles_frontend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg, out); + print_stalled_cycles_backend(cpu, evsel, avg, out, st); } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) { ratio = avg / total; @@ -859,7 +910,8 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "Ghz", 0); } } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + if (total) print_metric(ctxp, NULL, "%7.2f%%", "transactional cycles", @@ -868,8 +920,9 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, print_metric(ctxp, NULL, NULL, "transactional cycles", 0); } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { - total = avg_stats(&runtime_cycles_stats[ctx][cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES, ctx, cpu); + total2 = runtime_stat_avg(st, STAT_CYCLES_IN_TX, ctx, cpu); + if (total2 < avg) total2 = avg; if (total) @@ -878,19 +931,21 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "aborted cycles", 0); } else if (perf_stat_evsel__is(evsel, TRANSACTION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; - if (runtime_cycles_in_tx_stats[ctx][cpu].n != 0) + if (runtime_stat_n(st, STAT_CYCLES_IN_TX, ctx, cpu) != 0) print_metric(ctxp, NULL, "%8.0f", "cycles / transaction", ratio); else print_metric(ctxp, NULL, NULL, "cycles / transaction", - 0); + 0); } else if (perf_stat_evsel__is(evsel, ELISION_START)) { - total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + total = runtime_stat_avg(st, STAT_CYCLES_IN_TX, + ctx, cpu); if (avg) ratio = total / avg; @@ -904,28 +959,28 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, else print_metric(ctxp, NULL, NULL, "CPUs utilized", 0); } else if (perf_stat_evsel__is(evsel, TOPDOWN_FETCH_BUBBLES)) { - double fe_bound = td_fe_bound(ctx, cpu); + double fe_bound = td_fe_bound(ctx, cpu, st); if (fe_bound > 0.2) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "frontend bound", fe_bound * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_RETIRED)) { - double retiring = td_retiring(ctx, cpu); + double retiring = td_retiring(ctx, cpu, st); if (retiring > 0.7) color = PERF_COLOR_GREEN; print_metric(ctxp, color, "%8.1f%%", "retiring", retiring * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_RECOVERY_BUBBLES)) { - double bad_spec = td_bad_spec(ctx, cpu); + double bad_spec = td_bad_spec(ctx, cpu, st); if (bad_spec > 0.1) color = PERF_COLOR_RED; print_metric(ctxp, color, "%8.1f%%", "bad speculation", bad_spec * 100.); } else if (perf_stat_evsel__is(evsel, TOPDOWN_SLOTS_ISSUED)) { - double be_bound = td_be_bound(ctx, cpu); + double be_bound = td_be_bound(ctx, cpu, st); const char *name = "backend bound"; static int have_recovery_bubbles = -1; @@ -938,19 +993,19 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, if (be_bound > 0.2) color = PERF_COLOR_RED; - if (td_total_slots(ctx, cpu) > 0) + if (td_total_slots(ctx, cpu, st) > 0) print_metric(ctxp, color, "%8.1f%%", name, be_bound * 100.); else print_metric(ctxp, NULL, NULL, name, 0); } else if (evsel->metric_expr) { generic_metric(evsel->metric_expr, evsel->metric_events, evsel->name, - evsel->metric_name, avg, cpu, out); - } else if (runtime_nsecs_stats[cpu].n != 0) { + evsel->metric_name, avg, cpu, out, st); + } else if (runtime_stat_n(st, STAT_NSECS, 0, cpu) != 0) { char unit = 'M'; char unit_buf[10]; - total = avg_stats(&runtime_nsecs_stats[cpu]); + total = runtime_stat_avg(st, STAT_NSECS, 0, cpu); if (total) ratio = 1000.0 * avg / total; @@ -961,7 +1016,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, snprintf(unit_buf, sizeof(unit_buf), "%c/sec", unit); print_metric(ctxp, NULL, "%8.3f", unit_buf, ratio); } else if (perf_stat_evsel__is(evsel, SMI_NUM)) { - print_smi_cost(cpu, evsel, out); + print_smi_cost(cpu, evsel, out, st); } else { num = 0; } @@ -974,7 +1029,7 @@ void perf_stat__print_shadow_stats(struct perf_evsel *evsel, out->new_line(ctxp); generic_metric(mexp->metric_expr, mexp->metric_events, evsel->name, mexp->metric_name, - avg, cpu, out); + avg, cpu, out, st); } } if (num == 0) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index bb9902ad3a79..76b322a2d293 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -140,7 +140,8 @@ struct perf_stat_output_ctx { void perf_stat__print_shadow_stats(struct perf_evsel *evsel, double avg, int cpu, struct perf_stat_output_ctx *out, - struct rblist *metric_events); + struct rblist *metric_events, + struct runtime_stat *st); void perf_stat__collect_metric_expr(struct perf_evlist *); int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); -- cgit v1.2.3-70-g09d2 From 6a1e2c5c267358455a13bd8d59547430370c845a Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:06 +0800 Subject: perf stat: Remove a set of shadow stats static variables In previous patches, we have reconstructed the code and let it not access the static variables directly. This patch removes these static variables. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-7-git-send-email-yao.jin@linux.intel.com [ Rename 'stat' variables to 'st' to build on centos:{5,6} and others where it shadows a global declaration ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/stat-shadow.c | 68 ++++++++++--------------------------------- tools/perf/util/stat.h | 1 + 2 files changed, 16 insertions(+), 53 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index a95c4fe991aa..594d14a02b67 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -16,28 +16,6 @@ * AGGR_NONE: Use matching CPU * AGGR_THREAD: Not supported? */ -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_total_slots[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_issued[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_slots_retired[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_fetch_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_topdown_recovery_bubbles[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_smi_num_stats[NUM_CTX][MAX_NR_CPUS]; -static struct stats runtime_aperf_stats[NUM_CTX][MAX_NR_CPUS]; -static struct rblist runtime_saved_values; static bool have_frontend_stalled; struct runtime_stat rt_stat; @@ -163,10 +141,6 @@ void runtime_stat__exit(struct runtime_stat *st) void perf_stat__init_shadow_stats(void) { have_frontend_stalled = pmu_have_event("cpu", "stalled-cycles-frontend"); - rblist__init(&runtime_saved_values); - runtime_saved_values.node_cmp = saved_value_cmp; - runtime_saved_values.node_new = saved_value_new; - runtime_saved_values.node_delete = saved_value_delete; runtime_stat__init(&rt_stat); } @@ -188,36 +162,13 @@ static int evsel_context(struct perf_evsel *evsel) return ctx; } -void perf_stat__reset_shadow_stats(void) +static void reset_stat(struct runtime_stat *st) { + struct rblist *rblist; struct rb_node *pos, *next; - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); - memset(runtime_topdown_total_slots, 0, sizeof(runtime_topdown_total_slots)); - memset(runtime_topdown_slots_retired, 0, sizeof(runtime_topdown_slots_retired)); - memset(runtime_topdown_slots_issued, 0, sizeof(runtime_topdown_slots_issued)); - memset(runtime_topdown_fetch_bubbles, 0, sizeof(runtime_topdown_fetch_bubbles)); - memset(runtime_topdown_recovery_bubbles, 0, sizeof(runtime_topdown_recovery_bubbles)); - memset(runtime_smi_num_stats, 0, sizeof(runtime_smi_num_stats)); - memset(runtime_aperf_stats, 0, sizeof(runtime_aperf_stats)); - - next = rb_first(&runtime_saved_values.entries); + rblist = &st->value_list; + next = rb_first(&rblist->entries); while (next) { pos = next; next = rb_next(pos); @@ -227,6 +178,17 @@ void perf_stat__reset_shadow_stats(void) } } +void perf_stat__reset_shadow_stats(void) +{ + reset_stat(&rt_stat); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st) +{ + reset_stat(st); +} + static void update_runtime_stat(struct runtime_stat *st, enum stat_type type, int ctx, int cpu, u64 count) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 76b322a2d293..cfe4fb899633 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -128,6 +128,7 @@ void runtime_stat__init(struct runtime_stat *st); void runtime_stat__exit(struct runtime_stat *st); void perf_stat__init_shadow_stats(void); void perf_stat__reset_shadow_stats(void); +void perf_stat__reset_shadow_per_stat(struct runtime_stat *st); void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 count, int cpu, struct runtime_stat *st); struct perf_stat_output_ctx { -- cgit v1.2.3-70-g09d2 From 56739444d861daa050624d40c7adff32c73e9980 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:07 +0800 Subject: perf stat: Allocate shadow stats buffer for threads After perf_evlist__create_maps() being executed, we can get all threads from /proc. And via thread_map__nr(), we can also get the number of threads. With the number of threads, the patch allocates a buffer which will record the shadow stats for these threads. The buffer pointer is saved in stat_config. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-8-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 46 +++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/stat.h | 2 ++ 2 files changed, 47 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 097a694d16f2..4c492ac3ac07 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -214,8 +214,13 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, static void perf_stat__reset_stats(void) { + int i; + perf_evlist__reset_stats(evsel_list); perf_stat__reset_shadow_stats(); + + for (i = 0; i < stat_config.stats_num; i++) + perf_stat__reset_shadow_per_stat(&stat_config.stats[i]); } static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -2495,6 +2500,35 @@ int process_cpu_map_event(struct perf_tool *tool, return set_maps(st); } +static int runtime_stat_new(struct perf_stat_config *config, int nthreads) +{ + int i; + + config->stats = calloc(nthreads, sizeof(struct runtime_stat)); + if (!config->stats) + return -1; + + config->stats_num = nthreads; + + for (i = 0; i < nthreads; i++) + runtime_stat__init(&config->stats[i]); + + return 0; +} + +static void runtime_stat_delete(struct perf_stat_config *config) +{ + int i; + + if (!config->stats) + return; + + for (i = 0; i < config->stats_num; i++) + runtime_stat__exit(&config->stats[i]); + + free(config->stats); +} + static const char * const stat_report_usage[] = { "perf stat report []", NULL, @@ -2750,8 +2784,15 @@ int cmd_stat(int argc, const char **argv) * Initialize thread_map with comm names, * so we could print it out on output. */ - if (stat_config.aggr_mode == AGGR_THREAD) + if (stat_config.aggr_mode == AGGR_THREAD) { thread_map__read_comms(evsel_list->threads); + if (target.system_wide) { + if (runtime_stat_new(&stat_config, + thread_map__nr(evsel_list->threads))) { + goto out; + } + } + } if (interval && interval < 100) { if (interval < 10) { @@ -2841,5 +2882,8 @@ out: sysfs__write_int(FREEZE_ON_SMI_PATH, 0); perf_evlist__delete(evsel_list); + + runtime_stat_delete(&stat_config); + return status; } diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index cfe4fb899633..2ed95dc72784 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -90,6 +90,8 @@ struct perf_stat_config { bool scale; FILE *output; unsigned int interval; + struct runtime_stat *stats; + int stats_num; }; void update_stats(struct stats *stats, u64 val); -- cgit v1.2.3-70-g09d2 From 14e72a21c783654ca7b6c897b6d6508c1abccd7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:08 +0800 Subject: perf stat: Update or print per-thread stats If the stats pointer in stat_config structure is not null, it will update the per-thread stats or print the per-thread stats on this buffer. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-9-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 9 +++++++-- tools/perf/util/stat.c | 11 ++++++++--- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 4c492ac3ac07..f4129a5fbb01 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1372,8 +1372,13 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) fprintf(output, "%s", prefix); uval = val * counter->scale; - printout(thread, 0, counter, uval, prefix, run, ena, 1.0, - &rt_stat); + + if (stat_config.stats) + printout(thread, 0, counter, uval, prefix, run, ena, + 1.0, &stat_config.stats[thread]); + else + printout(thread, 0, counter, uval, prefix, run, ena, + 1.0, &rt_stat); fputc('\n', output); } } diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 78abfd40b135..32235657c1ac 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -280,9 +280,14 @@ process_counter_values(struct perf_stat_config *config, struct perf_evsel *evsel if (config->aggr_mode == AGGR_NONE) perf_stat__update_shadow_stats(evsel, count->val, cpu, &rt_stat); - if (config->aggr_mode == AGGR_THREAD) - perf_stat__update_shadow_stats(evsel, count->val, 0, - &rt_stat); + if (config->aggr_mode == AGGR_THREAD) { + if (config->stats) + perf_stat__update_shadow_stats(evsel, + count->val, 0, &config->stats[thread]); + else + perf_stat__update_shadow_stats(evsel, + count->val, 0, &rt_stat); + } break; case AGGR_GLOBAL: aggr->val += count->val; -- cgit v1.2.3-70-g09d2 From 73c0ca1eee3d2c96898e05a16be49da2a6d590b2 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:09 +0800 Subject: perf thread_map: Enumerate all threads from /proc This patch calls thread_map__new_all_cpus() to enumerate all threads from /proc if per-thread flag is enabled. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-10-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/thread-map.c | 2 +- tools/perf/util/evlist.c | 3 ++- tools/perf/util/thread_map.c | 5 ++++- tools/perf/util/thread_map.h | 2 +- 4 files changed, 8 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index dbcb6a19b375..4de1939b58ba 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -105,7 +105,7 @@ int test__thread_map_remove(struct test *test __maybe_unused, int subtest __mayb TEST_ASSERT_VAL("failed to allocate map string", asprintf(&str, "%d,%d", getpid(), getppid()) >= 0); - threads = thread_map__new_str(str, NULL, 0); + threads = thread_map__new_str(str, NULL, 0, false); TEST_ASSERT_VAL("failed to allocate thread_map", threads); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 3570355bcf39..f0a5e09c4071 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1105,7 +1105,8 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) struct cpu_map *cpus; struct thread_map *threads; - threads = thread_map__new_str(target->pid, target->tid, target->uid); + threads = thread_map__new_str(target->pid, target->tid, target->uid, + target->per_thread); if (!threads) return -1; diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index 2b653853eec2..3e1038f6491c 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -323,7 +323,7 @@ out_free_threads: } struct thread_map *thread_map__new_str(const char *pid, const char *tid, - uid_t uid) + uid_t uid, bool per_thread) { if (pid) return thread_map__new_by_pid_str(pid); @@ -331,6 +331,9 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, if (!tid && uid != UINT_MAX) return thread_map__new_by_uid(uid); + if (per_thread) + return thread_map__new_all_cpus(); + return thread_map__new_by_tid_str(tid); } diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 07a765fb22bb..0a806b99e73c 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -31,7 +31,7 @@ struct thread_map *thread_map__get(struct thread_map *map); void thread_map__put(struct thread_map *map); struct thread_map *thread_map__new_str(const char *pid, - const char *tid, uid_t uid); + const char *tid, uid_t uid, bool per_thread); struct thread_map *thread_map__new_by_tid_str(const char *tid_str); -- cgit v1.2.3-70-g09d2 From 1d9f8d1b824bf69cf984c1c36e5641b51eea42bb Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:10 +0800 Subject: perf stat: Remove --per-thread pid/tid limitation Currently, if we execute 'perf stat --per-thread' without specifying pid/tid, perf will return error. root@skl:/tmp# perf stat --per-thread The --per-thread option is only available when monitoring via -p -t options. -p, --pid stat events on existing process id -t, --tid stat events on existing thread id This patch removes this limitation. If no pid/tid specified, it returns all threads (get threads from /proc). Note that it doesn't support cpu_list yet so if it's a cpu_list case, then skip. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-11-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 23 +++++++++++++++-------- tools/perf/util/target.h | 7 +++++++ 2 files changed, 22 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index f4129a5fbb01..ee708ba6f79a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -277,7 +277,7 @@ static int create_perf_stat_counter(struct perf_evsel *evsel) attr->enable_on_exec = 1; } - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) return perf_evsel__open_per_cpu(evsel, perf_evsel__cpus(evsel)); return perf_evsel__open_per_thread(evsel, evsel_list->threads); @@ -340,7 +340,7 @@ static int read_counter(struct perf_evsel *counter) int nthreads = thread_map__nr(evsel_list->threads); int ncpus, cpu, thread; - if (target__has_cpu(&target)) + if (target__has_cpu(&target) && !target__has_per_thread(&target)) ncpus = perf_evsel__nr_cpus(counter); else ncpus = 1; @@ -2743,12 +2743,16 @@ int cmd_stat(int argc, const char **argv) run_count = 1; } - if ((stat_config.aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { - fprintf(stderr, "The --per-thread option is only available " - "when monitoring via -p -t options.\n"); - parse_options_usage(NULL, stat_options, "p", 1); - parse_options_usage(NULL, stat_options, "t", 1); - goto out; + if ((stat_config.aggr_mode == AGGR_THREAD) && + !target__has_task(&target)) { + if (!target.system_wide || target.cpu_list) { + fprintf(stderr, "The --per-thread option is only " + "available when monitoring via -p -t -a " + "options or only --per-thread.\n"); + parse_options_usage(NULL, stat_options, "p", 1); + parse_options_usage(NULL, stat_options, "t", 1); + goto out; + } } /* @@ -2772,6 +2776,9 @@ int cmd_stat(int argc, const char **argv) target__validate(&target); + if ((stat_config.aggr_mode == AGGR_THREAD) && (target.system_wide)) + target.per_thread = true; + if (perf_evlist__create_maps(evsel_list, &target) < 0) { if (target__has_task(&target)) { pr_err("Problems finding threads of monitor\n"); diff --git a/tools/perf/util/target.h b/tools/perf/util/target.h index 446aa7a56f25..6ef01a83b24e 100644 --- a/tools/perf/util/target.h +++ b/tools/perf/util/target.h @@ -64,6 +64,11 @@ static inline bool target__none(struct target *target) return !target__has_task(target) && !target__has_cpu(target); } +static inline bool target__has_per_thread(struct target *target) +{ + return target->system_wide && target->per_thread; +} + static inline bool target__uses_dummy_map(struct target *target) { bool use_dummy = false; @@ -73,6 +78,8 @@ static inline bool target__uses_dummy_map(struct target *target) else if (target__has_task(target) || (!target__has_cpu(target) && !target->uses_mmap)) use_dummy = true; + else if (target__has_per_thread(target)) + use_dummy = true; return use_dummy; } -- cgit v1.2.3-70-g09d2 From 29734550c996c259ffa8d32198439d6fe4b51320 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 5 Dec 2017 22:03:11 +0800 Subject: perf stat: Resort '--per-thread' result There are many threads reported if we enable '--per-thread' globally. 1. Most of the threads are not counted or counting value 0. This patch removes these threads. 2. We also resort the threads in display according to the counting value. It's useful for user to see the hottest threads easily. For example, the new results would be: root@skl:/tmp# perf stat --per-thread ^C Performance counter stats for 'system wide': perf-24165 4.302433 cpu-clock (msec) # 0.001 CPUs utilized vmstat-23127 1.562215 cpu-clock (msec) # 0.000 CPUs utilized irqbalance-2780 0.827851 cpu-clock (msec) # 0.000 CPUs utilized sshd-23111 0.278308 cpu-clock (msec) # 0.000 CPUs utilized thermald-2841 0.230880 cpu-clock (msec) # 0.000 CPUs utilized sshd-23058 0.207306 cpu-clock (msec) # 0.000 CPUs utilized kworker/0:2-19991 0.133983 cpu-clock (msec) # 0.000 CPUs utilized kworker/u16:1-18249 0.125636 cpu-clock (msec) # 0.000 CPUs utilized rcu_sched-8 0.085533 cpu-clock (msec) # 0.000 CPUs utilized kworker/u16:2-23146 0.077139 cpu-clock (msec) # 0.000 CPUs utilized gmain-2700 0.041789 cpu-clock (msec) # 0.000 CPUs utilized kworker/4:1-15354 0.028370 cpu-clock (msec) # 0.000 CPUs utilized kworker/6:0-17528 0.023895 cpu-clock (msec) # 0.000 CPUs utilized kworker/4:1H-1887 0.013209 cpu-clock (msec) # 0.000 CPUs utilized kworker/5:2-31362 0.011627 cpu-clock (msec) # 0.000 CPUs utilized watchdog/0-11 0.010892 cpu-clock (msec) # 0.000 CPUs utilized kworker/3:2-12870 0.010220 cpu-clock (msec) # 0.000 CPUs utilized ksoftirqd/0-7 0.008869 cpu-clock (msec) # 0.000 CPUs utilized watchdog/1-14 0.008476 cpu-clock (msec) # 0.000 CPUs utilized watchdog/7-50 0.002944 cpu-clock (msec) # 0.000 CPUs utilized watchdog/3-26 0.002893 cpu-clock (msec) # 0.000 CPUs utilized watchdog/4-32 0.002759 cpu-clock (msec) # 0.000 CPUs utilized watchdog/2-20 0.002429 cpu-clock (msec) # 0.000 CPUs utilized watchdog/6-44 0.001491 cpu-clock (msec) # 0.000 CPUs utilized watchdog/5-38 0.001477 cpu-clock (msec) # 0.000 CPUs utilized rcu_sched-8 10 context-switches # 0.117 M/sec kworker/u16:1-18249 7 context-switches # 0.056 M/sec sshd-23111 4 context-switches # 0.014 M/sec vmstat-23127 4 context-switches # 0.003 M/sec perf-24165 4 context-switches # 0.930 K/sec kworker/0:2-19991 3 context-switches # 0.022 M/sec kworker/u16:2-23146 3 context-switches # 0.039 M/sec kworker/4:1-15354 2 context-switches # 0.070 M/sec kworker/6:0-17528 2 context-switches # 0.084 M/sec sshd-23058 2 context-switches # 0.010 M/sec ksoftirqd/0-7 1 context-switches # 0.113 M/sec watchdog/0-11 1 context-switches # 0.092 M/sec watchdog/1-14 1 context-switches # 0.118 M/sec watchdog/2-20 1 context-switches # 0.412 M/sec watchdog/3-26 1 context-switches # 0.346 M/sec watchdog/4-32 1 context-switches # 0.362 M/sec watchdog/5-38 1 context-switches # 0.677 M/sec watchdog/6-44 1 context-switches # 0.671 M/sec watchdog/7-50 1 context-switches # 0.340 M/sec kworker/4:1H-1887 1 context-switches # 0.076 M/sec thermald-2841 1 context-switches # 0.004 M/sec gmain-2700 1 context-switches # 0.024 M/sec irqbalance-2780 1 context-switches # 0.001 M/sec kworker/3:2-12870 1 context-switches # 0.098 M/sec kworker/5:2-31362 1 context-switches # 0.086 M/sec kworker/u16:1-18249 2 cpu-migrations # 0.016 M/sec kworker/u16:2-23146 2 cpu-migrations # 0.026 M/sec rcu_sched-8 1 cpu-migrations # 0.012 M/sec sshd-23058 1 cpu-migrations # 0.005 M/sec perf-24165 8,833,385 cycles # 2.053 GHz vmstat-23127 1,702,699 cycles # 1.090 GHz irqbalance-2780 739,847 cycles # 0.894 GHz sshd-23111 269,506 cycles # 0.968 GHz thermald-2841 204,556 cycles # 0.886 GHz sshd-23058 158,780 cycles # 0.766 GHz kworker/0:2-19991 112,981 cycles # 0.843 GHz kworker/u16:1-18249 100,926 cycles # 0.803 GHz rcu_sched-8 74,024 cycles # 0.865 GHz kworker/u16:2-23146 55,984 cycles # 0.726 GHz gmain-2700 34,278 cycles # 0.820 GHz kworker/4:1-15354 20,665 cycles # 0.728 GHz kworker/6:0-17528 16,445 cycles # 0.688 GHz kworker/5:2-31362 9,492 cycles # 0.816 GHz watchdog/3-26 8,695 cycles # 3.006 GHz kworker/4:1H-1887 8,238 cycles # 0.624 GHz watchdog/4-32 7,580 cycles # 2.747 GHz kworker/3:2-12870 7,306 cycles # 0.715 GHz watchdog/2-20 7,274 cycles # 2.995 GHz watchdog/0-11 6,988 cycles # 0.642 GHz ksoftirqd/0-7 6,376 cycles # 0.719 GHz watchdog/1-14 5,340 cycles # 0.630 GHz watchdog/5-38 4,061 cycles # 2.749 GHz watchdog/6-44 3,976 cycles # 2.667 GHz watchdog/7-50 3,418 cycles # 1.161 GHz vmstat-23127 2,511,699 instructions # 1.48 insn per cycle perf-24165 1,829,908 instructions # 0.21 insn per cycle irqbalance-2780 1,190,204 instructions # 1.61 insn per cycle thermald-2841 143,544 instructions # 0.70 insn per cycle sshd-23111 128,138 instructions # 0.48 insn per cycle sshd-23058 57,654 instructions # 0.36 insn per cycle rcu_sched-8 44,063 instructions # 0.60 insn per cycle kworker/u16:1-18249 42,551 instructions # 0.42 insn per cycle kworker/0:2-19991 25,873 instructions # 0.23 insn per cycle kworker/u16:2-23146 21,407 instructions # 0.38 insn per cycle gmain-2700 13,691 instructions # 0.40 insn per cycle kworker/4:1-15354 12,964 instructions # 0.63 insn per cycle kworker/6:0-17528 10,034 instructions # 0.61 insn per cycle kworker/5:2-31362 5,203 instructions # 0.55 insn per cycle kworker/3:2-12870 4,866 instructions # 0.67 insn per cycle kworker/4:1H-1887 3,586 instructions # 0.44 insn per cycle ksoftirqd/0-7 3,463 instructions # 0.54 insn per cycle watchdog/0-11 3,135 instructions # 0.45 insn per cycle watchdog/1-14 3,135 instructions # 0.59 insn per cycle watchdog/2-20 3,135 instructions # 0.43 insn per cycle watchdog/3-26 3,135 instructions # 0.36 insn per cycle watchdog/4-32 3,135 instructions # 0.41 insn per cycle watchdog/5-38 3,135 instructions # 0.77 insn per cycle watchdog/6-44 3,135 instructions # 0.79 insn per cycle watchdog/7-50 3,135 instructions # 0.92 insn per cycle vmstat-23127 539,181 branches # 345.139 M/sec perf-24165 375,364 branches # 87.245 M/sec irqbalance-2780 262,092 branches # 316.593 M/sec thermald-2841 31,611 branches # 136.915 M/sec sshd-23111 21,874 branches # 78.596 M/sec sshd-23058 10,682 branches # 51.528 M/sec rcu_sched-8 8,693 branches # 101.633 M/sec kworker/u16:1-18249 7,891 branches # 62.808 M/sec kworker/0:2-19991 5,761 branches # 42.998 M/sec kworker/u16:2-23146 4,099 branches # 53.138 M/sec kworker/4:1-15354 2,755 branches # 97.110 M/sec gmain-2700 2,638 branches # 63.127 M/sec kworker/6:0-17528 2,216 branches # 92.739 M/sec kworker/5:2-31362 1,132 branches # 97.360 M/sec kworker/3:2-12870 1,081 branches # 105.773 M/sec kworker/4:1H-1887 725 branches # 54.887 M/sec ksoftirqd/0-7 707 branches # 79.716 M/sec watchdog/0-11 652 branches # 59.860 M/sec watchdog/1-14 652 branches # 76.923 M/sec watchdog/2-20 652 branches # 268.423 M/sec watchdog/3-26 652 branches # 225.372 M/sec watchdog/4-32 652 branches # 236.318 M/sec watchdog/5-38 652 branches # 441.435 M/sec watchdog/6-44 652 branches # 437.290 M/sec watchdog/7-50 652 branches # 221.467 M/sec vmstat-23127 8,960 branch-misses # 1.66% of all branches irqbalance-2780 3,047 branch-misses # 1.16% of all branches perf-24165 2,876 branch-misses # 0.77% of all branches sshd-23111 1,843 branch-misses # 8.43% of all branches thermald-2841 1,444 branch-misses # 4.57% of all branches sshd-23058 1,379 branch-misses # 12.91% of all branches kworker/u16:1-18249 982 branch-misses # 12.44% of all branches rcu_sched-8 893 branch-misses # 10.27% of all branches kworker/u16:2-23146 578 branch-misses # 14.10% of all branches kworker/0:2-19991 376 branch-misses # 6.53% of all branches gmain-2700 280 branch-misses # 10.61% of all branches kworker/6:0-17528 196 branch-misses # 8.84% of all branches kworker/4:1-15354 187 branch-misses # 6.79% of all branches kworker/5:2-31362 123 branch-misses # 10.87% of all branches watchdog/0-11 95 branch-misses # 14.57% of all branches watchdog/4-32 89 branch-misses # 13.65% of all branches kworker/3:2-12870 80 branch-misses # 7.40% of all branches watchdog/3-26 61 branch-misses # 9.36% of all branches kworker/4:1H-1887 60 branch-misses # 8.28% of all branches watchdog/2-20 52 branch-misses # 7.98% of all branches ksoftirqd/0-7 47 branch-misses # 6.65% of all branches watchdog/1-14 46 branch-misses # 7.06% of all branches watchdog/7-50 13 branch-misses # 1.99% of all branches watchdog/5-38 8 branch-misses # 1.23% of all branches watchdog/6-44 7 branch-misses # 1.07% of all branches 3.695150786 seconds time elapsed root@skl:/tmp# perf stat --per-thread -M IPC,CPI ^C Performance counter stats for 'system wide': vmstat-23127 2,000,783 inst_retired.any # 1.5 IPC thermald-2841 1,472,670 inst_retired.any # 1.3 IPC sshd-23111 977,374 inst_retired.any # 1.2 IPC perf-24163 483,779 inst_retired.any # 0.2 IPC gmain-2700 341,213 inst_retired.any # 0.9 IPC sshd-23058 148,891 inst_retired.any # 0.8 IPC rtkit-daemon-3288 71,210 inst_retired.any # 0.7 IPC kworker/u16:1-18249 39,562 inst_retired.any # 0.3 IPC rcu_sched-8 14,474 inst_retired.any # 0.8 IPC kworker/0:2-19991 7,659 inst_retired.any # 0.2 IPC kworker/4:1-15354 6,714 inst_retired.any # 0.8 IPC rtkit-daemon-3289 4,839 inst_retired.any # 0.3 IPC kworker/6:0-17528 3,321 inst_retired.any # 0.6 IPC kworker/5:2-31362 3,215 inst_retired.any # 0.5 IPC kworker/7:2-23145 3,173 inst_retired.any # 0.7 IPC kworker/4:1H-1887 1,719 inst_retired.any # 0.3 IPC watchdog/0-11 1,479 inst_retired.any # 0.3 IPC watchdog/1-14 1,479 inst_retired.any # 0.3 IPC watchdog/2-20 1,479 inst_retired.any # 0.4 IPC watchdog/3-26 1,479 inst_retired.any # 0.4 IPC watchdog/4-32 1,479 inst_retired.any # 0.3 IPC watchdog/5-38 1,479 inst_retired.any # 0.3 IPC watchdog/6-44 1,479 inst_retired.any # 0.7 IPC watchdog/7-50 1,479 inst_retired.any # 0.7 IPC kworker/u16:2-23146 1,408 inst_retired.any # 0.5 IPC perf-24163 2,249,872 cpu_clk_unhalted.thread vmstat-23127 1,352,455 cpu_clk_unhalted.thread thermald-2841 1,161,140 cpu_clk_unhalted.thread sshd-23111 807,827 cpu_clk_unhalted.thread gmain-2700 375,535 cpu_clk_unhalted.thread sshd-23058 194,071 cpu_clk_unhalted.thread kworker/u16:1-18249 114,306 cpu_clk_unhalted.thread rtkit-daemon-3288 103,547 cpu_clk_unhalted.thread kworker/0:2-19991 46,550 cpu_clk_unhalted.thread rcu_sched-8 18,855 cpu_clk_unhalted.thread rtkit-daemon-3289 17,549 cpu_clk_unhalted.thread kworker/4:1-15354 8,812 cpu_clk_unhalted.thread kworker/5:2-31362 6,812 cpu_clk_unhalted.thread kworker/4:1H-1887 5,270 cpu_clk_unhalted.thread kworker/6:0-17528 5,111 cpu_clk_unhalted.thread kworker/7:2-23145 4,667 cpu_clk_unhalted.thread watchdog/0-11 4,663 cpu_clk_unhalted.thread watchdog/1-14 4,663 cpu_clk_unhalted.thread watchdog/4-32 4,626 cpu_clk_unhalted.thread watchdog/5-38 4,403 cpu_clk_unhalted.thread watchdog/3-26 3,936 cpu_clk_unhalted.thread watchdog/2-20 3,850 cpu_clk_unhalted.thread kworker/u16:2-23146 2,654 cpu_clk_unhalted.thread watchdog/6-44 2,017 cpu_clk_unhalted.thread watchdog/7-50 2,017 cpu_clk_unhalted.thread vmstat-23127 2,000,783 inst_retired.any # 0.7 CPI thermald-2841 1,472,670 inst_retired.any # 0.8 CPI sshd-23111 977,374 inst_retired.any # 0.8 CPI perf-24163 495,037 inst_retired.any # 4.7 CPI gmain-2700 341,213 inst_retired.any # 1.1 CPI sshd-23058 148,891 inst_retired.any # 1.3 CPI rtkit-daemon-3288 71,210 inst_retired.any # 1.5 CPI kworker/u16:1-18249 39,562 inst_retired.any # 2.9 CPI rcu_sched-8 14,474 inst_retired.any # 1.3 CPI kworker/0:2-19991 7,659 inst_retired.any # 6.1 CPI kworker/4:1-15354 6,714 inst_retired.any # 1.3 CPI rtkit-daemon-3289 4,839 inst_retired.any # 3.6 CPI kworker/6:0-17528 3,321 inst_retired.any # 1.5 CPI kworker/5:2-31362 3,215 inst_retired.any # 2.1 CPI kworker/7:2-23145 3,173 inst_retired.any # 1.5 CPI kworker/4:1H-1887 1,719 inst_retired.any # 3.1 CPI watchdog/0-11 1,479 inst_retired.any # 3.2 CPI watchdog/1-14 1,479 inst_retired.any # 3.2 CPI watchdog/2-20 1,479 inst_retired.any # 2.6 CPI watchdog/3-26 1,479 inst_retired.any # 2.7 CPI watchdog/4-32 1,479 inst_retired.any # 3.1 CPI watchdog/5-38 1,479 inst_retired.any # 3.0 CPI watchdog/6-44 1,479 inst_retired.any # 1.4 CPI watchdog/7-50 1,479 inst_retired.any # 1.4 CPI kworker/u16:2-23146 1,408 inst_retired.any # 1.9 CPI perf-24163 2,302,323 cycles vmstat-23127 1,352,455 cycles thermald-2841 1,161,140 cycles sshd-23111 807,827 cycles gmain-2700 375,535 cycles sshd-23058 194,071 cycles kworker/u16:1-18249 114,306 cycles rtkit-daemon-3288 103,547 cycles kworker/0:2-19991 46,550 cycles rcu_sched-8 18,855 cycles rtkit-daemon-3289 17,549 cycles kworker/4:1-15354 8,812 cycles kworker/5:2-31362 6,812 cycles kworker/4:1H-1887 5,270 cycles kworker/6:0-17528 5,111 cycles kworker/7:2-23145 4,667 cycles watchdog/0-11 4,663 cycles watchdog/1-14 4,663 cycles watchdog/4-32 4,626 cycles watchdog/5-38 4,403 cycles watchdog/3-26 3,936 cycles watchdog/2-20 3,850 cycles kworker/u16:2-23146 2,654 cycles watchdog/6-44 2,017 cycles watchdog/7-50 2,017 cycles 2.175726600 seconds time elapsed Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512482591-4646-12-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-stat.c | 77 ++++++++++++++++++++++++++++++++++++++++------- tools/perf/util/stat.h | 9 ++++++ 2 files changed, 75 insertions(+), 11 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index ee708ba6f79a..58d501d1f5fd 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -1351,13 +1351,24 @@ static void print_aggr(char *prefix) } } -static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +static int cmp_val(const void *a, const void *b) { - FILE *output = stat_config.output; - int nthreads = thread_map__nr(counter->threads); - int ncpus = cpu_map__nr(counter->cpus); - int cpu, thread; + return ((struct perf_aggr_thread_value *)b)->val - + ((struct perf_aggr_thread_value *)a)->val; +} + +static struct perf_aggr_thread_value *sort_aggr_thread( + struct perf_evsel *counter, + int nthreads, int ncpus, + int *ret) +{ + int cpu, thread, i = 0; double uval; + struct perf_aggr_thread_value *buf; + + buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); + if (!buf) + return NULL; for (thread = 0; thread < nthreads; thread++) { u64 ena = 0, run = 0, val = 0; @@ -1368,19 +1379,63 @@ static void print_aggr_thread(struct perf_evsel *counter, char *prefix) run += perf_counts(counter->counts, cpu, thread)->run; } + uval = val * counter->scale; + + /* + * Skip value 0 when enabling --per-thread globally, + * otherwise too many 0 output. + */ + if (uval == 0.0 && target__has_per_thread(&target)) + continue; + + buf[i].counter = counter; + buf[i].id = thread; + buf[i].uval = uval; + buf[i].val = val; + buf[i].run = run; + buf[i].ena = ena; + i++; + } + + qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); + + if (ret) + *ret = i; + + return buf; +} + +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + FILE *output = stat_config.output; + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int thread, sorted_threads, id; + struct perf_aggr_thread_value *buf; + + buf = sort_aggr_thread(counter, nthreads, ncpus, &sorted_threads); + if (!buf) { + perror("cannot sort aggr thread"); + return; + } + + for (thread = 0; thread < sorted_threads; thread++) { if (prefix) fprintf(output, "%s", prefix); - uval = val * counter->scale; - + id = buf[thread].id; if (stat_config.stats) - printout(thread, 0, counter, uval, prefix, run, ena, - 1.0, &stat_config.stats[thread]); + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &stat_config.stats[id]); else - printout(thread, 0, counter, uval, prefix, run, ena, - 1.0, &rt_stat); + printout(id, 0, buf[thread].counter, buf[thread].uval, + prefix, buf[thread].run, buf[thread].ena, 1.0, + &rt_stat); fputc('\n', output); } + + free(buf); } struct caggr_data { diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 2ed95dc72784..dbc6f7134f61 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -111,6 +111,15 @@ static inline void init_stats(struct stats *stats) struct perf_evsel; struct perf_evlist; +struct perf_aggr_thread_value { + struct perf_evsel *counter; + int id; + double uval; + u64 val; + u64 run; + u64 ena; +}; + bool __perf_evsel_stat__is(struct perf_evsel *evsel, enum perf_stat_evsel_id id); -- cgit v1.2.3-70-g09d2 From 06c3f2aa9fc68e7f3fe3d83e7569d2a2801d9f99 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: perf utils: Move is_directory() to path.h So that it can be used more widely, like in the next patch, when it will be used to fix a bug in 'perf test' handling of dirent.d_type == DT_UNKNOWN. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Michael Petlan Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch, removed needless includes in path.h ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 14 +------------- tools/perf/util/path.c | 14 ++++++++++++++ tools/perf/util/path.h | 3 +++ 3 files changed, 18 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index fac6f053e4da..77e47cf39f2c 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -26,6 +26,7 @@ #include "util/string2.h" #include "util/thread-stack.h" #include "util/time-utils.h" +#include "util/path.h" #include "print_binary.h" #include #include @@ -2401,19 +2402,6 @@ out: return rc; } -/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ -static int is_directory(const char *base_path, const struct dirent *dent) -{ - char path[PATH_MAX]; - struct stat st; - - sprintf(path, "%s/%s", base_path, dent->d_name); - if (stat(path, &st)) - return 0; - - return S_ISDIR(st.st_mode); -} - #define for_each_lang(scripts_path, scripts_dir, lang_dirent) \ while ((lang_dirent = readdir(scripts_dir)) != NULL) \ if ((lang_dirent->d_type == DT_DIR || \ diff --git a/tools/perf/util/path.c b/tools/perf/util/path.c index 933f5c6bffb4..ca56ba2dd3da 100644 --- a/tools/perf/util/path.c +++ b/tools/perf/util/path.c @@ -18,6 +18,7 @@ #include #include #include +#include #include static char bad_path[] = "/bad-path/"; @@ -77,3 +78,16 @@ bool is_regular_file(const char *file) return S_ISREG(st.st_mode); } + +/* Helper function for filesystems that return a dent->d_type DT_UNKNOWN */ +bool is_directory(const char *base_path, const struct dirent *dent) +{ + char path[PATH_MAX]; + struct stat st; + + sprintf(path, "%s/%s", base_path, dent->d_name); + if (stat(path, &st)) + return false; + + return S_ISDIR(st.st_mode); +} diff --git a/tools/perf/util/path.h b/tools/perf/util/path.h index 14a254ada7eb..f014f905df50 100644 --- a/tools/perf/util/path.h +++ b/tools/perf/util/path.h @@ -2,9 +2,12 @@ #ifndef _PERF_PATH_H #define _PERF_PATH_H +struct dirent; + int path__join(char *bf, size_t size, const char *path1, const char *path2); int path__join3(char *bf, size_t size, const char *path1, const char *path2, const char *path3); bool is_regular_file(const char *file); +bool is_directory(const char *base_path, const struct dirent *dent); #endif /* _PERF_PATH_H */ -- cgit v1.2.3-70-g09d2 From 378811ac303df13efbe49f3ad1795b63d334ac5d Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 6 Dec 2017 18:45:35 +0100 Subject: perf test: Handle properly readdir DT_UNKNOWN Some system can return DT_UNKNOWN in readdir's struct dirent::d_type and we must handle it properly. In this case we can directly check if the entity we found is directory and skip it. Reported-by: Michael Petlan Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20171206174535.25380-1-jolsa@kernel.org [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/builtin-test.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 766573e236e4..fafa014240cd 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -411,9 +411,9 @@ static const char *shell_test__description(char *description, size_t size, return description ? trim(description + 1) : NULL; } -#define for_each_shell_test(dir, ent) \ +#define for_each_shell_test(dir, base, ent) \ while ((ent = readdir(dir)) != NULL) \ - if (ent->d_type == DT_REG && ent->d_name[0] != '.') + if (!is_directory(base, ent)) static const char *shell_tests__dir(char *path, size_t size) { @@ -452,7 +452,7 @@ static int shell_tests__max_desc_width(void) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { char bf[256]; const char *desc = shell_test__description(bf, sizeof(bf), path, ent->d_name); @@ -504,7 +504,7 @@ static int run_shell_tests(int argc, const char *argv[], int i, int width) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, st.dir, ent) { int curr = i++; char desc[256]; struct test test = { @@ -614,7 +614,7 @@ static int perf_test__list_shell(int argc, const char **argv, int i) if (!dir) return -1; - for_each_shell_test(dir, ent) { + for_each_shell_test(dir, path, ent) { int curr = i++; char bf[256]; struct test t = { -- cgit v1.2.3-70-g09d2 From 3315d14f8eea27a845bd2e3a88341a35f4025866 Mon Sep 17 00:00:00 2001 From: Pravin Shedge Date: Wed, 6 Dec 2017 23:13:24 +0530 Subject: perf perf: Remove duplicate includes These duplicate includes have been found with scripts/checkincludes.pl but they have been removed manually to avoid removing false positives. Signed-off-by: Pravin Shedge Cc: David S. Miller Cc: Greg Kroah-Hartman Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1512582204-6493-1-git-send-email-pravin.shedge4linux@gmail.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/bench/futex-hash.c | 1 - tools/perf/builtin-c2c.c | 3 --- tools/perf/builtin-record.c | 1 - tools/perf/builtin-stat.c | 1 - tools/perf/tests/parse-events.c | 1 - tools/perf/util/auxtrace.c | 3 --- tools/perf/util/header.c | 2 -- tools/perf/util/metricgroup.c | 2 -- tools/perf/util/scripting-engines/trace-event-python.c | 1 - 9 files changed, 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/bench/futex-hash.c b/tools/perf/bench/futex-hash.c index 2defb6df7fd0..9aa3a674829b 100644 --- a/tools/perf/bench/futex-hash.c +++ b/tools/perf/bench/futex-hash.c @@ -27,7 +27,6 @@ #include "cpumap.h" #include -#include static unsigned int nthreads = 0; static unsigned int nsecs = 10; diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index f1da9b0833c0..c0debc3f79b6 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -27,13 +27,10 @@ #include "sort.h" #include "tool.h" #include "data.h" -#include "sort.h" #include "event.h" #include "evlist.h" #include "evsel.h" -#include #include "ui/browsers/hists.h" -#include "evlist.h" #include "thread.h" struct c2c_hists { diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 0a5749ef8b94..98da8cb8de93 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -51,7 +51,6 @@ #include #include #include -#include #include struct switch_output { diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 58d501d1f5fd..98bf9d32f222 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -63,7 +63,6 @@ #include "util/group.h" #include "util/session.h" #include "util/tool.h" -#include "util/group.h" #include "util/string2.h" #include "util/metricgroup.h" #include "asm/bug.h" diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index f0679613bd18..18b06444f230 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #define PERF_TP_SAMPLE_TYPE (PERF_SAMPLE_RAW | PERF_SAMPLE_TIME | \ diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index a33491416400..c76687e42344 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -31,9 +31,6 @@ #include #include #include -#include -#include -#include #include #include "../perf.h" diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 5890e08e0754..ca73aa7be708 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -15,9 +15,7 @@ #include #include #include -#include #include -#include #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index e48410c99b39..1ddc3d1d0147 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -20,12 +20,10 @@ #include "pmu.h" #include "expr.h" #include "rblist.h" -#include "pmu.h" #include #include #include #include "pmu-events/pmu-events.h" -#include "strbuf.h" #include "strlist.h" #include #include diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c7187f067d31..c1848b543f27 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -43,7 +43,6 @@ #include "../db-export.h" #include "../thread-stack.h" #include "../trace-event.h" -#include "../machine.h" #include "../call-path.h" #include "thread_map.h" #include "cpumap.h" -- cgit v1.2.3-70-g09d2 From 7af7919f0f4bde0cec1f546f924be81cfe50533d Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: tools include s390: Grab a copy of arch/s390/include/uapi/asm/unistd.h Will be used for generating the syscall id/string translation table. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-vjfbfvgjrnqnbdluqd7leo98@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/s390/include/uapi/asm/unistd.h | 412 ++++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 1 + 2 files changed, 413 insertions(+) create mode 100644 tools/arch/s390/include/uapi/asm/unistd.h (limited to 'tools') diff --git a/tools/arch/s390/include/uapi/asm/unistd.h b/tools/arch/s390/include/uapi/asm/unistd.h new file mode 100644 index 000000000000..725120939051 --- /dev/null +++ b/tools/arch/s390/include/uapi/asm/unistd.h @@ -0,0 +1,412 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * S390 version + * + * Derived from "include/asm-i386/unistd.h" + */ + +#ifndef _UAPI_ASM_S390_UNISTD_H_ +#define _UAPI_ASM_S390_UNISTD_H_ + +/* + * This file contains the system call numbers. + */ + +#define __NR_exit 1 +#define __NR_fork 2 +#define __NR_read 3 +#define __NR_write 4 +#define __NR_open 5 +#define __NR_close 6 +#define __NR_restart_syscall 7 +#define __NR_creat 8 +#define __NR_link 9 +#define __NR_unlink 10 +#define __NR_execve 11 +#define __NR_chdir 12 +#define __NR_mknod 14 +#define __NR_chmod 15 +#define __NR_lseek 19 +#define __NR_getpid 20 +#define __NR_mount 21 +#define __NR_umount 22 +#define __NR_ptrace 26 +#define __NR_alarm 27 +#define __NR_pause 29 +#define __NR_utime 30 +#define __NR_access 33 +#define __NR_nice 34 +#define __NR_sync 36 +#define __NR_kill 37 +#define __NR_rename 38 +#define __NR_mkdir 39 +#define __NR_rmdir 40 +#define __NR_dup 41 +#define __NR_pipe 42 +#define __NR_times 43 +#define __NR_brk 45 +#define __NR_signal 48 +#define __NR_acct 51 +#define __NR_umount2 52 +#define __NR_ioctl 54 +#define __NR_fcntl 55 +#define __NR_setpgid 57 +#define __NR_umask 60 +#define __NR_chroot 61 +#define __NR_ustat 62 +#define __NR_dup2 63 +#define __NR_getppid 64 +#define __NR_getpgrp 65 +#define __NR_setsid 66 +#define __NR_sigaction 67 +#define __NR_sigsuspend 72 +#define __NR_sigpending 73 +#define __NR_sethostname 74 +#define __NR_setrlimit 75 +#define __NR_getrusage 77 +#define __NR_gettimeofday 78 +#define __NR_settimeofday 79 +#define __NR_symlink 83 +#define __NR_readlink 85 +#define __NR_uselib 86 +#define __NR_swapon 87 +#define __NR_reboot 88 +#define __NR_readdir 89 +#define __NR_mmap 90 +#define __NR_munmap 91 +#define __NR_truncate 92 +#define __NR_ftruncate 93 +#define __NR_fchmod 94 +#define __NR_getpriority 96 +#define __NR_setpriority 97 +#define __NR_statfs 99 +#define __NR_fstatfs 100 +#define __NR_socketcall 102 +#define __NR_syslog 103 +#define __NR_setitimer 104 +#define __NR_getitimer 105 +#define __NR_stat 106 +#define __NR_lstat 107 +#define __NR_fstat 108 +#define __NR_lookup_dcookie 110 +#define __NR_vhangup 111 +#define __NR_idle 112 +#define __NR_wait4 114 +#define __NR_swapoff 115 +#define __NR_sysinfo 116 +#define __NR_ipc 117 +#define __NR_fsync 118 +#define __NR_sigreturn 119 +#define __NR_clone 120 +#define __NR_setdomainname 121 +#define __NR_uname 122 +#define __NR_adjtimex 124 +#define __NR_mprotect 125 +#define __NR_sigprocmask 126 +#define __NR_create_module 127 +#define __NR_init_module 128 +#define __NR_delete_module 129 +#define __NR_get_kernel_syms 130 +#define __NR_quotactl 131 +#define __NR_getpgid 132 +#define __NR_fchdir 133 +#define __NR_bdflush 134 +#define __NR_sysfs 135 +#define __NR_personality 136 +#define __NR_afs_syscall 137 /* Syscall for Andrew File System */ +#define __NR_getdents 141 +#define __NR_flock 143 +#define __NR_msync 144 +#define __NR_readv 145 +#define __NR_writev 146 +#define __NR_getsid 147 +#define __NR_fdatasync 148 +#define __NR__sysctl 149 +#define __NR_mlock 150 +#define __NR_munlock 151 +#define __NR_mlockall 152 +#define __NR_munlockall 153 +#define __NR_sched_setparam 154 +#define __NR_sched_getparam 155 +#define __NR_sched_setscheduler 156 +#define __NR_sched_getscheduler 157 +#define __NR_sched_yield 158 +#define __NR_sched_get_priority_max 159 +#define __NR_sched_get_priority_min 160 +#define __NR_sched_rr_get_interval 161 +#define __NR_nanosleep 162 +#define __NR_mremap 163 +#define __NR_query_module 167 +#define __NR_poll 168 +#define __NR_nfsservctl 169 +#define __NR_prctl 172 +#define __NR_rt_sigreturn 173 +#define __NR_rt_sigaction 174 +#define __NR_rt_sigprocmask 175 +#define __NR_rt_sigpending 176 +#define __NR_rt_sigtimedwait 177 +#define __NR_rt_sigqueueinfo 178 +#define __NR_rt_sigsuspend 179 +#define __NR_pread64 180 +#define __NR_pwrite64 181 +#define __NR_getcwd 183 +#define __NR_capget 184 +#define __NR_capset 185 +#define __NR_sigaltstack 186 +#define __NR_sendfile 187 +#define __NR_getpmsg 188 +#define __NR_putpmsg 189 +#define __NR_vfork 190 +#define __NR_pivot_root 217 +#define __NR_mincore 218 +#define __NR_madvise 219 +#define __NR_getdents64 220 +#define __NR_readahead 222 +#define __NR_setxattr 224 +#define __NR_lsetxattr 225 +#define __NR_fsetxattr 226 +#define __NR_getxattr 227 +#define __NR_lgetxattr 228 +#define __NR_fgetxattr 229 +#define __NR_listxattr 230 +#define __NR_llistxattr 231 +#define __NR_flistxattr 232 +#define __NR_removexattr 233 +#define __NR_lremovexattr 234 +#define __NR_fremovexattr 235 +#define __NR_gettid 236 +#define __NR_tkill 237 +#define __NR_futex 238 +#define __NR_sched_setaffinity 239 +#define __NR_sched_getaffinity 240 +#define __NR_tgkill 241 +/* Number 242 is reserved for tux */ +#define __NR_io_setup 243 +#define __NR_io_destroy 244 +#define __NR_io_getevents 245 +#define __NR_io_submit 246 +#define __NR_io_cancel 247 +#define __NR_exit_group 248 +#define __NR_epoll_create 249 +#define __NR_epoll_ctl 250 +#define __NR_epoll_wait 251 +#define __NR_set_tid_address 252 +#define __NR_fadvise64 253 +#define __NR_timer_create 254 +#define __NR_timer_settime 255 +#define __NR_timer_gettime 256 +#define __NR_timer_getoverrun 257 +#define __NR_timer_delete 258 +#define __NR_clock_settime 259 +#define __NR_clock_gettime 260 +#define __NR_clock_getres 261 +#define __NR_clock_nanosleep 262 +/* Number 263 is reserved for vserver */ +#define __NR_statfs64 265 +#define __NR_fstatfs64 266 +#define __NR_remap_file_pages 267 +#define __NR_mbind 268 +#define __NR_get_mempolicy 269 +#define __NR_set_mempolicy 270 +#define __NR_mq_open 271 +#define __NR_mq_unlink 272 +#define __NR_mq_timedsend 273 +#define __NR_mq_timedreceive 274 +#define __NR_mq_notify 275 +#define __NR_mq_getsetattr 276 +#define __NR_kexec_load 277 +#define __NR_add_key 278 +#define __NR_request_key 279 +#define __NR_keyctl 280 +#define __NR_waitid 281 +#define __NR_ioprio_set 282 +#define __NR_ioprio_get 283 +#define __NR_inotify_init 284 +#define __NR_inotify_add_watch 285 +#define __NR_inotify_rm_watch 286 +#define __NR_migrate_pages 287 +#define __NR_openat 288 +#define __NR_mkdirat 289 +#define __NR_mknodat 290 +#define __NR_fchownat 291 +#define __NR_futimesat 292 +#define __NR_unlinkat 294 +#define __NR_renameat 295 +#define __NR_linkat 296 +#define __NR_symlinkat 297 +#define __NR_readlinkat 298 +#define __NR_fchmodat 299 +#define __NR_faccessat 300 +#define __NR_pselect6 301 +#define __NR_ppoll 302 +#define __NR_unshare 303 +#define __NR_set_robust_list 304 +#define __NR_get_robust_list 305 +#define __NR_splice 306 +#define __NR_sync_file_range 307 +#define __NR_tee 308 +#define __NR_vmsplice 309 +#define __NR_move_pages 310 +#define __NR_getcpu 311 +#define __NR_epoll_pwait 312 +#define __NR_utimes 313 +#define __NR_fallocate 314 +#define __NR_utimensat 315 +#define __NR_signalfd 316 +#define __NR_timerfd 317 +#define __NR_eventfd 318 +#define __NR_timerfd_create 319 +#define __NR_timerfd_settime 320 +#define __NR_timerfd_gettime 321 +#define __NR_signalfd4 322 +#define __NR_eventfd2 323 +#define __NR_inotify_init1 324 +#define __NR_pipe2 325 +#define __NR_dup3 326 +#define __NR_epoll_create1 327 +#define __NR_preadv 328 +#define __NR_pwritev 329 +#define __NR_rt_tgsigqueueinfo 330 +#define __NR_perf_event_open 331 +#define __NR_fanotify_init 332 +#define __NR_fanotify_mark 333 +#define __NR_prlimit64 334 +#define __NR_name_to_handle_at 335 +#define __NR_open_by_handle_at 336 +#define __NR_clock_adjtime 337 +#define __NR_syncfs 338 +#define __NR_setns 339 +#define __NR_process_vm_readv 340 +#define __NR_process_vm_writev 341 +#define __NR_s390_runtime_instr 342 +#define __NR_kcmp 343 +#define __NR_finit_module 344 +#define __NR_sched_setattr 345 +#define __NR_sched_getattr 346 +#define __NR_renameat2 347 +#define __NR_seccomp 348 +#define __NR_getrandom 349 +#define __NR_memfd_create 350 +#define __NR_bpf 351 +#define __NR_s390_pci_mmio_write 352 +#define __NR_s390_pci_mmio_read 353 +#define __NR_execveat 354 +#define __NR_userfaultfd 355 +#define __NR_membarrier 356 +#define __NR_recvmmsg 357 +#define __NR_sendmmsg 358 +#define __NR_socket 359 +#define __NR_socketpair 360 +#define __NR_bind 361 +#define __NR_connect 362 +#define __NR_listen 363 +#define __NR_accept4 364 +#define __NR_getsockopt 365 +#define __NR_setsockopt 366 +#define __NR_getsockname 367 +#define __NR_getpeername 368 +#define __NR_sendto 369 +#define __NR_sendmsg 370 +#define __NR_recvfrom 371 +#define __NR_recvmsg 372 +#define __NR_shutdown 373 +#define __NR_mlock2 374 +#define __NR_copy_file_range 375 +#define __NR_preadv2 376 +#define __NR_pwritev2 377 +#define __NR_s390_guarded_storage 378 +#define __NR_statx 379 +#define __NR_s390_sthyi 380 +#define NR_syscalls 381 + +/* + * There are some system calls that are not present on 64 bit, some + * have a different name although they do the same (e.g. __NR_chown32 + * is __NR_chown on 64 bit). + */ +#ifndef __s390x__ + +#define __NR_time 13 +#define __NR_lchown 16 +#define __NR_setuid 23 +#define __NR_getuid 24 +#define __NR_stime 25 +#define __NR_setgid 46 +#define __NR_getgid 47 +#define __NR_geteuid 49 +#define __NR_getegid 50 +#define __NR_setreuid 70 +#define __NR_setregid 71 +#define __NR_getrlimit 76 +#define __NR_getgroups 80 +#define __NR_setgroups 81 +#define __NR_fchown 95 +#define __NR_ioperm 101 +#define __NR_setfsuid 138 +#define __NR_setfsgid 139 +#define __NR__llseek 140 +#define __NR__newselect 142 +#define __NR_setresuid 164 +#define __NR_getresuid 165 +#define __NR_setresgid 170 +#define __NR_getresgid 171 +#define __NR_chown 182 +#define __NR_ugetrlimit 191 /* SuS compliant getrlimit */ +#define __NR_mmap2 192 +#define __NR_truncate64 193 +#define __NR_ftruncate64 194 +#define __NR_stat64 195 +#define __NR_lstat64 196 +#define __NR_fstat64 197 +#define __NR_lchown32 198 +#define __NR_getuid32 199 +#define __NR_getgid32 200 +#define __NR_geteuid32 201 +#define __NR_getegid32 202 +#define __NR_setreuid32 203 +#define __NR_setregid32 204 +#define __NR_getgroups32 205 +#define __NR_setgroups32 206 +#define __NR_fchown32 207 +#define __NR_setresuid32 208 +#define __NR_getresuid32 209 +#define __NR_setresgid32 210 +#define __NR_getresgid32 211 +#define __NR_chown32 212 +#define __NR_setuid32 213 +#define __NR_setgid32 214 +#define __NR_setfsuid32 215 +#define __NR_setfsgid32 216 +#define __NR_fcntl64 221 +#define __NR_sendfile64 223 +#define __NR_fadvise64_64 264 +#define __NR_fstatat64 293 + +#else + +#define __NR_select 142 +#define __NR_getrlimit 191 /* SuS compliant getrlimit */ +#define __NR_lchown 198 +#define __NR_getuid 199 +#define __NR_getgid 200 +#define __NR_geteuid 201 +#define __NR_getegid 202 +#define __NR_setreuid 203 +#define __NR_setregid 204 +#define __NR_getgroups 205 +#define __NR_setgroups 206 +#define __NR_fchown 207 +#define __NR_setresuid 208 +#define __NR_getresuid 209 +#define __NR_setresgid 210 +#define __NR_getresgid 211 +#define __NR_chown 212 +#define __NR_setuid 213 +#define __NR_setgid 214 +#define __NR_setfsuid 215 +#define __NR_setfsgid 216 +#define __NR_newfstatat 293 + +#endif + +#endif /* _UAPI_ASM_S390_UNISTD_H_ */ diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index ea602cd1b43a..f81ca508700c 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -33,6 +33,7 @@ arch/s390/include/uapi/asm/kvm.h arch/s390/include/uapi/asm/kvm_perf.h arch/s390/include/uapi/asm/ptrace.h arch/s390/include/uapi/asm/sie.h +arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h include/asm-generic/bitops/arch_hweight.h -- cgit v1.2.3-70-g09d2 From 164a747f1ac2380c582988d2a4d9a9af13f8e644 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: perf s390: Generate system call table from asm/unistd.h This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. Committer testing: $ rm -rf /tmp/build/perf $ mkdir /tmp/build/perf $ make srctree=/home/acme/git/perf -C tools/perf/arch/s390 OUTPUT=/tmp/build/perf/ archheaders make: Entering directory '/home/acme/git/perf/tools/perf/arch/s390' /bin/sh '/home/acme/git/perf/tools/perf/arch/s390/entry/syscalls//mksyscalltbl' 'cc' /home/acme/git/perf/tools/arch/s390/include/uapi/asm/unistd.h > /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c make: Leaving directory '/home/acme/git/perf/tools/perf/arch/s390' $ head -5 /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c static const char *syscalltbl_s390_64[] = { [1] = "exit", [2] = "fork", [3] = "read", [4] = "write", $ tail -5 /tmp/build/perf/arch/s390/include/generated/asm/syscalls_64.c [378] = "s390_guarded_storage", [379] = "statx", [380] = "s390_sthyi", }; #define SYSCALLTBL_S390_64_MAX_ID 380 $ Now to plug this into 'perf trace' proper. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-h5km60rdg3rqxvsys85q50l3@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/s390/Makefile | 21 ++++++++++++++ tools/perf/arch/s390/entry/syscalls/mksyscalltbl | 36 ++++++++++++++++++++++++ 2 files changed, 57 insertions(+) create mode 100755 tools/perf/arch/s390/entry/syscalls/mksyscalltbl (limited to 'tools') diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 09ba923debe8..48228de415d0 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -3,3 +3,24 @@ PERF_HAVE_DWARF_REGS := 1 endif HAVE_KVM_STAT_SUPPORT := 1 PERF_HAVE_ARCH_REGS_QUERY_REGISTER_OFFSET := 1 + +# +# Syscall table generation for perf +# + +out := $(OUTPUT)arch/s390/include/generated/asm +header := $(out)/syscalls_64.c +sysdef := $(srctree)/tools/arch/s390/include/uapi/asm/unistd.h +sysprf := $(srctree)/tools/perf/arch/s390/entry/syscalls/ +systbl := $(sysprf)/mksyscalltbl + +# Create output directory if not already present +_dummy := $(shell [ -d '$(out)' ] || mkdir -p '$(out)') + +$(header): $(sysdef) $(systbl) + $(Q)$(SHELL) '$(systbl)' '$(CC)' $(sysdef) > $@ + +clean:: + $(call QUIET_CLEAN, s390) $(RM) $(header) + +archheaders: $(header) diff --git a/tools/perf/arch/s390/entry/syscalls/mksyscalltbl b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl new file mode 100755 index 000000000000..7fa0d0abd419 --- /dev/null +++ b/tools/perf/arch/s390/entry/syscalls/mksyscalltbl @@ -0,0 +1,36 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate system call table for perf +# +# +# Copyright IBM Corp. 2017 +# Author(s): Hendrik Brueckner +# + +gcc=$1 +input=$2 + +if ! test -r $input; then + echo "Could not read input file" >&2 + exit 1 +fi + +create_table() +{ + local max_nr + + echo 'static const char *syscalltbl_s390_64[] = {' + while read sc nr; do + printf '\t[%d] = "%s",\n' $nr $sc + max_nr=$nr + done + echo '};' + echo "#define SYSCALLTBL_S390_64_MAX_ID $max_nr" +} + + +$gcc -m64 -E -dM -x c $input \ + |sed -ne 's/^#define __NR_//p' \ + |sort -t' ' -k2 -nu \ + |create_table -- cgit v1.2.3-70-g09d2 From 901bb0280b60782603e999a6c1e30ddfe1c7b0fb Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Thu, 7 Dec 2017 09:27:59 +0100 Subject: perf trace: Use generated syscall table on s390 too This should speed up accessing new system calls introduced with the kernel rather than waiting for libaudit updates to include them. It also enables users to specify wildcards, for example, perf trace -e 'open*', just like was already possible on x86. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1512635281-20733-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-htplh3nbrivi7g3cffbh4fsu@git.kernel.org [ split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 10 +++++++++- tools/perf/util/syscalltbl.c | 4 ++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 79b117a03fd7..6f73c2316740 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -22,6 +22,7 @@ include $(srctree)/tools/scripts/Makefile.arch $(call detected_var,SRCARCH) NO_PERF_REGS := 1 +NO_SYSCALL_TABLE := 1 # Additional ARCH settings for ppc ifeq ($(SRCARCH),powerpc) @@ -33,7 +34,8 @@ endif ifeq ($(SRCARCH),x86) $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) - CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -DHAVE_SYSCALL_TABLE -I$(OUTPUT)arch/x86/include/generated + NO_SYSCALL_TABLE := 0 + CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT -I$(OUTPUT)arch/x86/include/generated ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind-x86_64 -lunwind -llzma $(call detected,CONFIG_X86_64) @@ -56,12 +58,18 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 + NO_SYSCALL_TABLE := 0 + CFLAGS += -I$(OUTPUT)arch/s390/include/generated endif ifeq ($(NO_PERF_REGS),0) $(call detected,CONFIG_PERF_REGS) endif +ifneq ($(NO_SYSCALL_TABLE),1) + CFLAGS += -DHAVE_SYSCALL_TABLE +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. Add supported architectures diff --git a/tools/perf/util/syscalltbl.c b/tools/perf/util/syscalltbl.c index 6eea7cff3d4e..303bdb84ab5a 100644 --- a/tools/perf/util/syscalltbl.c +++ b/tools/perf/util/syscalltbl.c @@ -26,6 +26,10 @@ #include const int syscalltbl_native_max_id = SYSCALLTBL_x86_64_MAX_ID; static const char **syscalltbl_native = syscalltbl_x86_64; +#elif defined(__s390x__) +#include +const int syscalltbl_native_max_id = SYSCALLTBL_S390_64_MAX_ID; +static const char **syscalltbl_native = syscalltbl_s390_64; #endif struct syscall { -- cgit v1.2.3-70-g09d2 From 5449f13c553e9c50690419f6114665a8beb71bea Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 12:46:11 -0300 Subject: perf annotate: Get the cpuid from evsel->evlist->env in symbol__annotate() To reduce its function signature, since we get this from 'evsel' which is already one of its arguments. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-070eap7t6uicg9c3w086xy2z@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-top.c | 2 +- tools/perf/ui/browsers/annotate.c | 4 +--- tools/perf/ui/gtk/annotate.c | 2 +- tools/perf/util/annotate.c | 7 ++++--- tools/perf/util/annotate.h | 2 +- tools/perf/util/evsel.c | 6 +++--- tools/perf/util/evsel.h | 2 +- 7 files changed, 12 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 540461f5e345..c6ccda52117d 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -138,7 +138,7 @@ static int perf_top__parse_source(struct perf_top *top, struct hist_entry *he) return err; } - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err == 0) { out_assign: top->sym_filter_entry = he; diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 03b7363a49c9..286427975112 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -1116,9 +1116,7 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (perf_evsel__is_group_event(evsel)) nr_pcnt = evsel->nr_members; - err = symbol__annotate(sym, map, evsel, - sizeof(struct browser_line), &browser.arch, - perf_evsel__env_cpuid(evsel)); + err = symbol__annotate(sym, map, evsel, sizeof(struct browser_line), &browser.arch); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/ui/gtk/annotate.c b/tools/perf/ui/gtk/annotate.c index cdb5ecf91666..aeeaf15029f0 100644 --- a/tools/perf/ui/gtk/annotate.c +++ b/tools/perf/ui/gtk/annotate.c @@ -169,7 +169,7 @@ static int symbol__gtk_annotate(struct symbol *sym, struct map *map, if (map->dso->annotate_warned) return -1; - err = symbol__annotate(sym, map, evsel, 0, NULL, NULL); + err = symbol__annotate(sym, map, evsel, 0, NULL); if (err) { char msg[BUFSIZ]; symbol__strerror_disassemble(sym, map, err, msg, sizeof(msg)); diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index facad1e279a8..bc34b28373f4 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1622,13 +1622,14 @@ void symbol__calc_percent(struct symbol *sym, struct perf_evsel *evsel) int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid) + struct arch **parch) { struct annotate_args args = { .privsize = privsize, .map = map, .evsel = evsel, }; + struct perf_env *env = perf_evsel__env(evsel); const char *arch_name = NULL; struct arch *arch; int err; @@ -1648,7 +1649,7 @@ int symbol__annotate(struct symbol *sym, struct map *map, *parch = arch; if (arch->init) { - err = arch->init(arch, cpuid); + err = arch->init(arch, env ? env->cpuid : NULL); if (err) { pr_err("%s: failed to initialize %s arch priv area\n", __func__, arch->name); return err; @@ -1999,7 +2000,7 @@ int symbol__tty_annotate(struct symbol *sym, struct map *map, struct dso *dso = map->dso; struct rb_root source_line = RB_ROOT; - if (symbol__annotate(sym, map, evsel, 0, NULL, NULL) < 0) + if (symbol__annotate(sym, map, evsel, 0, NULL) < 0) return -1; symbol__calc_percent(sym, evsel); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index 6d7289e88fa3..ce427445671f 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -179,7 +179,7 @@ void symbol__annotate_zero_histograms(struct symbol *sym); int symbol__annotate(struct symbol *sym, struct map *map, struct perf_evsel *evsel, size_t privsize, - struct arch **parch, char *cpuid); + struct arch **parch); enum symbol_disassemble_errno { SYMBOL_ANNOTATE_ERRNO__SUCCESS = 0, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 95853c51c0ca..541897049c6c 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2842,9 +2842,9 @@ char *perf_evsel__env_arch(struct perf_evsel *evsel) return NULL; } -char *perf_evsel__env_cpuid(struct perf_evsel *evsel) +struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->cpuid; + if (evsel && evsel->evlist) + return evsel->evlist->env; return NULL; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index c3663a70c9b9..0e961ce60a9c 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -447,6 +447,6 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); char *perf_evsel__env_arch(struct perf_evsel *evsel); -char *perf_evsel__env_cpuid(struct perf_evsel *evsel); +struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ -- cgit v1.2.3-70-g09d2 From 3285debaf5992f9729ba33e3f31eff5253d29dc4 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 12:52:17 -0300 Subject: perf annotate: Use perf_env when obtaining the arch name Paving the way to reuse these routines in other areas, like when generating errno tables. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-rh1qv051vb8gfdcswskrn53h@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 17 ++++++++--------- tools/perf/util/evsel.c | 7 ------- tools/perf/util/evsel.h | 1 - 3 files changed, 8 insertions(+), 17 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index bc34b28373f4..eac45ccd5c32 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1420,16 +1420,19 @@ fallback: return 0; } -static const char *annotate__norm_arch(const char *arch_name) +static const char *perf_env__arch(struct perf_env *env) { struct utsname uts; + char *arch_name; - if (!arch_name) { /* Assume we are annotating locally. */ + if (!env) { /* Assume local operation */ if (uname(&uts) < 0) return NULL; arch_name = uts.machine; - } - return normalize_arch((char *)arch_name); + } else + arch_name = env->arch; + + return normalize_arch(arch_name); } static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) @@ -1630,14 +1633,10 @@ int symbol__annotate(struct symbol *sym, struct map *map, .evsel = evsel, }; struct perf_env *env = perf_evsel__env(evsel); - const char *arch_name = NULL; + const char *arch_name = perf_env__arch(env); struct arch *arch; int err; - if (evsel) - arch_name = perf_evsel__env_arch(evsel); - - arch_name = annotate__norm_arch(arch_name); if (!arch_name) return -1; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 541897049c6c..4718f0a460df 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2835,13 +2835,6 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, perf_evsel__name(evsel)); } -char *perf_evsel__env_arch(struct perf_evsel *evsel) -{ - if (evsel && evsel->evlist && evsel->evlist->env) - return evsel->evlist->env->arch; - return NULL; -} - struct perf_env *perf_evsel__env(struct perf_evsel *evsel) { if (evsel && evsel->evlist) diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 0e961ce60a9c..846e41644525 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -446,7 +446,6 @@ typedef int (*attr__fprintf_f)(FILE *, const char *, const char *, void *); int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, attr__fprintf_f attr__fprintf, void *priv); -char *perf_evsel__env_arch(struct perf_evsel *evsel); struct perf_env *perf_evsel__env(struct perf_evsel *evsel); #endif /* __PERF_EVSEL_H */ -- cgit v1.2.3-70-g09d2 From 4e8fbc1c975c667c61a3073da81b338b9bf61c37 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 11 Dec 2017 14:47:49 -0300 Subject: perf env: Adopt perf_env__arch() from the annotate code And use it in the libunwind case, with both passing a valid perf_env to extract the arch to be normalized from and passing NULL with the same semantic as in the annotate code: to get it from uname() uts.machine. Now the code to generate per arch errno translation tables (int/string) can use it to decode perf.data files recorded in a different arch than that where 'perf trace' (or any other analysis tool) runs. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Michael Petlan Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-p2epffgash69w38kvj3ntpc9@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/common.c | 44 +++-------------------------------- tools/perf/arch/common.h | 1 - tools/perf/util/annotate.c | 16 ------------- tools/perf/util/env.c | 47 ++++++++++++++++++++++++++++++++++++++ tools/perf/util/env.h | 2 ++ tools/perf/util/unwind-libunwind.c | 4 ++-- 6 files changed, 54 insertions(+), 60 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 8c0cfeb55f8e..c6f373508a4f 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -1,12 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include -#include #include "common.h" +#include "../util/env.h" #include "../util/util.h" #include "../util/debug.h" -#include "sane_ctype.h" - const char *const arm_triplets[] = { "arm-eabi-", "arm-linux-androideabi-", @@ -120,55 +118,19 @@ static int lookup_triplets(const char *const *triplets, const char *name) return -1; } -/* - * Return architecture name in a normalized form. - * The conversion logic comes from the Makefile. - */ -const char *normalize_arch(char *arch) -{ - if (!strcmp(arch, "x86_64")) - return "x86"; - if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') - return "x86"; - if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) - return "sparc"; - if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) - return "arm64"; - if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) - return "arm"; - if (!strncmp(arch, "s390", 4)) - return "s390"; - if (!strncmp(arch, "parisc", 6)) - return "parisc"; - if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) - return "powerpc"; - if (!strncmp(arch, "mips", 4)) - return "mips"; - if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) - return "sh"; - - return arch; -} - static int perf_env__lookup_binutils_path(struct perf_env *env, const char *name, const char **path) { int idx; - const char *arch, *cross_env; - struct utsname uts; + const char *arch = perf_env__arch(env), *cross_env; const char *const *path_list; char *buf = NULL; - arch = normalize_arch(env->arch); - - if (uname(&uts) < 0) - goto out; - /* * We don't need to try to find objdump path for native system. * Just use default binutils path (e.g.: "objdump"). */ - if (!strcmp(normalize_arch(uts.machine), arch)) + if (!strcmp(perf_env__arch(NULL), arch)) goto out; cross_env = getenv("CROSS_COMPILE"); diff --git a/tools/perf/arch/common.h b/tools/perf/arch/common.h index a1546509ad24..2d875baa92e6 100644 --- a/tools/perf/arch/common.h +++ b/tools/perf/arch/common.h @@ -7,6 +7,5 @@ extern const char *objdump_path; int perf_env__lookup_objdump(struct perf_env *env); -const char *normalize_arch(char *arch); #endif /* ARCH_PERF_COMMON_H */ diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index eac45ccd5c32..68e687d1bf99 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -26,7 +26,6 @@ #include #include #include -#include #include "sane_ctype.h" @@ -1420,21 +1419,6 @@ fallback: return 0; } -static const char *perf_env__arch(struct perf_env *env) -{ - struct utsname uts; - char *arch_name; - - if (!env) { /* Assume local operation */ - if (uname(&uts) < 0) - return NULL; - arch_name = uts.machine; - } else - arch_name = env->arch; - - return normalize_arch(arch_name); -} - static int symbol__disassemble(struct symbol *sym, struct annotate_args *args) { struct map *map = args->map; diff --git a/tools/perf/util/env.c b/tools/perf/util/env.c index 6276b340f893..6d311868d850 100644 --- a/tools/perf/util/env.c +++ b/tools/perf/util/env.c @@ -1,8 +1,10 @@ // SPDX-License-Identifier: GPL-2.0 #include "cpumap.h" #include "env.h" +#include "sane_ctype.h" #include "util.h" #include +#include struct perf_env perf_env; @@ -93,3 +95,48 @@ void cpu_cache_level__free(struct cpu_cache_level *cache) free(cache->map); free(cache->size); } + +/* + * Return architecture name in a normalized form. + * The conversion logic comes from the Makefile. + */ +static const char *normalize_arch(char *arch) +{ + if (!strcmp(arch, "x86_64")) + return "x86"; + if (arch[0] == 'i' && arch[2] == '8' && arch[3] == '6') + return "x86"; + if (!strcmp(arch, "sun4u") || !strncmp(arch, "sparc", 5)) + return "sparc"; + if (!strcmp(arch, "aarch64") || !strcmp(arch, "arm64")) + return "arm64"; + if (!strncmp(arch, "arm", 3) || !strcmp(arch, "sa110")) + return "arm"; + if (!strncmp(arch, "s390", 4)) + return "s390"; + if (!strncmp(arch, "parisc", 6)) + return "parisc"; + if (!strncmp(arch, "powerpc", 7) || !strncmp(arch, "ppc", 3)) + return "powerpc"; + if (!strncmp(arch, "mips", 4)) + return "mips"; + if (!strncmp(arch, "sh", 2) && isdigit(arch[2])) + return "sh"; + + return arch; +} + +const char *perf_env__arch(struct perf_env *env) +{ + struct utsname uts; + char *arch_name; + + if (!env) { /* Assume local operation */ + if (uname(&uts) < 0) + return NULL; + arch_name = uts.machine; + } else + arch_name = env->arch; + + return normalize_arch(arch_name); +} diff --git a/tools/perf/util/env.h b/tools/perf/util/env.h index 1eb35b190b34..bf970f57dce0 100644 --- a/tools/perf/util/env.h +++ b/tools/perf/util/env.h @@ -65,4 +65,6 @@ int perf_env__set_cmdline(struct perf_env *env, int argc, const char *argv[]); int perf_env__read_cpu_topology_map(struct perf_env *env); void cpu_cache_level__free(struct cpu_cache_level *cache); + +const char *perf_env__arch(struct perf_env *env); #endif /* __PERF_ENV_H */ diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index 647a1e6b4c7b..b029a5e9ae49 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -3,7 +3,7 @@ #include "thread.h" #include "session.h" #include "debug.h" -#include "arch/common.h" +#include "env.h" struct unwind_libunwind_ops __weak *local_unwind_libunwind_ops; struct unwind_libunwind_ops __weak *x86_32_unwind_libunwind_ops; @@ -39,7 +39,7 @@ int unwind__prepare_access(struct thread *thread, struct map *map, if (dso_type == DSO__TYPE_UNKNOWN) return 0; - arch = normalize_arch(thread->mg->machine->env->arch); + arch = perf_env__arch(thread->mg->machine->env); if (!strcmp(arch, "x86")) { if (dso_type != DSO__TYPE_64BIT) -- cgit v1.2.3-70-g09d2 From 9f5c6d8777a2d962b0eeacb2a16f37da6bea545b Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:26:46 +0900 Subject: perf probe: Add warning message if there is unexpected event name This improve the error message so that user can know event-name error before writing new events to kprobe-events interface. E.g. ====== #./perf probe -x /lib64/libc-2.25.so malloc_get_state* Internal error: "malloc_get_state@GLIBC_2" is an invalid event name. Error: Failed to add events. ====== Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275040665.24652.5188568529237584489.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index b7aaf9b2294d..262d5da86623 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2625,6 +2625,14 @@ static int get_new_event_name(char *buf, size_t len, const char *base, out: free(nbase); + + /* Final validation */ + if (ret >= 0 && !is_c_func_name(buf)) { + pr_warning("Internal error: \"%s\" is an invalid event name.\n", + buf); + ret = -EINVAL; + } + return ret; } -- cgit v1.2.3-70-g09d2 From a3110cd9d0f77a796da545e112f9305094257798 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 11 Dec 2017 15:19:25 -0300 Subject: perf probe: Cut off the version suffix from event name Cut off the version suffix (e.g. @GLIBC_2.2.5 etc.) from automatic generated event name. This fixes wildcard event adding like below case; ===== # perf probe -x /lib64/libc-2.25.so malloc* Internal error: "malloc_get_state@GLIBC_2" is wrong event name. Error: Failed to add events. ===== This failure was caused by a versioned suffix symbol. With this fix, perf probe automatically cuts the suffix after @ as below. ===== # ./perf probe -x /lib64/libc-2.25.so malloc* Added new events: probe_libc:malloc_printerr (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_consolidate (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_check (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_hook_ini (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_trim (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_usable_size (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_stats (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_info (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:mallochook (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_get_state (on malloc* in /usr/lib64/libc-2.25.so) probe_libc:malloc_set_state (on malloc* in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_set_state -aR sleep 1 ===== Reported-by: Arnaldo Carvalho de Melo Reported-by: bhargavb Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/None Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/probe-event.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 262d5da86623..7e582547ac07 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2584,8 +2584,8 @@ static int get_new_event_name(char *buf, size_t len, const char *base, if (!nbase) return -ENOMEM; - /* Cut off the dot suffixes (e.g. .const, .isra)*/ - p = strchr(nbase, '.'); + /* Cut off the dot suffixes (e.g. .const, .isra) and version suffixes */ + p = strpbrk(nbase, ".@"); if (p && p != nbase) *p = '\0'; -- cgit v1.2.3-70-g09d2 From e63c625a1e417edbe513b75b347a7238e9e7fea0 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:27:44 +0900 Subject: perf probe: Add __return suffix for return events Add __return suffix for function return events automatically. Without this, user have to give --force option and will see the number suffix for each event like "function_1", which is not easy to recognize. Instead, this adds __return suffix to it automatically. E.g. ===== # ./perf probe -x /lib64/libc-2.25.so 'malloc*%return' Added new events: probe_libc:malloc_printerr__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_consolidate__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_check__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_hook_ini__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_trim__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_usable_size__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_stats__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_info__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:mallochook__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_get_state__return (on malloc*%return in /usr/lib64/libc-2.25.so) probe_libc:malloc_set_state__return (on malloc*%return in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_set_state__return -aR sleep 1 ===== Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Masami Hiramatsu Acked-by: Ravi Bangoria Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275046418.24652.6696011972866498489.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 2 +- tools/perf/util/probe-event.c | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index d7e4869905f1..f96382692f42 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -170,7 +170,7 @@ Probe points are defined by following syntax. or, sdt_PROVIDER:SDTEVENT -'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_' is used for uprobe. +'EVENT' specifies the name of new event, if omitted, it will be set the name of the probed function, and for return probes, a "\_\_return" suffix is automatically added to the function name. You can also specify a group name by 'GROUP', if omitted, set 'probe' is used for kprobe and 'probe_' is used for uprobe. Note that using existing group name can conflict with other events. Especially, using the group name reserved for kernel modules can hide embedded events in the modules. 'FUNC' specifies a probed function name, and it may have one of the following options; '+OFFS' is the offset from function entry address in bytes, ':RLN' is the relative-line number from function entry line, and '%return' means that it probes function return. And ';PTN' means lazy matching pattern (see LAZY MATCHING). Note that ';PTN' must be the end of the probe point definition. In addition, '@SRC' specifies a source file which has that function. diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 7e582547ac07..a68141d360b0 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2573,7 +2573,8 @@ int show_perf_probe_events(struct strfilter *filter) } static int get_new_event_name(char *buf, size_t len, const char *base, - struct strlist *namelist, bool allow_suffix) + struct strlist *namelist, bool ret_event, + bool allow_suffix) { int i, ret; char *p, *nbase; @@ -2590,7 +2591,7 @@ static int get_new_event_name(char *buf, size_t len, const char *base, *p = '\0'; /* Try no suffix number */ - ret = e_snprintf(buf, len, "%s", nbase); + ret = e_snprintf(buf, len, "%s%s", nbase, ret_event ? "__return" : ""); if (ret < 0) { pr_debug("snprintf() failed: %d\n", ret); goto out; @@ -2689,8 +2690,8 @@ static int probe_trace_event__set_name(struct probe_trace_event *tev, group = PERFPROBE_GROUP; /* Get an unused new event name */ - ret = get_new_event_name(buf, 64, event, - namelist, allow_suffix); + ret = get_new_event_name(buf, 64, event, namelist, + tev->point.retprobe, allow_suffix); if (ret < 0) return ret; -- cgit v1.2.3-70-g09d2 From 4b3a2716dd785fabb9f6ac80c1d53cb29a88169d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:28:12 +0900 Subject: perf probe: Find versioned symbols from map Commit d80406453ad4 ("perf symbols: Allow user probes on versioned symbols") allows user to find default versioned symbols (with "@@") in map. However, it did not enable normal versioned symbol (with "@") for perf-probe. E.g. ===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state Failed to find symbol malloc_get_state in /usr/lib64/libc-2.25.so Error: Failed to add events. ===== This solves above issue by improving perf-probe symbol search function, as below. ===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state Added new event: probe_libc:malloc_get_state (on malloc_get_state in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_get_state -aR sleep 1 # ./perf probe -l probe_libc:malloc_get_state (on malloc_get_state@GLIBC_2.2.5 in /usr/lib64/libc-2.25.so) ===== Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Tested-by: Arnaldo Carvalho de Melo Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275049269.24652.1639103455496216255.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/powerpc/util/sym-handling.c | 8 ++++++++ tools/perf/util/probe-event.c | 20 ++++++++++++++++++-- tools/perf/util/symbol.c | 5 +++++ tools/perf/util/symbol.h | 1 + 4 files changed, 32 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c index 9c4e23d8c8ce..53d83d7e6a09 100644 --- a/tools/perf/arch/powerpc/util/sym-handling.c +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -64,6 +64,14 @@ int arch__compare_symbol_names_n(const char *namea, const char *nameb, return strncmp(namea, nameb, n); } + +const char *arch__normalize_symbol_name(const char *name) +{ + /* Skip over initial dot */ + if (name && *name == '.') + name++; + return name; +} #endif #if defined(_CALL_ELF) && _CALL_ELF == 2 diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index a68141d360b0..0d6c66d51939 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -2801,16 +2801,32 @@ static int find_probe_functions(struct map *map, char *name, int found = 0; struct symbol *sym; struct rb_node *tmp; + const char *norm, *ver; + char *buf = NULL; if (map__load(map) < 0) return 0; map__for_each_symbol(map, sym, tmp) { - if (strglobmatch(sym->name, name)) { + norm = arch__normalize_symbol_name(sym->name); + if (!norm) + continue; + + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) syms[found - 1] = sym; } + if (buf) + zfree(&buf); } return found; @@ -2856,7 +2872,7 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, * same name but different addresses, this lists all the symbols. */ num_matched_functions = find_probe_functions(map, pp->function, syms); - if (num_matched_functions == 0) { + if (num_matched_functions <= 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, pev->target ? : "kernel"); ret = -ENOENT; diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index 1b67a8639dfe..cc065d4bfafc 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -94,6 +94,11 @@ static int prefix_underscores_count(const char *str) return tail - str; } +const char * __weak arch__normalize_symbol_name(const char *name) +{ + return name; +} + int __weak arch__compare_symbol_names(const char *namea, const char *nameb) { return strcmp(namea, nameb); diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index a4f0075b4e5c..0563f33c1eb3 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -349,6 +349,7 @@ bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); void arch__sym_update(struct symbol *s, GElf_Sym *sym); #endif +const char *arch__normalize_symbol_name(const char *name); #define SYMBOL_A 0 #define SYMBOL_B 1 -- cgit v1.2.3-70-g09d2 From 1e9f9e8af0de80e8f6a47d991df66090934be0c6 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Sat, 9 Dec 2017 01:28:41 +0900 Subject: perf string: Add {strdup,strpbrk}_esc() To support the special characters escaped by '\' in 'perf probe' event parser. Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151275052163.24652.18205979384585484358.stgit@devbox [ Split from a larger patch ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/string.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/string2.h | 2 ++ 2 files changed, 48 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/string.c b/tools/perf/util/string.c index aaa08ee8c717..d8bfd0c4d2cb 100644 --- a/tools/perf/util/string.c +++ b/tools/perf/util/string.c @@ -396,3 +396,49 @@ out_err_overflow: free(expr); return NULL; } + +/* Like strpbrk(), but not break if it is right after a backslash (escaped) */ +char *strpbrk_esc(char *str, const char *stopset) +{ + char *ptr; + + do { + ptr = strpbrk(str, stopset); + if (ptr == str || + (ptr == str + 1 && *(ptr - 1) != '\\')) + break; + str = ptr + 1; + } while (ptr && *(ptr - 1) == '\\' && *(ptr - 2) != '\\'); + + return ptr; +} + +/* Like strdup, but do not copy a single backslash */ +char *strdup_esc(const char *str) +{ + char *s, *d, *p, *ret = strdup(str); + + if (!ret) + return NULL; + + d = strchr(ret, '\\'); + if (!d) + return ret; + + s = d + 1; + do { + if (*s == '\0') { + *d = '\0'; + break; + } + p = strchr(s + 1, '\\'); + if (p) { + memmove(d, s, p - s); + d += p - s; + s = p + 1; + } else + memmove(d, s, strlen(s) + 1); + } while (p); + + return ret; +} diff --git a/tools/perf/util/string2.h b/tools/perf/util/string2.h index ee14ca5451ab..4c68a09b97e8 100644 --- a/tools/perf/util/string2.h +++ b/tools/perf/util/string2.h @@ -39,5 +39,7 @@ static inline char *asprintf_expr_not_in_ints(const char *var, size_t nints, int return asprintf_expr_inout_ints(var, false, nints, ints); } +char *strpbrk_esc(char *str, const char *stopset); +char *strdup_esc(const char *str); #endif /* PERF_STRING_H */ -- cgit v1.2.3-70-g09d2 From c588d158124d5b60184fc612e551a19720720d68 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 13 Dec 2017 00:05:12 +0900 Subject: perf probe: Support escaped character in parser Support the special characters escaped by '\' in parser. This allows user to specify versions directly like below. ===== # ./perf probe -x /lib64/libc-2.25.so malloc_get_state\\@GLIBC_2.2.5 Added new event: probe_libc:malloc_get_state (on malloc_get_state@GLIBC_2.2.5 in /usr/lib64/libc-2.25.so) You can now use it in all perf tools, such as: perf record -e probe_libc:malloc_get_state -aR sleep 1 ===== Or, you can use separators in source filename, e.g. ===== # ./perf probe -x /opt/test/a.out foo+bar.c:3 Semantic error :There is non-digit character in offset. Error: Command Parse Error. ===== Usually "+" in source file cause parser error, but ===== # ./perf probe -x /opt/test/a.out foo\\+bar.c:4 Added new event: probe_a:main (on @foo+bar.c:4 in /opt/test/a.out) You can now use it in all perf tools, such as: perf record -e probe_a:main -aR sleep 1 ===== escaped "\+" allows you to specify that. Signed-off-by: Masami Hiramatsu Reviewed-by: Thomas Richter Acked-by: Ravi Bangoria Cc: Paul Clarke Cc: bhargavb Cc: linux-rt-users@vger.kernel.org Link: http://lkml.kernel.org/r/151309111236.18107.5634753157435343410.stgit@devbox Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-probe.txt | 16 +++++++++ tools/perf/util/probe-event.c | 58 ++++++++++++++++++++------------- 2 files changed, 51 insertions(+), 23 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index f96382692f42..b6866a05edd2 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -182,6 +182,14 @@ Note that before using the SDT event, the target binary (on which SDT events are For details of the SDT, see below. https://sourceware.org/gdb/onlinedocs/gdb/Static-Probe-Points.html +ESCAPED CHARACTER +----------------- + +In the probe syntax, '=', '@', '+', ':' and ';' are treated as a special character. You can use a backslash ('\') to escape the special characters. +This is useful if you need to probe on a specific versioned symbols, like @GLIBC_... suffixes, or also you need to specify a source file which includes the special characters. +Note that usually single backslash is consumed by shell, so you might need to pass double backslash (\\) or wrapping with single quotes (\'AAA\@BBB'). +See EXAMPLES how it is used. + PROBE ARGUMENT -------------- Each probe argument follows below syntax. @@ -277,6 +285,14 @@ Add a USDT probe to a target process running in a different mount namespace ./perf probe --target-ns -x /usr/lib/jvm/java-1.8.0-openjdk-1.8.0.121-0.b13.el7_3.x86_64/jre/lib/amd64/server/libjvm.so %sdt_hotspot:thread__sleep__end +Add a probe on specific versioned symbol by backslash escape + + ./perf probe -x /lib64/libc-2.25.so 'malloc_get_state\@GLIBC_2.2.5' + +Add a probe in a source file using special characters by backslash escape + + ./perf probe -x /opt/test/a.out 'foo\+bar.c:4' + SEE ALSO -------- diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 0d6c66d51939..e1dbc9821617 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -1325,27 +1325,30 @@ static int parse_perf_probe_event_name(char **arg, struct perf_probe_event *pev) { char *ptr; - ptr = strchr(*arg, ':'); + ptr = strpbrk_esc(*arg, ":"); if (ptr) { *ptr = '\0'; if (!pev->sdt && !is_c_func_name(*arg)) goto ng_name; - pev->group = strdup(*arg); + pev->group = strdup_esc(*arg); if (!pev->group) return -ENOMEM; *arg = ptr + 1; } else pev->group = NULL; - if (!pev->sdt && !is_c_func_name(*arg)) { + + pev->event = strdup_esc(*arg); + if (pev->event == NULL) + return -ENOMEM; + + if (!pev->sdt && !is_c_func_name(pev->event)) { + zfree(&pev->event); ng_name: + zfree(&pev->group); semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.\n", *arg); return -EINVAL; } - pev->event = strdup(*arg); - if (pev->event == NULL) - return -ENOMEM; - return 0; } @@ -1373,7 +1376,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg++; } - ptr = strpbrk(arg, ";=@+%"); + ptr = strpbrk_esc(arg, ";=@+%"); if (pev->sdt) { if (ptr) { if (*ptr != '@') { @@ -1387,7 +1390,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) pev->target = build_id_cache__origname(tmp); free(tmp); } else - pev->target = strdup(ptr + 1); + pev->target = strdup_esc(ptr + 1); if (!pev->target) return -ENOMEM; *ptr = '\0'; @@ -1421,13 +1424,14 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * Otherwise, we consider arg to be a function specification. */ - if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { + if (!strpbrk_esc(arg, "+@%")) { + ptr = strpbrk_esc(arg, ";:"); /* This is a file spec if it includes a '.' before ; or : */ - if (memchr(arg, '.', ptr - arg)) + if (ptr && memchr(arg, '.', ptr - arg)) file_spec = true; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1436,7 +1440,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (arg[0] == '\0') tmp = NULL; else { - tmp = strdup(arg); + tmp = strdup_esc(arg); if (tmp == NULL) return -ENOMEM; } @@ -1469,12 +1473,12 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = ptr; c = nc; if (c == ';') { /* Lazy pattern must be the last part */ - pp->lazy_line = strdup(arg); + pp->lazy_line = strdup(arg); /* let leave escapes */ if (pp->lazy_line == NULL) return -ENOMEM; break; } - ptr = strpbrk(arg, ";:+@%"); + ptr = strpbrk_esc(arg, ";:+@%"); if (ptr) { nc = *ptr; *ptr++ = '\0'; @@ -1501,7 +1505,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("SRC@SRC is not allowed.\n"); return -EINVAL; } - pp->file = strdup(arg); + pp->file = strdup_esc(arg); if (pp->file == NULL) return -ENOMEM; break; @@ -2803,23 +2807,31 @@ static int find_probe_functions(struct map *map, char *name, struct rb_node *tmp; const char *norm, *ver; char *buf = NULL; + bool cut_version = true; if (map__load(map) < 0) return 0; + /* If user gives a version, don't cut off the version from symbols */ + if (strchr(name, '@')) + cut_version = false; + map__for_each_symbol(map, sym, tmp) { norm = arch__normalize_symbol_name(sym->name); if (!norm) continue; - /* We don't care about default symbol or not */ - ver = strchr(norm, '@'); - if (ver) { - buf = strndup(norm, ver - norm); - if (!buf) - return -ENOMEM; - norm = buf; + if (cut_version) { + /* We don't care about default symbol or not */ + ver = strchr(norm, '@'); + if (ver) { + buf = strndup(norm, ver - norm); + if (!buf) + return -ENOMEM; + norm = buf; + } } + if (strglobmatch(norm, name)) { found++; if (syms && found < probe_conf.max_probes) -- cgit v1.2.3-70-g09d2 From f9d8adb345d7adbb2d3431eea73beb89c8d6d612 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Wed, 29 Nov 2017 19:43:46 +0100 Subject: perf evsel: Fix swap for samples with raw data When we detect a different endianity we swap event before processing. It's tricky for samples because we have no idea what's inside. We treat it as an array of u64s, swap them and later on we swap back parts which are different. We mangle this way also the tracepoint raw data, which ends up in report showing wrong data: 1.95% comm=Q^B pid=29285 prio=16777216 target_cpu=000 1.67% comm=l^B pid=0 prio=16777216 target_cpu=000 Luckily the traceevent library handles the endianity by itself (thank you Steven!), so we can pass the RAW data directly in the other endianity. 2.51% comm=beah-rhts-task pid=1175 prio=120 target_cpu=002 2.23% comm=kworker/0:0 pid=11566 prio=120 target_cpu=000 The fix is basically to swap back the raw data if different endianity is detected. Signed-off-by: Jiri Olsa Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Steven Rostedt Link: http://lkml.kernel.org/r/20171129184346.3656-1-jolsa@kernel.org [ Add util/memswap.c to python-ext-sources to link missing mem_bswap_64() ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 20 +++++++++++++++++--- tools/perf/util/python-ext-sources | 1 + 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4718f0a460df..1cf044cbae36 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -36,6 +36,7 @@ #include "debug.h" #include "trace-event.h" #include "stat.h" +#include "memswap.h" #include "util/parse-branch-options.h" #include "sane_ctype.h" @@ -2131,14 +2132,27 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, if (type & PERF_SAMPLE_RAW) { OVERFLOW_CHECK_u64(array); u.val64 = *array; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* undo swap of u64, then swap on individual u32s */ + + /* + * Undo swap of u64, then swap on individual u32s, + * get the size of the raw area and undo all of the + * swap. The pevent interface handles endianity by + * itself. + */ + if (swapped) { u.val64 = bswap_64(u.val64); u.val32[0] = bswap_32(u.val32[0]); u.val32[1] = bswap_32(u.val32[1]); } data->raw_size = u.val32[0]; + + /* + * The raw data is aligned on 64bits including the + * u32 size, so it's safe to use mem_bswap_64. + */ + if (swapped) + mem_bswap_64((void *) array, data->raw_size); + array = (void *)array + sizeof(u32); OVERFLOW_CHECK(array, data->raw_size, max_size); diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index b4f2f06722a7..7aa0ea64544e 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,6 +10,7 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c +util/memswap.c util/mmap.c util/namespaces.c ../lib/bitmap.c -- cgit v1.2.3-70-g09d2 From 69b5c953400897a978f8a7d212c53aa90ff5027d Mon Sep 17 00:00:00 2001 From: Michael Petlan Date: Tue, 12 Dec 2017 11:22:03 -0500 Subject: perf test shell: Fix check open filename arg using 'perf trace' Commit f231af789b11 ("perf test shell: Fix check open filename arg using 'perf trace' on s390x") added an exception for s390x to use openat() instead of open() in the test that intercepts a open syscall to look for the filename argument as obtained by the vfs_getname 'perf probe' it puts in place at the getname_flags kernel function. Its not just s390x that uses openat() instead of open(), so use 'perf list' to look for the syscall:sys_enter_open(at)? present in the system being tested instead of checking if the system is s390x. In fact Namhyung pointed out that glibc 2.26 changed this behaviour, as described in https://lwn.net/Articles/738694/, so systems where glibc is >= 2.26 will need this patch for this test to work, which already took place in some distros for architectures such as s390x, while Fedora 26 x86_64 is at glibc 2.25, i.e. still uses open(). Signed-off-by: Michael Petlan Tested-by: Arnaldo Carvalho de Melo Tested-by: Thomas Richter Link: https://lkml.kernel.org/r/ab23fe42-1080-a46b-503e-744e097f414f@linux.vnet.ibm.com Cc: Adrian Hunter Cc: David Ahern Cc: Hendrik Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan LPU-Reference: 1275675985.12835754.1513095723265.JavaMail.zimbra@redhat.com Link: https://lkml.kernel.org/n/tip-j2wbz9av1rw3thr3t0g4dtuk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/shell/trace+probe_vfs_getname.sh | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/shell/trace+probe_vfs_getname.sh b/tools/perf/tests/shell/trace+probe_vfs_getname.sh index 2a9ef080efd0..55ad9793d544 100755 --- a/tools/perf/tests/shell/trace+probe_vfs_getname.sh +++ b/tools/perf/tests/shell/trace+probe_vfs_getname.sh @@ -17,10 +17,9 @@ skip_if_no_perf_probe || exit 2 file=$(mktemp /tmp/temporary_file.XXXXX) trace_open_vfs_getname() { - test "$(uname -m)" = s390x && { svc="openat"; txt="dfd: +CWD, +"; } - - perf trace -e ${svc:-open} touch $file 2>&1 | \ - egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ ${svc:-open}\(${txt}filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" + evts=$(echo $(perf list syscalls:sys_enter_open* |& egrep 'open(at)? ' | sed -r 's/.*sys_enter_([a-z]+) +\[.*$/\1/') | sed 's/ /,/') + perf trace -e $evts touch $file 2>&1 | \ + egrep " +[0-9]+\.[0-9]+ +\( +[0-9]+\.[0-9]+ ms\): +touch\/[0-9]+ open(at)?\((dfd: +CWD, +)?filename: +${file}, +flags: CREAT\|NOCTTY\|NONBLOCK\|WRONLY, +mode: +IRUGO\|IWUGO\) += +[0-9]+$" } -- cgit v1.2.3-70-g09d2 From 922991c2b14219b33270c770f917e0d1bf8f5597 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 13 Dec 2017 17:43:40 -0300 Subject: Revert "perf s390: Always build with -fPIC" This one made x86 always build with -fPIC, when the intention was for s390 to be built that way, due to a rebase mistake. Reported-by: Hendrik Brueckner This reverts commit 1dc4ddf112a408e607a073d951b962b6c6e2bd6c. Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 1 - 1 file changed, 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 6f73c2316740..eb6bd99be0bd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -43,7 +43,6 @@ ifeq ($(SRCARCH),x86) LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind endif NO_PERF_REGS := 0 - CFLAGS += -fPIC endif ifeq ($(SRCARCH),arm) -- cgit v1.2.3-70-g09d2 From a9a3f1d18a6c9ccf89728e23474645aa91e2f4f1 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Wed, 13 Dec 2017 17:46:54 -0300 Subject: perf s390: Always build with -fPIC On s390, object files must be compiled with position-indepedent code in order to be incrementally linked or linked to shared libraries. Therefore, add -fPIC to the CFLAGS for s390 to ensure each object file is built properly. Reported-by: Jonathan Hermann Signed-off-by: Hendrik Brueckner Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Thomas Richter Cc: linux s390 list Link: https://lkml.kernel.org/r/20171207080951.GC4889@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index eb6bd99be0bd..f050f38d8fa3 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -58,7 +58,7 @@ endif ifeq ($(ARCH),s390) NO_PERF_REGS := 0 NO_SYSCALL_TABLE := 0 - CFLAGS += -I$(OUTPUT)arch/s390/include/generated + CFLAGS += -fPIC -I$(OUTPUT)arch/s390/include/generated endif ifeq ($(NO_PERF_REGS),0) -- cgit v1.2.3-70-g09d2 From ca8000684ec4e66f965e1f9547a3c6cb834154ca Mon Sep 17 00:00:00 2001 From: Mengting Zhang Date: Wed, 13 Dec 2017 15:01:53 +0800 Subject: perf evsel: Enable ignore_missing_thread for pid option While monitoring a multithread process with pid option, perf sometimes may return sys_perf_event_open failure with 3(No such process) if any of the process's threads die before we open the event. However, we want perf continue monitoring the remaining threads and do not exit with error. Here, the patch enables perf_evsel::ignore_missing_thread for -p option to ignore complete failure if any of threads die before we open the event. But it may still return sys_perf_event_open failure with 22(Invalid) if we monitors several event groups. sys_perf_event_open: pid 28960 cpu 40 group_fd 118202 flags 0x8 sys_perf_event_open: pid 28961 cpu 40 group_fd 118203 flags 0x8 WARNING: Ignored open failure for pid 28962 sys_perf_event_open: pid 28962 cpu 40 group_fd [118203] flags 0x8 sys_perf_event_open failed, error -22 That is because when we ignore a missing thread, we change the thread_idx without dealing with its fds, FD(evsel, cpu, thread). Then get_group_fd() may return a wrong group_fd for the next thread and sys_perf_event_open() return with 22. sys_perf_event_open(){ ... if (group_fd != -1) perf_fget_light()//to get corresponding group_leader by group_fd ... if (group_leader) if (group_leader->ctx->task != ctx->task)//should on the same task goto err_context ... } This patch also fixes this bug by introducing perf_evsel__remove_fd() and update_fds to allow removing fds for the missing thread. Changes since v1: - Change group_fd__remove() into a more genetic way without changing code logic - Remove redundant condition Changes since v2: - Use a proper function name and add some comment. - Multiline comment style fixes. Committer testing: Before this patch the recently added 'perf stat --per-thread' for system wide counting would race while enumerating all threads using /proc: [root@jouet ~]# perf stat --per-thread failed to parse CPUs map: No such file or directory Usage: perf stat [] [] -C, --cpu list of cpus to monitor in system-wide -a, --all-cpus system-wide collection from all CPUs [root@jouet ~]# perf stat --per-thread failed to parse CPUs map: No such file or directory Usage: perf stat [] [] -C, --cpu list of cpus to monitor in system-wide -a, --all-cpus system-wide collection from all CPUs [root@jouet ~]# When, say, the kernel was being built, so lots of shortlived threads, after this patch this doesn't happen. Signed-off-by: Mengting Zhang Tested-by: Arnaldo Carvalho de Melo Acked-by: Jiri Olsa Cc: Cheng Jian Cc: Li Bin Cc: Wang Nan Link: http://lkml.kernel.org/r/1513148513-6974-1-git-send-email-zhangmengting@huawei.com [ Remove one use 'evlist' alias variable ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-record.c | 4 ++-- tools/perf/util/evsel.c | 47 +++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 47 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 98da8cb8de93..50385d89c497 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -1804,8 +1804,8 @@ int cmd_record(int argc, const char **argv) goto out; } - /* Enable ignoring missing threads when -u option is defined. */ - rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX; + /* Enable ignoring missing threads when -u/-p option is defined. */ + rec->opts.ignore_missing_thread = rec->opts.target.uid != UINT_MAX || rec->opts.target.pid; err = -ENOMEM; if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 1cf044cbae36..a4d256ea0dc4 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1599,10 +1599,46 @@ static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, return fprintf(fp, " %-32s %s\n", name, val); } +static void perf_evsel__remove_fd(struct perf_evsel *pos, + int nr_cpus, int nr_threads, + int thread_idx) +{ + for (int cpu = 0; cpu < nr_cpus; cpu++) + for (int thread = thread_idx; thread < nr_threads - 1; thread++) + FD(pos, cpu, thread) = FD(pos, cpu, thread + 1); +} + +static int update_fds(struct perf_evsel *evsel, + int nr_cpus, int cpu_idx, + int nr_threads, int thread_idx) +{ + struct perf_evsel *pos; + + if (cpu_idx >= nr_cpus || thread_idx >= nr_threads) + return -EINVAL; + + evlist__for_each_entry(evsel->evlist, pos) { + nr_cpus = pos != evsel ? nr_cpus : cpu_idx; + + perf_evsel__remove_fd(pos, nr_cpus, nr_threads, thread_idx); + + /* + * Since fds for next evsel has not been created, + * there is no need to iterate whole event list. + */ + if (pos == evsel) + break; + } + return 0; +} + static bool ignore_missing_thread(struct perf_evsel *evsel, + int nr_cpus, int cpu, struct thread_map *threads, int thread, int err) { + pid_t ignore_pid = thread_map__pid(threads, thread); + if (!evsel->ignore_missing_thread) return false; @@ -1618,11 +1654,18 @@ static bool ignore_missing_thread(struct perf_evsel *evsel, if (threads->nr == 1) return false; + /* + * We should remove fd for missing_thread first + * because thread_map__remove() will decrease threads->nr. + */ + if (update_fds(evsel, nr_cpus, cpu, threads->nr, thread)) + return false; + if (thread_map__remove(threads, thread)) return false; pr_warning("WARNING: Ignored open failure for pid %d\n", - thread_map__pid(threads, thread)); + ignore_pid); return true; } @@ -1727,7 +1770,7 @@ retry_open: if (fd < 0) { err = -errno; - if (ignore_missing_thread(evsel, threads, thread, err)) { + if (ignore_missing_thread(evsel, cpus->nr, cpu, threads, thread, err)) { /* * We just removed 1 thread, so take a step * back on thread index and lower the upper -- cgit v1.2.3-70-g09d2 From f1031c8d33a8c40d4cac26e58c37d9fba0e31a8a Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Thu, 14 Dec 2017 17:52:42 -0600 Subject: perf probe arm64: Fix symbol fixup issues due to ELF type On an arm64 machine running a CONFIG_RANDOMIZE_BASE=y kernel, perf kernel symbol resolution fails. Debugging saw symsrc_init calling the default elf__needs_adjust_symbols() where checks for an ET_DYN (3) ehdr.e_type failed when they should have succeeded. Fix by adopting powerpc version of the weak elf__needs_adjust_symbols() function, as done in commit d2332098331f ("perf probe ppc: Fix symbol fixup issues due to ELF type"). Prior to this patch, perf test 1 would fail: $ sudo oldperf test -v 1 |& head 1: vmlinux symtab matches kallsyms : test child forked, pid 33374 Looking at the vmlinux_path (8 entries long) Using /usr/lib/debug/boot/vmlinux for symbols ERR : 0xfffe0000100f1000: do_undefinstr not on kallsyms ERR : 0xfffe0000100f1320: do_sysinstr not on kallsyms ERR : 0xfffe0000100f13b0: do_debug_exception not on kallsyms ERR : 0xfffe0000100f1498: do_mem_abort not on kallsyms ERR : 0xfffe0000100f1580: do_sp_pc_abort not on kallsyms ... After applying this patch, perf test 1 now succeeds: $ sudo ./newperf test -v 1 |& head 1: vmlinux symtab matches kallsyms : test child forked, pid 33378 Looking at the vmlinux_path (8 entries long) Using /usr/lib/debug/boot/vmlinux for symbols WARN: 0xffff000008081000: diff name v: do_undefinstr k: __exception_text_start WARN: 0xffff0000080819e8: diff name v: __irqentry_text_end k: __softirqentry_text_start WARN: 0xffff000008081d08: diff name v: __entry_text_start k: __softirqentry_text_end WARN: 0xffff00000809db5c: diff name v: flush_icache_range k: __flush_cache_user_range WARN: 0xffff000008101908: diff name v: sys_ni_syscall k: sys_vm86old ... Signed-off-by: Kim Phillips Cc: Alexander Shishkin Cc: Ganapatrao Kulkarni Cc: Jiri Olsa Cc: Namhyung Kim Cc: Naveen N. Rao Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/20171214175242.e30450f17f93ad675d968fa3@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm64/util/Build | 1 + tools/perf/arch/arm64/util/sym-handling.c | 22 ++++++++++++++++++++++ 2 files changed, 23 insertions(+) create mode 100644 tools/perf/arch/arm64/util/sym-handling.c (limited to 'tools') diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index b1ab72d2a42e..e04f6cdd6f32 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -1,4 +1,5 @@ libperf-y += header.o +libperf-y += sym-handling.o libperf-$(CONFIG_DWARF) += dwarf-regs.o libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/arm64/util/sym-handling.c b/tools/perf/arch/arm64/util/sym-handling.c new file mode 100644 index 000000000000..0051b1ee8450 --- /dev/null +++ b/tools/perf/arch/arm64/util/sym-handling.c @@ -0,0 +1,22 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2015 Naveen N. Rao, IBM Corporation + */ + +#include "debug.h" +#include "symbol.h" +#include "map.h" +#include "probe-event.h" +#include "probe-file.h" + +#ifdef HAVE_LIBELF_SUPPORT +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) +{ + return ehdr.e_type == ET_EXEC || + ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; +} +#endif -- cgit v1.2.3-70-g09d2 From 74cd5815d9af6e6c4f3bcecfbc8e439f2fd7e6b1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Thu, 21 Dec 2017 17:26:10 +0800 Subject: perf tool: Improve bash command line auto-complete for multiple events with comma perf has perf-completion.sh to define command line auto-completion in bash/zsh. For record/stat -e it works for single events, but isn't working when specifying multiple events with comma. It would be very useful if it could be fixed to make it easier by supporting multiple events, comma separated. With this patch, the result can be like this: 1. Support the events returned from 'perf list --raw-dump' root@skl:/tmp# perf stat -e cpu/cache cpu/cache-misses/ cpu/cache-references/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch- cpu/branch-instructions/ cpu/branch-misses/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch-i root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/branch-instructions/ 2. Support the events listed in /sys/bus/event_source/devices/cpu/events root@skl:/tmp# perf stat -e cycle cycle_activity.cycles_l1d_miss cycle_activity.stalls_l3_miss cycle_activity.cycles_l2_miss cycle_activity.stalls_mem_any cycle_activity.cycles_l3_miss cycle_activity.stalls_total cycle_activity.cycles_mem_any cycles-ct cycle_activity.stalls_l1d_miss cycles-t cycle_activity.stalls_l2_miss root@skl:/tmp# perf stat -e cycles- cycles-ct cycles-t root@skl:/tmp# perf stat -e cycles-t,cpu/c cpu/cache-misses/ cpu/cpu-cycles/ cpu/cycles-t/ cpu/cache-references/ cpu/cycles-ct/ root@skl:/tmp# perf stat -e cycles-t,cpu/cache- cpu/cache-misses/ cpu/cache-references/ root@skl:/tmp# perf stat -e cycles-t,cpu/cache-misses/ 3. Support the uppercase event which is with prefix "cpu/" root@skl:/tmp# perf stat -e cpu/c cpu/cache-misses/ cpu/cpu-cycles/ cpu/cycles-t/ cpu/cache-references/ cpu/cycles-ct/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/C cpu/CACHE-MISSES/ cpu/CPU-CYCLES/ cpu/CYCLES-T/ cpu/CACHE-REFERENCES/ cpu/CYCLES-CT/ root@skl:/tmp# perf stat -e cpu/cache-misses/,cpu/CACHE-REFERENCES/ Note that: a) This patch only supports bash. b) It doesn't support the cases like {},{} or {...,...}. Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513848370-8098-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 38 ++++++++++++++++++++++++++++++++++---- 1 file changed, 34 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 345f5d6e9ed5..d8310830a18b 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -162,8 +162,33 @@ __perf_main () # List possible events for -e option elif [[ $prev == @("-e"|"--event") && $prev_skip_opts == @(record|stat|top) ]]; then - evts=$($cmd list --raw-dump) - __perfcomp_colon "$evts" "$cur" + + local cur1=${COMP_WORDS[COMP_CWORD]} + local raw_evts=$($cmd list --raw-dump) + local arr s tmp result + + if [[ "$cur1" == */* && ${cur1#*/} =~ ^[A-Z] ]]; then + OLD_IFS="$IFS" + IFS=" " + arr=($raw_evts) + IFS="$OLD_IFS" + + for s in ${arr[@]} + do + if [[ "$s" == *cpu/* ]]; then + tmp=${s#*cpu/} + result=$result" ""cpu/"${tmp^^} + else + result=$result" "$s + fi + done + + evts=${result}+$(ls /sys/bus/event_source/devices/cpu/events) + else + evts=${raw_evts}+$(ls /sys/bus/event_source/devices/cpu/events) + fi + + __perfcomp_colon "$evts" "$cur1" else # List subcommands for perf commands if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| @@ -246,11 +271,16 @@ fi type perf &>/dev/null && _perf() { + if [[ "$COMP_WORDBREAKS" != *,* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS}," + export COMP_WORDBREAKS + fi + local cur words cword prev if [ $preload_get_comp_words_by_ref = "true" ]; then - _get_comp_words_by_ref -n =: cur words cword prev + _get_comp_words_by_ref -n =:, cur words cword prev else - __perf_get_comp_words_by_ref -n =: cur words cword prev + __perf_get_comp_words_by_ref -n =:, cur words cword prev fi __perf_main } && -- cgit v1.2.3-70-g09d2 From 34c16db0f035f3f3dc50fbed03747693c12b6a5b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 22 Dec 2017 18:57:35 +0800 Subject: perf tools: Return all events as auto-completions after comma It's a follow up for one previous patch "perf tool: Improve bash command line auto-complete for multiple events with comma." It fixes an issue that no events are displayed when is directly typed after comma. With this patch, now the result is: root@skl:/tmp# perf stat -e cpu-cycles, Display all 2389 possibilities? (y or n) alarmtimer:alarmtimer_cancel alarmtimer:alarmtimer_fired alarmtimer:alarmtimer_start alarmtimer:alarmtimer_suspend alignment-faults arith.divider_active BAClear_Cost baclears.any block:block_bio_backmerge block:block_bio_bounce block:block_bio_complete block:block_bio_frontmerge block:block_bio_queue block:block_bio_remap block:block_dirty_buffer block:block_getrq block:block_plug block:block_rq_complete block:block_rq_insert block:block_rq_issue block:block_rq_remap block:block_rq_requeue block:block_sleeprq --More-- One remaining issue is that the auto-completions doesn't work well for the event with ':'. For example, clk:clk_enable. Because ':' is set as WORDBREAK by default in bash. Need more work for this case. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513940255-16528-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index d8310830a18b..90206413f4d7 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -183,12 +183,16 @@ __perf_main () fi done - evts=${result}+$(ls /sys/bus/event_source/devices/cpu/events) + evts=${result}" "$(ls /sys/bus/event_source/devices/cpu/events) else - evts=${raw_evts}+$(ls /sys/bus/event_source/devices/cpu/events) + evts=${raw_evts}" "$(ls /sys/bus/event_source/devices/cpu/events) fi - __perfcomp_colon "$evts" "$cur1" + if [[ "$cur1" == , ]]; then + __perfcomp_colon "$evts" "" + else + __perfcomp_colon "$evts" "$cur1" + fi else # List subcommands for perf commands if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| -- cgit v1.2.3-70-g09d2 From 5d4fd9c8b83b36d34521b3af361a5726899045bf Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sat, 23 Dec 2017 04:15:58 +0800 Subject: perf tools: Auto-complete for events with ':' It's a follow up patch for a previous patch "perf tool: Return all events as auto-completions after comma". With this patch, auto-completion can work well for events with a ':'. For example: root@skl:/tmp# perf stat -e block:block_ block:block_bio_backmerge block:block_rq_complete block:block_bio_bounce block:block_rq_insert block:block_bio_complete block:block_rq_issue block:block_bio_frontmerge block:block_rq_remap block:block_bio_queue block:block_rq_requeue block:block_bio_remap block:block_sleeprq block:block_dirty_buffer block:block_split block:block_getrq block:block_touch_buffer block:block_plug block:block_unplug root@skl:/tmp# perf stat -e block:block_rq_ block:block_rq_complete block:block_rq_issue block:block_rq_requeue block:block_rq_insert block:block_rq_remap root@skl:/tmp# perf stat -e block:block_rq_complete block:block_rq_complete root@skl:/tmp# perf stat -e block:block_rq_complete Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1513973758-19109-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/perf-completion.sh | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 90206413f4d7..fdf75d45efff 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -280,6 +280,11 @@ _perf() export COMP_WORDBREAKS fi + if [[ "$COMP_WORDBREAKS" == *:* ]]; then + COMP_WORDBREAKS="${COMP_WORDBREAKS/:/}" + export COMP_WORDBREAKS + fi + local cur words cword prev if [ $preload_get_comp_words_by_ref = "true" ]; then _get_comp_words_by_ref -n =:, cur words cword prev -- cgit v1.2.3-70-g09d2 From 6703c9771d83ebe092b0d49cb0609a3f9d8b4ff7 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Jan 2018 14:59:21 -0300 Subject: perf test bpf: Improve message about expected samples When failing on one of the BPF tests we were just stating: BPF filter result incorrect Add some more info to help figuring out the problem: BPF filter result incorrect, expected 56, got 0 samples This came out while investigating this failure, first seen after updating the kernel to the 4.15.0-rc6 tag: [root@jouet ~]# perf test bpf 39: BPF filter : 39.1: Basic BPF filtering : FAILED! 39.2: BPF pinning : Skip 39.3: BPF prologue generation: Skip 39.4: BPF relocation checker : Skip [root@jouet ~]# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-403npu7daupv6b2bmxliv5pk@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index c433dd30975a..057c6b8fdb53 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -190,7 +190,7 @@ static int do_test(struct bpf_object *obj, int (*func)(void), } if (count != expect) { - pr_debug("BPF filter result incorrect\n"); + pr_debug("BPF filter result incorrect, expected %d, got %d samples\n", expect, count); goto out_delete_evlist; } -- cgit v1.2.3-70-g09d2 From 13cb2d0f513cf35bf74484a392f745f9e9c5a3f2 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 2 Jan 2018 15:18:07 -0300 Subject: perf test bpf: Use designated struct field initializers To follow standard practice in the kernel sources, documenting the initialization better and helping quickly finding the value for some field in a struct with many entries. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-syn3hz9hz7ukxlxbx5x6hv20@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf.c | 59 +++++++++++++++++++++++--------------------------- 1 file changed, 27 insertions(+), 32 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 057c6b8fdb53..0512f1b5bfdb 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -63,46 +63,41 @@ static struct { bool pin; } bpf_testcase_table[] = { { - LLVM_TESTCASE_BASE, - "Basic BPF filtering", - "[basic_bpf_test]", - "fix 'perf test LLVM' first", - "load bpf object failed", - &epoll_wait_loop, - (NR_ITERS + 1) / 2, - false, + .prog_id = LLVM_TESTCASE_BASE, + .desc = "Basic BPF filtering", + .name = "[basic_bpf_test]", + .msg_compile_fail = "fix 'perf test LLVM' first", + .msg_load_fail = "load bpf object failed", + .target_func = &epoll_wait_loop, + .expect_result = (NR_ITERS + 1) / 2, }, { - LLVM_TESTCASE_BASE, - "BPF pinning", - "[bpf_pinning]", - "fix kbuild first", - "check your vmlinux setting?", - &epoll_wait_loop, - (NR_ITERS + 1) / 2, - true, + .prog_id = LLVM_TESTCASE_BASE, + .desc = "BPF pinning", + .name = "[bpf_pinning]", + .msg_compile_fail = "fix kbuild first", + .msg_load_fail = "check your vmlinux setting?", + .target_func = &epoll_wait_loop, + .expect_result = (NR_ITERS + 1) / 2, + .pin = true, }, #ifdef HAVE_BPF_PROLOGUE { - LLVM_TESTCASE_BPF_PROLOGUE, - "BPF prologue generation", - "[bpf_prologue_test]", - "fix kbuild first", - "check your vmlinux setting?", - &llseek_loop, - (NR_ITERS + 1) / 4, - false, + .prog_id = LLVM_TESTCASE_BPF_PROLOGUE, + .desc = "BPF prologue generation", + .name = "[bpf_prologue_test]", + .msg_compile_fail = "fix kbuild first", + .msg_load_fail = "check your vmlinux setting?", + .target_func = &llseek_loop, + .expect_result = (NR_ITERS + 1) / 4, }, #endif { - LLVM_TESTCASE_BPF_RELOCATION, - "BPF relocation checker", - "[bpf_relocation_test]", - "fix 'perf test LLVM' first", - "libbpf error when dealing with relocation", - NULL, - 0, - false, + .prog_id = LLVM_TESTCASE_BPF_RELOCATION, + .desc = "BPF relocation checker", + .name = "[bpf_relocation_test]", + .msg_compile_fail = "fix 'perf test LLVM' first", + .msg_load_fail = "libbpf error when dealing with relocation", }, }; -- cgit v1.2.3-70-g09d2 From e0337f4f9aff60a19079b0f224136bb03877db58 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 4 Jan 2018 12:43:32 -0300 Subject: perf test bpf: Hook on epoll_pwait() The 'perf test bpf' was hooking a eBPF program on the SyS_epoll_wait() kernel function, that was what the epoll_wait() glibc function ended up calling, but since at least glibc 2.26, the one that comes with, for instance, Fedora 27, glibc ends up calling SyS_epoll_pwait() when epoll_wait() is used, causing this 'perf test' entry to fail. So switch to using epoll_pwait() and hook the eBPF program to the SyS_epoll_pwait() kernel function to make it work on a wider range of glibc and kernel versions. Tested-by: Wang Nan Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Link: https://lkml.kernel.org/n/tip-zynvquy63er8s5mrgsz65pto@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/tests/bpf-script-example.c | 4 ++-- tools/perf/tests/bpf.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/tests/bpf-script-example.c b/tools/perf/tests/bpf-script-example.c index 268e5f8e4aa2..e4123c1b0e88 100644 --- a/tools/perf/tests/bpf-script-example.c +++ b/tools/perf/tests/bpf-script-example.c @@ -31,8 +31,8 @@ struct bpf_map_def SEC("maps") flip_table = { .max_entries = 1, }; -SEC("func=SyS_epoll_wait") -int bpf_func__SyS_epoll_wait(void *ctx) +SEC("func=SyS_epoll_pwait") +int bpf_func__SyS_epoll_pwait(void *ctx) { int ind =0; int *flag = bpf_map_lookup_elem(&flip_table, &ind); diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 0512f1b5bfdb..8e709c9d512c 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -19,13 +19,13 @@ #ifdef HAVE_LIBBPF_SUPPORT -static int epoll_wait_loop(void) +static int epoll_pwait_loop(void) { int i; /* Should fail NR_ITERS times */ for (i = 0; i < NR_ITERS; i++) - epoll_wait(-(i + 1), NULL, 0, 0); + epoll_pwait(-(i + 1), NULL, 0, 0, NULL); return 0; } @@ -68,7 +68,7 @@ static struct { .name = "[basic_bpf_test]", .msg_compile_fail = "fix 'perf test LLVM' first", .msg_load_fail = "load bpf object failed", - .target_func = &epoll_wait_loop, + .target_func = &epoll_pwait_loop, .expect_result = (NR_ITERS + 1) / 2, }, { @@ -77,7 +77,7 @@ static struct { .name = "[bpf_pinning]", .msg_compile_fail = "fix kbuild first", .msg_load_fail = "check your vmlinux setting?", - .target_func = &epoll_wait_loop, + .target_func = &epoll_pwait_loop, .expect_result = (NR_ITERS + 1) / 2, .pin = true, }, -- cgit v1.2.3-70-g09d2 From 44df1afdb174fd6038e419f80efd914c0b5f2f85 Mon Sep 17 00:00:00 2001 From: Wang Nan Date: Wed, 6 Dec 2017 01:50:40 +0000 Subject: perf tools: Fix compile error with libunwind x86 Fix a compile error: ... CC util/libunwind/x86_32.o In file included from util/libunwind/x86_32.c:33:0: util/libunwind/../../arch/x86/util/unwind-libunwind.c: In function 'libunwind__x86_reg_id': util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: error: 'EINVAL' undeclared (first use in this function) return -EINVAL; ^ util/libunwind/../../arch/x86/util/unwind-libunwind.c:110:11: note: each undeclared identifier is reported only once for each function it appears in mv: cannot stat 'util/libunwind/.x86_32.o.tmp': No such file or directory make[4]: *** [util/libunwind/x86_32.o] Error 1 make[3]: *** [util] Error 2 make[2]: *** [libperf-in.o] Error 2 make[1]: *** [sub-make] Error 2 make: *** [all] Error 2 It happens when libunwind-x86 feature is detected. Signed-off-by: Wang Nan Link: http://lkml.kernel.org/r/20171206015040.114574-1-wangnan0@huawei.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/unwind-libunwind.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/unwind-libunwind.c b/tools/perf/arch/x86/util/unwind-libunwind.c index 9c917f80c906..05920e3edf7a 100644 --- a/tools/perf/arch/x86/util/unwind-libunwind.c +++ b/tools/perf/arch/x86/util/unwind-libunwind.c @@ -1,7 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 -#ifndef REMOTE_UNWIND_LIBUNWIND #include +#ifndef REMOTE_UNWIND_LIBUNWIND #include #include "perf_regs.h" #include "../../util/unwind.h" -- cgit v1.2.3-70-g09d2 From 935f5a9d4500020879858c9224c98dfabf16101d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Sat, 30 Dec 2017 00:26:52 +0800 Subject: perf report: Fix a wrong offset issue when using /proc/kcore When a valid vmlinux is not found, 'perf report' falls back to look at /proc/kcore. In this case, it will report the impossible large offset. For example: # perf record -b -e cycles:k find /etc/ > /dev/null # perf report --stdio --branch-history 22.77% _vm_normal_page+18446603336221188162 | ---page_remove_rmap +18446603336221188324 page_remove_rmap +18446603336221188487 (cycles:5) unlock_page_memcg +18446603336221188096 page_remove_rmap +18446603336221188327 (cycles:1) The issue is the value which is passed to parameter 'addr' in __get_srcline() is the objdump address. It's not correct if we calculate the offset by using 'addr - sym->start'. This patch creates a new parameter 'ip' in __get_srcline(). It is not converted to objdump address. With this patch, the perf report output is: 22.77% _vm_normal_page+66 | ---page_remove_rmap +228 page_remove_rmap +391 (cycles:5) unlock_page_memcg +0 page_remove_rmap +231 (cycles:1) page_remove_rmap +236 Committer testing: Make sure you get any valid vmlinux out of the way, using '-v' on the 'perf report' case and deleting it from places where perf searches them, like your kernel build dir and the build-id cache, in ~/.debug/. Reported-by: Arnaldo Carvalho de Melo Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1514564812-17344-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/annotate.c | 3 ++- tools/perf/util/machine.c | 2 +- tools/perf/util/map.c | 2 +- tools/perf/util/sort.c | 16 ++++++++++------ tools/perf/util/srcline.c | 9 +++++---- tools/perf/util/srcline.h | 5 +++-- 6 files changed, 22 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 68e687d1bf99..28b233c3dcbe 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -1960,7 +1960,8 @@ static void annotation__calc_lines(struct annotation *notes, struct map *map, if (percent_max <= 0.5) continue; - al->path = get_srcline(map->dso, start + al->offset, NULL, false, true); + al->path = get_srcline(map->dso, start + al->offset, NULL, + false, true, start + al->offset); insert_source_line(&tmp_root, al); } diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 64d255f6a537..b05a67464c03 100644 --- a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -1726,7 +1726,7 @@ static char *callchain_srcline(struct map *map, struct symbol *sym, u64 ip) bool show_addr = callchain_param.key == CCKEY_ADDRESS; srcline = get_srcline(map->dso, map__rip_2objdump(map, ip), - sym, show_sym, show_addr); + sym, show_sym, show_addr, ip); srcline__tree_insert(&map->dso->srclines, ip, srcline); } diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 6d40efd74402..8fe57031e1a8 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -419,7 +419,7 @@ int map__fprintf_srcline(struct map *map, u64 addr, const char *prefix, if (map && map->dso) { srcline = get_srcline(map->dso, map__rip_2objdump(map, addr), NULL, - true, true); + true, true, addr); if (srcline != SRCLINE_UNKNOWN) ret = fprintf(fp, "%s%s", prefix, srcline); free_srcline(srcline); diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index a00eacdf02ed..211e7f326b9f 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -336,7 +336,7 @@ char *hist_entry__get_srcline(struct hist_entry *he) return SRCLINE_UNKNOWN; return get_srcline(map->dso, map__rip_2objdump(map, he->ip), - he->ms.sym, true, true); + he->ms.sym, true, true, he->ip); } static int64_t @@ -380,7 +380,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->from.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->from.al_addr); } if (!right->branch_info->srcline_from) { struct map *map = right->branch_info->from.map; @@ -391,7 +392,8 @@ sort__srcline_from_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->from.al_addr), right->branch_info->from.sym, - true, true); + true, true, + right->branch_info->from.al_addr); } return strcmp(right->branch_info->srcline_from, left->branch_info->srcline_from); } @@ -423,7 +425,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, left->branch_info->to.al_addr), left->branch_info->from.sym, - true, true); + true, true, + left->branch_info->to.al_addr); } if (!right->branch_info->srcline_to) { struct map *map = right->branch_info->to.map; @@ -434,7 +437,8 @@ sort__srcline_to_cmp(struct hist_entry *left, struct hist_entry *right) map__rip_2objdump(map, right->branch_info->to.al_addr), right->branch_info->to.sym, - true, true); + true, true, + right->branch_info->to.al_addr); } return strcmp(right->branch_info->srcline_to, left->branch_info->srcline_to); } @@ -465,7 +469,7 @@ static char *hist_entry__get_srcfile(struct hist_entry *e) return no_srcfile; sf = __get_srcline(map->dso, map__rip_2objdump(map, e->ip), - e->ms.sym, false, true, true); + e->ms.sym, false, true, true, e->ip); if (!strcmp(sf, SRCLINE_UNKNOWN)) return no_srcfile; p = strchr(sf, ':'); diff --git a/tools/perf/util/srcline.c b/tools/perf/util/srcline.c index d19f05c56de6..3c21fd059b64 100644 --- a/tools/perf/util/srcline.c +++ b/tools/perf/util/srcline.c @@ -496,7 +496,8 @@ out: #define A2L_FAIL_LIMIT 123 char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines) + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip) { char *file = NULL; unsigned line = 0; @@ -536,7 +537,7 @@ out: if (sym) { if (asprintf(&srcline, "%s+%" PRIu64, show_sym ? sym->name : "", - addr - sym->start) < 0) + ip - sym->start) < 0) return SRCLINE_UNKNOWN; } else if (asprintf(&srcline, "%s[%" PRIx64 "]", dso->short_name, addr) < 0) return SRCLINE_UNKNOWN; @@ -550,9 +551,9 @@ void free_srcline(char *srcline) } char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr) + bool show_sym, bool show_addr, u64 ip) { - return __get_srcline(dso, addr, sym, show_sym, show_addr, false); + return __get_srcline(dso, addr, sym, show_sym, show_addr, false, ip); } struct srcline_node { diff --git a/tools/perf/util/srcline.h b/tools/perf/util/srcline.h index 847b7086182c..b2bb5502fd62 100644 --- a/tools/perf/util/srcline.h +++ b/tools/perf/util/srcline.h @@ -11,9 +11,10 @@ struct symbol; extern bool srcline_full_filename; char *get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr); + bool show_sym, bool show_addr, u64 ip); char *__get_srcline(struct dso *dso, u64 addr, struct symbol *sym, - bool show_sym, bool show_addr, bool unwind_inlines); + bool show_sym, bool show_addr, bool unwind_inlines, + u64 ip); void free_srcline(char *srcline); /* insert the srcline into the DSO, which will take ownership */ -- cgit v1.2.3-70-g09d2 From 40c39e3046411f84bab82f66783ff3593e2bcd9b Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Tue, 26 Dec 2017 18:42:43 +0800 Subject: perf report: Fix a no annotate browser displayed issue When enabling '-b' option in perf record, for example, perf record -b ... perf report and then browsing the annotate browser from perf report (press 'A'), it would fail (annotate browser can't be displayed). It's because the '.add_entry_cb' op of struct report is overwritten by hist_iter__branch_callback() in builtin-report.c. But this function doesn't do something like mapping symbols and sources. So next, do_annotate() will return directly. notes = symbol__annotation(act->ms.sym); if (!notes->src) return 0; This patch adds the lost code to hist_iter__branch_callback (refer to hist_iter__report_callback). v2: Fix a crash bug when perform 'perf report --stdio'. The reason is that we init the symbol annotation only in browser mode, it doesn't allocate/init resources for stdio mode. So now in hist_iter__branch_callback(), it will return directly if it's not in browser mode. Signed-off-by: Jin Yao Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1514284963-18587-1-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index eb9ce6327e71..07827cd51480 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -162,12 +162,28 @@ static int hist_iter__branch_callback(struct hist_entry_iter *iter, struct hist_entry *he = iter->he; struct report *rep = arg; struct branch_info *bi; + struct perf_sample *sample = iter->sample; + struct perf_evsel *evsel = iter->evsel; + int err; + + if (!ui__has_annotation()) + return 0; + + hist__account_cycles(sample->branch_stack, al, sample, + rep->nonany_branch_mode); bi = he->branch_info; + err = addr_map_symbol__inc_samples(&bi->from, sample, evsel->idx); + if (err) + goto out; + + err = addr_map_symbol__inc_samples(&bi->to, sample, evsel->idx); + branch_type_count(&rep->brtype_stat, &bi->flags, bi->from.addr, bi->to.addr); - return 0; +out: + return err; } static int process_sample_event(struct perf_tool *tool, -- cgit v1.2.3-70-g09d2 From 6011518db3bd04c80cd3ce3e6aea1c399739adb4 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:41 +0800 Subject: perf header: Add infrastructure to record first and last sample time perf report/script/... have a --time option to limit the time range of output. That's very useful to slice large traces, e.g. when processing the output of perf script for some analysis. But right now --time only supports absolute time. Also there is no fast way to get the start/end times of a given trace except for looking at it. This makes it hard to e.g. only decode the first half of the trace, which is useful for parallelization of scripts Another problem is that perf records are variable size and there is no synchronization mechanism. So the only way to find the last sample reliably would be to walk all samples. But we want to avoid that in perf report/... because it is already quite expensive. That is why storing the first sample time and last sample time in perf record is better. This patch creates a new header feature type HEADER_SAMPLE_TIME and related ops. Save the first sample time and the last sample time to the feature section in perf file header. That will be done when, for instance, processing build-ids, where we already have to process all samples to create the build-id table, take advantage of that to further amortize that processing by storing HEADER_SAMPLE_TIME to make 'perf report/script' faster when using --time. Committer testing: After this patch is applied the header is written with zeroes, we need the next patch, for "perf record" to actually write the timestamps: # perf report -D | grep PERF_RECORD_SAMPLE\( 22501155244406 0x44f0 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21be8c5 period: 1 addr: 0 22501155793625 0x4a30 [0x28]: PERF_RECORD_SAMPLE(IP, 0x4001): 25016/25016: 0xffffffffa21ffd50 period: 2828043 addr: 0 # perf report --header | grep "time of " # time of first sample : 0.000000 # time of last sample : 0.000000 # Changelog: v7: 1. Rebase to latest perf/core branch. 2. Add following clarification in patch description according to Arnaldo's suggestion. "That will be done when, for instance, processing build-ids, where we already have to process all samples to create the build-id table, take advantage of that to further amortize that processing by storing HEADER_SAMPLE_TIME to make 'perf report/script' faster when using --time." v4: Use perf script time style for timestamp printing. Also add with the printing of sample duration. v3: Remove the definitions of first_sample_time/last_sample_time from perf_session. Just define them in perf_evlist Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf.data-file-format.txt | 4 ++ tools/perf/util/evlist.h | 2 + tools/perf/util/header.c | 60 ++++++++++++++++++++++ tools/perf/util/header.h | 1 + 4 files changed, 67 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf.data-file-format.txt b/tools/perf/Documentation/perf.data-file-format.txt index 15e8b48077ba..f7d85e89a98a 100644 --- a/tools/perf/Documentation/perf.data-file-format.txt +++ b/tools/perf/Documentation/perf.data-file-format.txt @@ -261,6 +261,10 @@ struct { struct perf_header_string map; }[number_of_cache_levels]; + HEADER_SAMPLE_TIME = 21, + +Two uint64_t for the time of first sample and the time of last sample. + other bits are reserved and should ignored for now HEADER_FEAT_BITS = 256, diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index 75160666d305..e7fbca69cbac 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -50,6 +50,8 @@ struct perf_evlist { struct perf_evsel *selected; struct events_stats stats; struct perf_env *env; + u64 first_sample_time; + u64 last_sample_time; }; struct perf_evsel_str_handler { diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index ca73aa7be708..a326e0d8b5b6 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -16,6 +16,7 @@ #include #include #include +#include #include "evlist.h" #include "evsel.h" @@ -35,6 +36,7 @@ #include #include "asm/bug.h" #include "tool.h" +#include "time-utils.h" #include "sane_ctype.h" @@ -1180,6 +1182,20 @@ static int write_stat(struct feat_fd *ff __maybe_unused, return 0; } +static int write_sample_time(struct feat_fd *ff, + struct perf_evlist *evlist) +{ + int ret; + + ret = do_write(ff, &evlist->first_sample_time, + sizeof(evlist->first_sample_time)); + if (ret < 0) + return ret; + + return do_write(ff, &evlist->last_sample_time, + sizeof(evlist->last_sample_time)); +} + static void print_hostname(struct feat_fd *ff, FILE *fp) { fprintf(fp, "# hostname : %s\n", ff->ph->env.hostname); @@ -1505,6 +1521,28 @@ static void print_group_desc(struct feat_fd *ff, FILE *fp) } } +static void print_sample_time(struct feat_fd *ff, FILE *fp) +{ + struct perf_session *session; + char time_buf[32]; + double d; + + session = container_of(ff->ph, struct perf_session, header); + + timestamp__scnprintf_usec(session->evlist->first_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of first sample : %s\n", time_buf); + + timestamp__scnprintf_usec(session->evlist->last_sample_time, + time_buf, sizeof(time_buf)); + fprintf(fp, "# time of last sample : %s\n", time_buf); + + d = (double)(session->evlist->last_sample_time - + session->evlist->first_sample_time) / NSEC_PER_MSEC; + + fprintf(fp, "# sample duration : %10.3f ms\n", d); +} + static int __event_process_build_id(struct build_id_event *bev, char *filename, struct perf_session *session) @@ -2146,6 +2184,27 @@ out_free_caches: return -1; } +static int process_sample_time(struct feat_fd *ff, void *data __maybe_unused) +{ + struct perf_session *session; + u64 first_sample_time, last_sample_time; + int ret; + + session = container_of(ff->ph, struct perf_session, header); + + ret = do_read_u64(ff, &first_sample_time); + if (ret) + return -1; + + ret = do_read_u64(ff, &last_sample_time); + if (ret) + return -1; + + session->evlist->first_sample_time = first_sample_time; + session->evlist->last_sample_time = last_sample_time; + return 0; +} + struct feature_ops { int (*write)(struct feat_fd *ff, struct perf_evlist *evlist); void (*print)(struct feat_fd *ff, FILE *fp); @@ -2203,6 +2262,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPN(AUXTRACE, auxtrace, false), FEAT_OPN(STAT, stat, false), FEAT_OPN(CACHE, cache, true), + FEAT_OPR(SAMPLE_TIME, sample_time, false), }; struct header_print_data { diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 317fb901e47f..f28aaaa3a440 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -35,6 +35,7 @@ enum { HEADER_AUXTRACE, HEADER_STAT, HEADER_CACHE, + HEADER_SAMPLE_TIME, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; -- cgit v1.2.3-70-g09d2 From 68588baf8d01826673f2874f434123029e519052 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:42 +0800 Subject: perf record: Record the first and last sample time in the header In the default 'perf record' configuration, all samples are processed, to create the HEADER_BUILD_ID table. So it's very easy to get the first/last samples and save the time to perf file header via the function write_sample_time(). Later, at post processing time, perf report/script will fetch the time from perf file header. Committer testing: # perf record -a sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 2.099 MB perf.data (1101 samples) ] [root@jouet home]# perf report --header | grep "time of " # time of first sample : 22947.909226 # time of last sample : 22948.910704 # # perf report -D | grep PERF_RECORD_SAMPLE\( 0 22947909226101 0x20bb68 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa21b1af3 period: 1 addr: 0 0 22947909229928 0x20bb98 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa200d204 period: 1 addr: 0 3 22948910397351 0x219360 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 28251/28251: 0xffffffffa22071d8 period: 169518 addr: 0 0 22948910652380 0x20f120 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 198807 addr: 0 2 22948910704034 0x2172d0 [0x30]: PERF_RECORD_SAMPLE(IP, 0x4001): 0/0: 0xffffffffa2856816 period: 88111 addr: 0 # Changelog: v7: Just update the patch description according to Arnaldo's suggestion. v6: Currently '--buildid-all' is not enabled at default. So the walking on all samples is the default operation. There is no big overhead to calculate the timestamp boundary in process_sample_event handler once we already go through all samples. So the timestamp boundary calculation is enabled by default when '--buildid-all' is not enabled. While if '--buildid-all' is enabled, we creates a new option "--timestamp-boundary" for user to decide if it enables the timestamp boundary calculation. v5: There is an issue that the sample walking can only work when '--buildid-all' is not enabled. So we need to let the walking be able to work even if '--buildid-all' is enabled and let the processing skips the dso hit marking for this case. At first, I want to provide a new option "--record-time-boundaries". While after consideration, I think a new option is not very necessary. v3: Remove the definitions of first_sample_time and last_sample_time from struct record and directly save them in perf_evlist. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-record.txt | 3 +++ tools/perf/builtin-record.c | 18 +++++++++++++++--- 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 5a626ef666c2..3eea6de35a38 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -430,6 +430,9 @@ Configure all used events to run in user space. --timestamp-filename Append timestamp to output file name. +--timestamp-boundary:: +Record timestamp boundary (time of first/last samples). + --switch-output[=mode]:: Generate multiple perf.data files, timestamp prefixed, switching to a new one based on 'mode' value: diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 50385d89c497..65681a1a292a 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -78,6 +78,7 @@ struct record { bool no_buildid_cache_set; bool buildid_all; bool timestamp_filename; + bool timestamp_boundary; struct switch_output switch_output; unsigned long long samples; }; @@ -409,8 +410,15 @@ static int process_sample_event(struct perf_tool *tool, { struct record *rec = container_of(tool, struct record, tool); - rec->samples++; + if (rec->evlist->first_sample_time == 0) + rec->evlist->first_sample_time = sample->time; + + rec->evlist->last_sample_time = sample->time; + if (rec->buildid_all) + return 0; + + rec->samples++; return build_id__mark_dso_hit(tool, event, sample, evsel, machine); } @@ -435,9 +443,11 @@ static int process_buildids(struct record *rec) /* * If --buildid-all is given, it marks all DSO regardless of hits, - * so no need to process samples. + * so no need to process samples. But if timestamp_boundary is enabled, + * it still needs to walk on all samples to get the timestamps of + * first/last samples. */ - if (rec->buildid_all) + if (rec->buildid_all && !rec->timestamp_boundary) rec->tool.sample = NULL; return perf_session__process_events(session); @@ -1621,6 +1631,8 @@ static struct option __record_options[] = { "Record build-id of all DSOs regardless of hits"), OPT_BOOLEAN(0, "timestamp-filename", &record.timestamp_filename, "append timestamp to output filename"), + OPT_BOOLEAN(0, "timestamp-boundary", &record.timestamp_boundary, + "Record timestamp boundary (time of first/last samples)"), OPT_STRING_OPTARG_SET(0, "switch-output", &record.switch_output.str, &record.switch_output.set, "signal,size,time", "Switch output when receive SIGUSR2 or cross size,time threshold", -- cgit v1.2.3-70-g09d2 From 13a70f350665580708ab11f725d3578eaacbf2d0 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:43 +0800 Subject: perf tools: Create function to parse time percent Current perf report/script/... have a --time option to limit the time range of output. But right now it only supports absolute time, add support for time percentage. For example: 1. Select the second 10% time slice perf report --time 10%/2 2. Select from 0% to 10% time slice perf report --time 0%-10% It also support the multiple time ranges. 3. Select the first and second 10% time slices perf report --time 10%/1,10%/2 4. Select from 0% to 10% and 30% to 40% slices perf report --time 0%-10%,30%-40% Changelog: v4: An issue is found. Following passes. perf script --time 10%/10x12321xsdfdasfdsafdsafdsa Now it uses strtol to replace atoi. Committer notes: This just puts in place the infrastructure, so the examples in this cset comment will only work later, after more patches in this series are applied. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 205 ++++++++++++++++++++++++++++++++++++++++--- tools/perf/util/time-utils.h | 3 + 2 files changed, 196 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 81927d027417..61c46022de0b 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -6,6 +6,7 @@ #include #include #include +#include #include "perf.h" #include "debug.h" @@ -60,11 +61,10 @@ static int parse_timestr_sec_nsec(struct perf_time_interval *ptime, return 0; } -int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +static int split_start_end(char **start, char **end, const char *ostr, char ch) { char *start_str, *end_str; char *d, *str; - int rc = 0; if (ostr == NULL || *ostr == '\0') return 0; @@ -74,25 +74,35 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) if (str == NULL) return -ENOMEM; - ptime->start = 0; - ptime->end = 0; - - /* str has the format: , - * variations: , - * , - * , - */ start_str = str; - d = strchr(start_str, ','); + d = strchr(start_str, ch); if (d) { *d = '\0'; ++d; } end_str = d; + *start = start_str; + *end = end_str; + + return 0; +} + +int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) +{ + char *start_str = NULL, *end_str; + int rc; + + rc = split_start_end(&start_str, &end_str, ostr, ','); + if (rc || !start_str) + return rc; + + ptime->start = 0; + ptime->end = 0; + rc = parse_timestr_sec_nsec(ptime, start_str, end_str); - free(str); + free(start_str); /* make sure end time is after start time if it was given */ if (rc == 0 && ptime->end && ptime->end < ptime->start) @@ -104,6 +114,177 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) return rc; } +static int parse_percent(double *pcnt, char *str) +{ + char *c; + + c = strchr(str, '%'); + if (c) + *c = '\0'; + else + return -1; + + *pcnt = atof(str) / 100.0; + + return 0; +} + +static int percent_slash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *p, *end_str; + double pcnt, start_pcnt, end_pcnt; + u64 total = end - start; + int i; + + /* + * Example: + * 10%/2: select the second 10% slice and the third 10% slice + */ + + /* We can modify this string since the original one is copied */ + p = strchr(str, '/'); + if (!p) + return -1; + + *p = '\0'; + if (parse_percent(&pcnt, str) < 0) + return -1; + + p++; + i = (int)strtol(p, &end_str, 10); + if (*end_str) + return -1; + + if (pcnt <= 0.0) + return -1; + + start_pcnt = pcnt * (i - 1); + end_pcnt = pcnt * i; + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +static int percent_dash_split(char *str, struct perf_time_interval *ptime, + u64 start, u64 end) +{ + char *start_str = NULL, *end_str; + double start_pcnt, end_pcnt; + u64 total = end - start; + int ret; + + /* + * Example: 0%-10% + */ + + ret = split_start_end(&start_str, &end_str, str, '-'); + if (ret || !start_str) + return ret; + + if ((parse_percent(&start_pcnt, start_str) != 0) || + (parse_percent(&end_pcnt, end_str) != 0)) { + free(start_str); + return -1; + } + + free(start_str); + + if (start_pcnt < 0.0 || start_pcnt > 1.0 || + end_pcnt < 0.0 || end_pcnt > 1.0 || + start_pcnt > end_pcnt) { + return -1; + } + + ptime->start = start + round(start_pcnt * total); + ptime->end = start + round(end_pcnt * total); + + return 0; +} + +typedef int (*time_pecent_split)(char *, struct perf_time_interval *, + u64 start, u64 end); + +static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end, + time_pecent_split func) +{ + char *str, *p1, *p2; + int len, ret, i = 0; + + str = strdup(ostr); + if (str == NULL) + return -ENOMEM; + + len = strlen(str); + p1 = str; + + while (p1 < str + len) { + if (i >= num) { + free(str); + return -1; + } + + p2 = strchr(p1, ','); + if (p2) + *p2 = '\0'; + + ret = (func)(p1, &ptime_buf[i], start, end); + if (ret < 0) { + free(str); + return -1; + } + + pr_debug("start time %d: %" PRIu64 ", ", i, ptime_buf[i].start); + pr_debug("end time %d: %" PRIu64 "\n", i, ptime_buf[i].end); + + i++; + + if (p2) + p1 = p2 + 1; + else + break; + } + + free(str); + return i; +} + +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end) +{ + char *c; + + /* + * ostr example: + * 10%/2,10%/3: select the second 10% slice and the third 10% slice + * 0%-10%,30%-40%: multiple time range + */ + + memset(ptime_buf, 0, sizeof(*ptime_buf) * num); + + c = strchr(ostr, '/'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_slash_split); + } + + c = strchr(ostr, '-'); + if (c) { + return percent_comma_split(ptime_buf, num, ostr, start, + end, percent_dash_split); + } + + return -1; +} + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) { /* if time is not set don't drop sample */ diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 15b475c50ccf..23087231785a 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -13,6 +13,9 @@ int parse_nsec_time(const char *str, u64 *ptime); int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); +int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, + const char *ostr, u64 start, u64 end); + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); -- cgit v1.2.3-70-g09d2 From 9a9b8b4b2271e763c1600311a3d4ecc2ac359b55 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:44 +0800 Subject: perf tools: Create function to perform multiple time range checking Previous patch supports the multiple time range. For example, select the first and second 10% time slices. perf report --time 10%/1,10%/2 We need a function to check if a timestamp is in the ranges of [0, 10%) and [10%, 20%]. Note that it includes the last element in [10%, 20%] but it doesn't include the last element in [0, 10%). It's to avoid the overlap. This patch implments a new function perf_time__ranges_skip_sample for this checking. Change log: v4: Let perf_time__ranges_skip_sample be compatible with perf_time__skip_sample when only one time range. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/time-utils.h | 3 +++ 2 files changed, 31 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 61c46022de0b..3f7f18f06982 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -300,6 +300,34 @@ bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) return false; } +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp) +{ + struct perf_time_interval *ptime; + int i; + + if ((timestamp == 0) || (num == 0)) + return false; + + if (num == 1) + return perf_time__skip_sample(&ptime_buf[0], timestamp); + + /* + * start/end of multiple time ranges must be valid. + */ + for (i = 0; i < num; i++) { + ptime = &ptime_buf[i]; + + if (timestamp >= ptime->start && + ((timestamp < ptime->end && i < num - 1) || + (timestamp <= ptime->end && i == num - 1))) { + break; + } + } + + return (i == num) ? true : false; +} + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz) { u64 sec = timestamp / NSEC_PER_SEC; diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 23087231785a..34d5eba26bf5 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -18,6 +18,9 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); +bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, + int num, u64 timestamp); + int timestamp__scnprintf_usec(u64 timestamp, char *buf, size_t sz); int fetch_current_timestamp(char *buf, size_t sz); -- cgit v1.2.3-70-g09d2 From 5b969bc766807e5c2f184d1d6f97b8471de946f1 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:45 +0800 Subject: perf report: Support time percent and multiple time ranges perf report has a --time option to limit the time range of output. It only supports absolute time. Now this option is extended to support multiple time ranges and support the percent of time. For example: 1. Select the first and second 10% time slices: perf report --time 10%/1,10%/2 2. Select from 0% to 10% and 30% to 40% slices: perf report --time 0%-10%,30%-40% Changelog: v6: Fix the merge issue with latest perf/core branch. No functional changes. v5: Add checking of first/last sample time to detect if it's recorded in perf.data. If it's not recorded, returns error message to user. v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample v3: Since the definitions of first_sample_time/last_sample_time are moved from perf_session to perf_evlist so change the related code. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-6-git-send-email-yao.jin@linux.intel.com [ Add missing colons at end of examples in the man page ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 20 ++++++++++++++++++++ tools/perf/builtin-report.c | 31 ++++++++++++++++++++++++++----- 2 files changed, 46 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index ddde2b54af57..1e02c4e1a81f 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -402,6 +402,26 @@ OPTIONS stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multiple time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + + For example: + Select the second 10% time slice: + + perf report --time 10%/2 + + Select from 0% to 10% time slice: + + perf report --time 0%-10% + + Select the first and second 10% time slices: + + perf report --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices: + + perf report --time 0%-10%,30%-40% + --itrace:: Options for decoding instruction tracing data. The options are: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 07827cd51480..770bf8a614f2 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,8 @@ #include #include +#define PTIME_RANGE_MAX 10 + struct report { struct perf_tool tool; struct perf_session *session; @@ -69,7 +71,8 @@ struct report { const char *cpu_list; const char *symbol_filter_str; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; float min_percent; u64 nr_entries; u64 queue_size; @@ -202,8 +205,10 @@ static int process_sample_event(struct perf_tool *tool, }; int ret = 0; - if (perf_time__skip_sample(&rep->ptime, sample->time)) + if (perf_time__ranges_skip_sample(rep->ptime_range, rep->range_num, + sample->time)) { return 0; + } if (machine__resolve(machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", @@ -1093,9 +1098,25 @@ repeat: if (symbol__init(&session->header.env) < 0) goto error; - if (perf_time__parse_str(&report.ptime, report.time_str) != 0) { - pr_err("Invalid time string\n"); - return -EINVAL; + if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { + if (session->evlist->first_sample_time == 0 && + session->evlist->last_sample_time == 0) { + pr_err("No first/last sample time in perf data\n"); + return -EINVAL; + } + + report.range_num = perf_time__percent_parse_str( + report.ptime_range, PTIME_RANGE_MAX, + report.time_str, + session->evlist->first_sample_time, + session->evlist->last_sample_time); + + if (report.range_num < 0) { + pr_err("Invalid time string\n"); + return -EINVAL; + } + } else { + report.range_num = 1; } sort__setup_elide(stdout); -- cgit v1.2.3-70-g09d2 From 2ab046cd01e33a854798a3e245c9e3f32b950a7d Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Fri, 8 Dec 2017 21:13:46 +0800 Subject: perf script: Support time percent and multiple time ranges perf script has a --time option to limit the time range of output. It only supports absolute time. Now this option is extended to support multiple time ranges and support the percent of time. For example: 1. Select the first and second 10% time slices: perf script --time 10%/1,10%/2 2. Select from 0% to 10% and 30% to 40% slices: perf script --time 0%-10%,30%-40% Changelog: v6: Fix the merge issue with latest perf/core branch. No functional changes. v5: Add checking of first/last sample time to detect if it's recorded in perf.data. If it's not recorded, returns error message to user. v4: Remove perf_time__skip_sample, only uses perf_time__ranges_skip_sample v3: Since the definitions of first_sample_time/last_sample_time are moved from perf_session to perf_evlist so change the related code. Signed-off-by: Jin Yao Acked-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1512738826-2628-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 16 +++++++++++++++ tools/perf/builtin-script.c | 34 ++++++++++++++++++++++++++------ 2 files changed, 44 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 974ceb12c7f3..7b622a812a72 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -329,6 +329,22 @@ include::itrace.txt[] stop time is not given (i.e, time string is 'x.y,') then analysis goes to end of file. + Also support time percent with multipe time range. Time string is + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + + For example: + Select the second 10% time slice + perf script --time 10%/2 + + Select from 0% to 10% time slice + perf script --time 0%-10% + + Select the first and second 10% time slices + perf script --time 10%/1,10%/2 + + Select from 0% to 10% and 30% to 40% slices + perf script --time 0%-10%,30%-40% + --max-blocks:: Set the maximum number of program blocks to print with brstackasm for each sample. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 77e47cf39f2c..330dcd9b9b8f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1436,6 +1436,8 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } +#define PTIME_RANGE_MAX 10 + struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1449,7 +1451,8 @@ struct perf_script { struct thread_map *threads; int name_width; const char *time_str; - struct perf_time_interval ptime; + struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + int range_num; }; static int perf_evlist__max_name_len(struct perf_evlist *evlist) @@ -1734,8 +1737,10 @@ static int process_sample_event(struct perf_tool *tool, struct perf_script *scr = container_of(tool, struct perf_script, tool); struct addr_location al; - if (perf_time__skip_sample(&scr->ptime, sample->time)) + if (perf_time__ranges_skip_sample(scr->ptime_range, scr->range_num, + sample->time)) { return 0; + } if (debug_mode) { if (sample->time < last_timestamp) { @@ -3360,10 +3365,27 @@ int cmd_script(int argc, const char **argv) goto out_delete; /* needs to be parsed after looking up reference time */ - if (perf_time__parse_str(&script.ptime, script.time_str) != 0) { - pr_err("Invalid time string\n"); - err = -EINVAL; - goto out_delete; + if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { + if (session->evlist->first_sample_time == 0 && + session->evlist->last_sample_time == 0) { + pr_err("No first/last sample time in perf data\n"); + err = -EINVAL; + goto out_delete; + } + + script.range_num = perf_time__percent_parse_str( + script.ptime_range, PTIME_RANGE_MAX, + script.time_str, + session->evlist->first_sample_time, + session->evlist->last_sample_time); + + if (script.range_num < 0) { + pr_err("Invalid time string\n"); + err = -EINVAL; + goto out_delete; + } + } else { + script.range_num = 1; } err = __cmd_script(&script); -- cgit v1.2.3-70-g09d2 From 24787afbcd0127859394eb9230659ee6d5dc4644 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:45 +0100 Subject: perf tools: Enable LIBBABELTRACE by default There's no reason anymore to treat babel trace in a special way, because a) we no longer display its state b) the needed babeltrace library is now out and well adopted among distros. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-2-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 2 +- tools/perf/Makefile.perf | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index f050f38d8fa3..12dec6ea5ed2 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -780,7 +780,7 @@ else NO_PERF_READ_VDSOX32 := 1 endif -ifdef LIBBABELTRACE +ifndef NO_LIBBABELTRACE $(call feature_check,libbabeltrace) ifeq ($(feature-libbabeltrace), 1) CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 68cf1360a3f3..9fdefd748e2e 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -77,7 +77,7 @@ include ../scripts/utilities.mak # # Define NO_ZLIB if you do not want to support compressed kernel modules # -# Define LIBBABELTRACE if you DO want libbabeltrace support +# Define NO_LIBBABELTRACE if you do not want libbabeltrace support # for CTF data format. # # Define NO_LZMA if you do not want to support compressed (xz) kernel modules -- cgit v1.2.3-70-g09d2 From db9fc765e8f4d0144d13cdfa4be32d81eae01710 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:46 +0100 Subject: perf tools: Display perf_event_attr::namespaces debug info Display namespaces bit in -vv debug display: $ perf record -vv --namespaces ... ... perf_event_attr: size 112 ... namespaces 1 Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-3-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index a4d256ea0dc4..c435b2444153 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -1577,6 +1577,7 @@ int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, PRINT_ATTRf(use_clockid, p_unsigned); PRINT_ATTRf(context_switch, p_unsigned); PRINT_ATTRf(write_backward, p_unsigned); + PRINT_ATTRf(namespaces, p_unsigned); PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); PRINT_ATTRf(bp_type, p_unsigned); -- cgit v1.2.3-70-g09d2 From 81df978c49379481716aef591de77313c286d747 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:48 +0100 Subject: perf: Add sample_id to PERF_RECORD_ITRACE_START event comment Adding missing sample_id line into PERF_RECORD_ITRACE_START event comment. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-5-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 1 + tools/include/uapi/linux/perf_event.h | 1 + 2 files changed, 2 insertions(+) (limited to 'tools') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index b9a4953018ed..8bb66e8da945 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index b9a4953018ed..8bb66e8da945 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -864,6 +864,7 @@ enum perf_event_type { * struct perf_event_header header; * u32 pid; * u32 tid; + * struct sample_id sample_id; * }; */ PERF_RECORD_ITRACE_START = 12, -- cgit v1.2.3-70-g09d2 From 972c14884728bf5f69ec69cfb1beeec1a9cd29ee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:51 +0100 Subject: perf: Update PERF_RECORD_MISC_* comment for perf_event_header::misc bit 13 The perf_event_header::misc bit 13 is shared on different events and next patch is adding yet another bit 13 user. Updating the comment to make it more structured and clear which events use bit 13. Suggested-by: Peter Zijlstra Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Link: http://lkml.kernel.org/r/20180107160356.28203-8-jolsa@kernel.org [ Update the tools/include/uapi/linux copy ] Signed-off-by: Arnaldo Carvalho de Melo --- include/uapi/linux/perf_event.h | 9 ++++++--- tools/include/uapi/linux/perf_event.h | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index 8bb66e8da945..c77c9a2ebbbb 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index 8bb66e8da945..c77c9a2ebbbb 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -612,9 +612,12 @@ struct perf_event_mmap_page { */ #define PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT (1 << 12) /* - * PERF_RECORD_MISC_MMAP_DATA and PERF_RECORD_MISC_COMM_EXEC are used on - * different events so can reuse the same bit position. - * Ditto PERF_RECORD_MISC_SWITCH_OUT. + * Following PERF_RECORD_MISC_* are used on different + * events, so can reuse the same bit position: + * + * PERF_RECORD_MISC_MMAP_DATA - PERF_RECORD_MMAP* events + * PERF_RECORD_MISC_COMM_EXEC - PERF_RECORD_COMM event + * PERF_RECORD_MISC_SWITCH_OUT - PERF_RECORD_SWITCH* events */ #define PERF_RECORD_MISC_MMAP_DATA (1 << 13) #define PERF_RECORD_MISC_COMM_EXEC (1 << 13) -- cgit v1.2.3-70-g09d2 From 28a0b39877f5ed64ae9fadf95dddb90999309dee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:52 +0100 Subject: perf script: Add support to display sample misc field Adding support to display sample misc field in form of letter for each bit: # perf script -F +misc ... sched-messaging 1414 K 28690.636582: 4590 cycles ... sched-messaging 1407 U 28690.636600: 325620 cycles ... sched-messaging 1414 K 28690.636608: 19473 cycles ... misc field __________/ The misc bits are assigned to following letters: PERF_RECORD_MISC_KERNEL K PERF_RECORD_MISC_USER U PERF_RECORD_MISC_HYPERVISOR H PERF_RECORD_MISC_GUEST_KERNEL G PERF_RECORD_MISC_GUEST_USER g PERF_RECORD_MISC_MMAP_DATA* M PERF_RECORD_MISC_COMM_EXEC E PERF_RECORD_MISC_SWITCH_OUT S Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-9-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 20 ++++++++- tools/perf/builtin-script.c | 74 +++++++++++++++++++++++++++----- tools/perf/util/event.h | 1 + tools/perf/util/evsel.c | 1 + 4 files changed, 84 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 7b622a812a72..93ae8d60e3d3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -117,7 +117,7 @@ OPTIONS Comma separated list of fields to print. Options are: comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period, iregs, uregs, brstack, brstacksym, flags, bpf-output, brstackinsn, - brstackoff, callindent, insn, insnlen, synth, phys_addr, metric. + brstackoff, callindent, insn, insnlen, synth, phys_addr, metric, misc. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -F sw:comm,tid,time,ip,sym and -F trace:time,cpu,trace @@ -225,6 +225,24 @@ OPTIONS that the metric computed is averaged over the whole sampling period, not just for the sample point. + For sample events it's possible to display misc field with -F +misc option, + following letters are displayed for each bit: + + PERF_RECORD_MISC_KERNEL K + PERF_RECORD_MISC_USER U + PERF_RECORD_MISC_HYPERVISOR H + PERF_RECORD_MISC_GUEST_KERNEL G + PERF_RECORD_MISC_GUEST_USER g + PERF_RECORD_MISC_MMAP_DATA* M + PERF_RECORD_MISC_COMM_EXEC E + PERF_RECORD_MISC_SWITCH_OUT S + + $ perf script -F +misc ... + sched-messaging 1414 K 28690.636582: 4590 cycles ... + sched-messaging 1407 U 28690.636600: 325620 cycles ... + sched-messaging 1414 K 28690.636608: 19473 cycles ... + misc field ___________/ + -k:: --vmlinux=:: vmlinux pathname diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 330dcd9b9b8f..bb603495cf4a 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -93,6 +93,7 @@ enum perf_output_field { PERF_OUTPUT_PHYS_ADDR = 1U << 26, PERF_OUTPUT_UREGS = 1U << 27, PERF_OUTPUT_METRIC = 1U << 28, + PERF_OUTPUT_MISC = 1U << 29, }; struct output_option { @@ -128,6 +129,7 @@ struct output_option { {.str = "synth", .field = PERF_OUTPUT_SYNTH}, {.str = "phys_addr", .field = PERF_OUTPUT_PHYS_ADDR}, {.str = "metric", .field = PERF_OUTPUT_METRIC}, + {.str = "misc", .field = PERF_OUTPUT_MISC}, }; enum { @@ -594,7 +596,8 @@ static int perf_sample__fprintf_uregs(struct perf_sample *sample, static int perf_sample__fprintf_start(struct perf_sample *sample, struct thread *thread, - struct perf_evsel *evsel, FILE *fp) + struct perf_evsel *evsel, + u32 type, FILE *fp) { struct perf_event_attr *attr = &evsel->attr; unsigned long secs; @@ -624,6 +627,47 @@ static int perf_sample__fprintf_start(struct perf_sample *sample, printed += fprintf(fp, "[%03d] ", sample->cpu); } + if (PRINT_FIELD(MISC)) { + int ret = 0; + + #define has(m) \ + (sample->misc & PERF_RECORD_MISC_##m) == PERF_RECORD_MISC_##m + + if (has(KERNEL)) + ret += fprintf(fp, "K"); + if (has(USER)) + ret += fprintf(fp, "U"); + if (has(HYPERVISOR)) + ret += fprintf(fp, "H"); + if (has(GUEST_KERNEL)) + ret += fprintf(fp, "G"); + if (has(GUEST_USER)) + ret += fprintf(fp, "g"); + + switch (type) { + case PERF_RECORD_MMAP: + case PERF_RECORD_MMAP2: + if (has(MMAP_DATA)) + ret += fprintf(fp, "M"); + break; + case PERF_RECORD_COMM: + if (has(COMM_EXEC)) + ret += fprintf(fp, "E"); + break; + case PERF_RECORD_SWITCH: + case PERF_RECORD_SWITCH_CPU_WIDE: + if (has(SWITCH_OUT)) + ret += fprintf(fp, "S"); + default: + break; + } + + #undef has + + ret += fprintf(fp, "%*s", 6 - ret, " "); + printed += ret; + } + if (PRINT_FIELD(TIME)) { nsecs = sample->time; secs = nsecs / NSEC_PER_SEC; @@ -1502,7 +1546,7 @@ static void script_print_metric(void *ctx, const char *color, if (!fmt) return; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); if (color) color_fprintf(mctx->fp, color, fmt, val); @@ -1516,7 +1560,7 @@ static void script_new_line(void *ctx) struct metric_ctx *mctx = ctx; perf_sample__fprintf_start(mctx->sample, mctx->thread, mctx->evsel, - mctx->fp); + PERF_RECORD_SAMPLE, mctx->fp); fputs("\tmetric: ", mctx->fp); } @@ -1584,7 +1628,8 @@ static void process_event(struct perf_script *script, ++es->samples; - perf_sample__fprintf_start(sample, thread, evsel, fp); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SAMPLE, fp); if (PRINT_FIELD(PERIOD)) fprintf(fp, "%10" PRIu64 " ", sample->period); @@ -1833,7 +1878,8 @@ static int process_comm_event(struct perf_tool *tool, sample->tid = event->comm.tid; sample->pid = event->comm.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_COMM, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1868,7 +1914,8 @@ static int process_namespaces_event(struct perf_tool *tool, sample->tid = event->namespaces.tid; sample->pid = event->namespaces.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_NAMESPACES, stdout); perf_event__fprintf(event, stdout); ret = 0; out: @@ -1901,7 +1948,8 @@ static int process_fork_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_FORK, stdout); perf_event__fprintf(event, stdout); thread__put(thread); @@ -1930,7 +1978,8 @@ static int process_exit_event(struct perf_tool *tool, sample->tid = event->fork.tid; sample->pid = event->fork.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_EXIT, stdout); perf_event__fprintf(event, stdout); if (perf_event__process_exit(tool, event, sample, machine) < 0) @@ -1965,7 +2014,8 @@ static int process_mmap_event(struct perf_tool *tool, sample->tid = event->mmap.tid; sample->pid = event->mmap.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -1996,7 +2046,8 @@ static int process_mmap2_event(struct perf_tool *tool, sample->tid = event->mmap2.tid; sample->pid = event->mmap2.pid; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_MMAP2, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; @@ -2022,7 +2073,8 @@ static int process_switch_event(struct perf_tool *tool, return -1; } - perf_sample__fprintf_start(sample, thread, evsel, stdout); + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_SWITCH, stdout); perf_event__fprintf(event, stdout); thread__put(thread); return 0; diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 1ae95efbfb95..e5fbd6dd1b01 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -205,6 +205,7 @@ struct perf_sample { u32 flags; u16 insn_len; u8 cpumode; + u16 misc; char insn[MAX_INSN]; void *raw_data; struct ip_callchain *callchain; diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index c435b2444153..d934f04e3110 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2042,6 +2042,7 @@ int perf_evsel__parse_sample(struct perf_evsel *evsel, union perf_event *event, data->stream_id = data->id = data->time = -1ULL; data->period = evsel->attr.sample_period; data->cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + data->misc = event->header.misc; data->id = -1ULL; data->data_src = PERF_MEM_DATA_SRC_NONE; -- cgit v1.2.3-70-g09d2 From 3d7c27b6dbca4c90e7d921b45c2240e7c3cb92a2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:53 +0100 Subject: perf script: Add support to display lost events Adding option to display lost events: $ perf script --show-lost-events ... mplayer 13810 [002] 468011.402396: 100 cycles:ppp: ff.. mplayer 13810 [002] 468011.402396: PERF_RECORD_LOST lost 3880 mplayer 13810 [002] 468011.402397: 100 cycles:ppp: ff.. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-10-jolsa@kernel.org [ Use PRIu64 when printing u64 values, fixing the build in some arches ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 3 +++ tools/perf/builtin-script.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/event.c | 8 ++++++++ 3 files changed, 39 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 93ae8d60e3d3..806ec6391fd6 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -300,6 +300,9 @@ OPTIONS Display context switch events i.e. events of type PERF_RECORD_SWITCH or PERF_RECORD_SWITCH_CPU_WIDE. +--show-lost-events + Display lost events i.e. events of type PERF_RECORD_LOST. + --demangle:: Demangle symbol names to human readable form. It's enabled by default, disable with --no-demangle. diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index bb603495cf4a..c1cce474c0f1 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1489,6 +1489,7 @@ struct perf_script { bool show_mmap_events; bool show_switch_events; bool show_namespace_events; + bool show_lost_events; bool allocated; bool per_event_dump; struct cpu_map *cpus; @@ -2080,6 +2081,29 @@ static int process_switch_event(struct perf_tool *tool, return 0; } +static int +process_lost_event(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + struct perf_script *script = container_of(tool, struct perf_script, tool); + struct perf_session *session = script->session; + struct perf_evsel *evsel = perf_evlist__id2evsel(session->evlist, sample->id); + struct thread *thread; + + thread = machine__findnew_thread(machine, sample->pid, + sample->tid); + if (thread == NULL) + return -1; + + perf_sample__fprintf_start(sample, thread, evsel, + PERF_RECORD_LOST, stdout); + perf_event__fprintf(event, stdout); + thread__put(thread); + return 0; +} + static void sig_handler(int sig __maybe_unused) { session_done = 1; @@ -2174,6 +2198,8 @@ static int __cmd_script(struct perf_script *script) script->tool.context_switch = process_switch_event; if (script->show_namespace_events) script->tool.namespaces = process_namespaces_event; + if (script->show_lost_events) + script->tool.lost = process_lost_event; if (perf_script__setup_per_event_dump(script)) { pr_err("Couldn't create the per event dump files\n"); @@ -3110,6 +3136,8 @@ int cmd_script(int argc, const char **argv) "Show context switch events (if recorded)"), OPT_BOOLEAN('\0', "show-namespace-events", &script.show_namespace_events, "Show namespace events (if recorded)"), + OPT_BOOLEAN('\0', "show-lost-events", &script.show_lost_events, + "Show lost events (if recorded)"), OPT_BOOLEAN('\0', "per-event-dump", &script.per_event_dump, "Dump trace output to files named by the monitored events"), OPT_BOOLEAN('f', "force", &symbol_conf.force, "don't complain, do it"), diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 97a8ef9980db..44e603c27944 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -1435,6 +1435,11 @@ size_t perf_event__fprintf_switch(union perf_event *event, FILE *fp) event->context_switch.next_prev_tid); } +static size_t perf_event__fprintf_lost(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " lost %" PRIu64 "\n", event->lost.lost); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -1467,6 +1472,9 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_SWITCH_CPU_WIDE: ret += perf_event__fprintf_switch(event, fp); break; + case PERF_RECORD_LOST: + ret += perf_event__fprintf_lost(event, fp); + break; default: ret += fprintf(fp, "\n"); } -- cgit v1.2.3-70-g09d2 From 075ca1ebb25e798e4072a1e3a482b829bb51afb2 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:54 +0100 Subject: perf tools: Make the tool's warning messages optional I want to display the pure events status coming in the next patch and the tool's warnings are superfluous in the output. Making it optional, enabled by default. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-11-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 6 ++++-- tools/perf/util/tool.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 54e30f1bcbd7..8d0fa2f8da16 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1773,7 +1773,8 @@ done: err = perf_session__flush_thread_stacks(session); out_err: free(buf); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); auxtrace__free_events(session); return err; @@ -1929,7 +1930,8 @@ out: err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session); + if (!tool->no_warn) + perf_session__warn_about_errors(session); /* * We may switching perf.data output, make ordered_events * reusable. diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index 2532b558099b..183c91453522 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -76,6 +76,7 @@ struct perf_tool { bool ordered_events; bool ordering_requires_timestamps; bool namespace_events; + bool no_warn; enum show_feature_header show_feat_hdr; }; -- cgit v1.2.3-70-g09d2 From a4a4d0a7a2b20f7880262de4f51685baaf693476 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:55 +0100 Subject: perf report: Add --stats option to display quick data statistics Add --stats option to display quick data statistics of event numbers, without any further processing, like the one at the end of the perf report -D command. $ perf report --stat Aggregated stats: TOTAL events: 4566 MMAP events: 113 LOST events: 19 COMM events: 3 FORK events: 400 SAMPLE events: 3315 MMAP2 events: 32 FINISHED_ROUND events: 681 THREAD_MAP events: 1 CPU_MAP events: 1 TIME_CONV events: 1 I found this useful when hunting lost events for another change. Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-12-jolsa@kernel.org [ Rename it to --stats, plural ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 ++++ tools/perf/builtin-report.c | 26 +++++++++++++++++++++++++- 2 files changed, 29 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 1e02c4e1a81f..a7d11ef2fe25 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -457,6 +457,10 @@ include::itrace.txt[] will be printed. Each entry is function name or file/line. Enabled by default, disable with --no-inline. +--stats:: + Display overall events statistics without any further processing. + (like the one at the end of the perf report -D command) + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 770bf8a614f2..8e67a8c25ab1 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -62,6 +62,7 @@ struct report { bool show_threads; bool inverted_callchain; bool mem_mode; + bool stats_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -588,6 +589,20 @@ static void report__output_resort(struct report *rep) ui_progress__finish(); } +static void stats_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.no_warn = true; +} + +static int stats_print(struct report *rep) +{ + struct perf_session *session = rep->session; + + perf_session__fprintf_nr_events(session, stdout); + return 0; +} + static int __cmd_report(struct report *rep) { int ret; @@ -619,12 +634,18 @@ static int __cmd_report(struct report *rep) return ret; } + if (rep->stats_mode) + stats_setup(rep); + ret = perf_session__process_events(session); if (ret) { ui__error("failed to process sample\n"); return ret; } + if (rep->stats_mode) + return stats_print(rep); + report__warn_kptr_restrict(rep); evlist__for_each_entry(session->evlist, pos) @@ -781,6 +802,7 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1042,6 +1064,8 @@ repeat: report.tool.show_feat_hdr = SHOW_FEAT_HEADER; if (report.show_full_info) report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; + if (report.stats_mode) + use_browser = 0; if (strcmp(input_name, "-") != 0) setup_browser(true); @@ -1064,7 +1088,7 @@ repeat: ret = 0; goto error; } - } else if (use_browser == 0 && !quiet) { + } else if (use_browser == 0 && !quiet && !report.stats_mode) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); } -- cgit v1.2.3-70-g09d2 From 2d1073def3cb69aa44f99be7ef42da7cc561be1f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Jan 2018 12:03:47 -0300 Subject: perf trace: Beautify 'gettid' syscall result Before: # trace -a -e gettid sleep 0.01 4.863 ( 0.005 ms): Chrome_ChildIO/26241 gettid() = 26241 4.931 ( 0.004 ms): Chrome_IOThrea/26154 gettid() = 26154 4.942 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154 4.946 ( 0.001 ms): Chrome_IOThrea/26154 gettid() = 26154 4.970 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 # After: # trace -a -e gettid sleep 0.01 0.000 ( 0.009 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) 3.416 ( 0.002 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 3.424 ( 0.001 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 3.343 ( 0.002 ms): chrome/26116 gettid() = 26116 (chrome) 3.386 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) 4.003 ( 0.003 ms): Chrome_ChildIO/26241 gettid() = 26241 (Chrome_ChildIOT) 4.031 ( 0.002 ms): Chrome_IOThrea/26154 gettid() = 26154 (Chrome_IOThread) # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-kyg4gz2yy0vkrrh2vtq29u71@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7c57898095ea..71e64bdca86f 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -622,6 +622,7 @@ static struct syscall_fmt { .arg = { [2] = { .scnprintf = SCA_GETRANDOM_FLAGS, /* flags */ }, }, }, { .name = "getrlimit", .arg = { [0] = STRARRAY(resource, rlimit_resources), }, }, + { .name = "gettid", .errpid = true, }, { .name = "ioctl", .arg = { #if defined(__i386__) || defined(__x86_64__) -- cgit v1.2.3-70-g09d2 From 930f8b3479444d264aa33e008c4b00b86e8c62cc Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sun, 7 Jan 2018 17:03:56 +0100 Subject: perf report: Add --tasks option to display monitored tasks Add --tasks option to display monitored tasks stored in perf.data. Displaying pid/tid/ppid plus the command string aligned to distinguish parent and child tasks. $ perf record -a ... $ perf report --tasks # pid tid ppid comm 0 0 -1 |swapper 2 2 0 | kthreadd 14080 14080 2 | kworker/u17:1 4 4 2 | kworker/0:0H 6 6 2 | mm_percpu_wq ... 1 1 0 | systemd 23242 23242 1 | firefox 23242 23298 23242 | Cache2 I/O 23242 23304 23242 | GMPThread ... 1195 1195 1 | login 1611 1611 1195 | bash 1639 1639 1611 | startx 1663 1663 1639 | xinit 1673 1673 1663 | xmonad-x86_64-l 23939 23939 1673 | xterm 23941 23941 23939 | bash 23963 23963 23941 | mutt 24954 24954 23963 | offlineimap Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180107160356.28203-13-jolsa@kernel.org [ Make it --tasks, plural, --task works as well, as its unambiguous ] [ Use machine__find_thread(), not findnew(), as pointed out by Namhyung ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 4 + tools/perf/builtin-report.c | 136 ++++++++++++++++++++++++++++++- 2 files changed, 138 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index a7d11ef2fe25..856c3c7e94fa 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -461,6 +461,10 @@ include::itrace.txt[] Display overall events statistics without any further processing. (like the one at the end of the perf report -D command) +--tasks:: + Display monitored tasks stored in perf data. Displaying pid/tid/ppid + plus the command string aligned to distinguish parent and child tasks. + include::callchain-overhead-calculation.txt[] SEE ALSO diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8e67a8c25ab1..2c7bd85651dc 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -15,6 +15,7 @@ #include "util/color.h" #include #include +#include #include "util/symbol.h" #include "util/callchain.h" #include "util/values.h" @@ -63,6 +64,7 @@ struct report { bool inverted_callchain; bool mem_mode; bool stats_mode; + bool tasks_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -603,6 +605,124 @@ static int stats_print(struct report *rep) return 0; } +static void tasks_setup(struct report *rep) +{ + memset(&rep->tool, 0, sizeof(rep->tool)); + rep->tool.comm = perf_event__process_comm; + rep->tool.exit = perf_event__process_exit; + rep->tool.fork = perf_event__process_fork; + rep->tool.no_warn = true; +} + +struct task { + struct thread *thread; + struct list_head list; + struct list_head children; +}; + +static struct task *tasks_list(struct task *task, struct machine *machine) +{ + struct thread *parent_thread, *thread = task->thread; + struct task *parent_task; + + /* Already listed. */ + if (!list_empty(&task->list)) + return NULL; + + /* Last one in the chain. */ + if (thread->ppid == -1) + return task; + + parent_thread = machine__find_thread(machine, -1, thread->ppid); + if (!parent_thread) + return ERR_PTR(-ENOENT); + + parent_task = thread__priv(parent_thread); + list_add_tail(&task->list, &parent_task->children); + return tasks_list(parent_task, machine); +} + +static void task__print_level(struct task *task, FILE *fp, int level) +{ + struct thread *thread = task->thread; + struct task *child; + + fprintf(fp, " %8d %8d %8d |%*s%s\n", + thread->pid_, thread->tid, thread->ppid, + level, "", thread__comm_str(thread)); + + if (!list_empty(&task->children)) { + list_for_each_entry(child, &task->children, list) + task__print_level(child, fp, level + 1); + } +} + +static int tasks_print(struct report *rep, FILE *fp) +{ + struct perf_session *session = rep->session; + struct machine *machine = &session->machines.host; + struct task *tasks, *task; + unsigned int nr = 0, itask = 0, i; + struct rb_node *nd; + LIST_HEAD(list); + + /* + * No locking needed while accessing machine->threads, + * because --tasks is single threaded command. + */ + + /* Count all the threads. */ + for (i = 0; i < THREADS__TABLE_SIZE; i++) + nr += machine->threads[i].nr; + + tasks = malloc(sizeof(*tasks) * nr); + if (!tasks) + return -ENOMEM; + + for (i = 0; i < THREADS__TABLE_SIZE; i++) { + struct threads *threads = &machine->threads[i]; + + for (nd = rb_first(&threads->entries); nd; nd = rb_next(nd)) { + task = tasks + itask++; + + task->thread = rb_entry(nd, struct thread, rb_node); + INIT_LIST_HEAD(&task->children); + INIT_LIST_HEAD(&task->list); + thread__set_priv(task->thread, task); + } + } + + /* + * Iterate every task down to the unprocessed parent + * and link all in task children list. Task with no + * parent is added into 'list'. + */ + for (itask = 0; itask < nr; itask++) { + task = tasks + itask; + + if (!list_empty(&task->list)) + continue; + + task = tasks_list(task, machine); + if (IS_ERR(task)) { + pr_err("Error: failed to process tasks\n"); + free(tasks); + return PTR_ERR(task); + } + + if (task) + list_add_tail(&task->list, &list); + } + + fprintf(fp, "# %8s %8s %8s %s\n", "pid", "tid", "ppid", "comm"); + + list_for_each_entry(task, &list, list) + task__print_level(task, fp, 0); + + free(tasks); + return 0; +} + static int __cmd_report(struct report *rep) { int ret; @@ -637,6 +757,9 @@ static int __cmd_report(struct report *rep) if (rep->stats_mode) stats_setup(rep); + if (rep->tasks_mode) + tasks_setup(rep); + ret = perf_session__process_events(session); if (ret) { ui__error("failed to process sample\n"); @@ -646,6 +769,9 @@ static int __cmd_report(struct report *rep) if (rep->stats_mode) return stats_print(rep); + if (rep->tasks_mode) + return tasks_print(rep, stdout); + report__warn_kptr_restrict(rep); evlist__for_each_entry(session->evlist, pos) @@ -803,6 +929,7 @@ int cmd_report(int argc, const char **argv) OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), + OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1064,8 +1191,12 @@ repeat: report.tool.show_feat_hdr = SHOW_FEAT_HEADER; if (report.show_full_info) report.tool.show_feat_hdr = SHOW_FEAT_HEADER_FULL_INFO; - if (report.stats_mode) + if (report.stats_mode || report.tasks_mode) use_browser = 0; + if (report.stats_mode && report.tasks_mode) { + pr_err("Error: --tasks and --stats options cannot be used together\n"); + goto error; + } if (strcmp(input_name, "-") != 0) setup_browser(true); @@ -1088,7 +1219,8 @@ repeat: ret = 0; goto error; } - } else if (use_browser == 0 && !quiet && !report.stats_mode) { + } else if (use_browser == 0 && !quiet && + !report.stats_mode && !report.tasks_mode) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); } -- cgit v1.2.3-70-g09d2 From 6439d7d16c94324300eb392ed85e3632e489e197 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 9 Jan 2018 15:25:03 -0300 Subject: perf report: Introduce --mmaps Similar to --tasks, producing the same output plus /proc//maps similar lines for each mmap record present in a perf.data file. Please note that not all mmaps are stored, for instance, some of the non-executable mmaps are only stored when 'perf record --data' is used, when the user wants to resolve data accesses in addition to asking for executable mmaps to get the DSO with symtabs. E.g.: # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.018 MB perf.data (7 samples) ] [root@jouet ~]# perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4137 4137 -1 |sleep 5628a35a1000-5628a37aa000 r-xp 00000000 3147148 /usr/bin/sleep 7fb65ad51000-7fb65b134000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fb65b134000-7fb65b35e000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffd94b9f000-7ffd94ba1000 r-xp 00000000 0 [vdso] # # perf record sleep 1 [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data (8 samples) ] # perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4161 4161 -1 |sleep 55afae69a000-55afae8a3000 r-xp 00000000 3147148 /usr/bin/sleep 7f569f00d000-7f569f3f0000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7f569f3f0000-7f569f61a000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7fff6fffe000-7fff70000000 r-xp 00000000 0 [vdso] # # perf record time sleep 1 0.00user 0.00system 0:01.00elapsed 0%CPU (0avgtext+0avgdata 2156maxresident)k 0inputs+0outputs (0major+73minor)pagefaults 0swaps [ perf record: Woken up 1 times to write data ] [ perf record: Captured and wrote 0.019 MB perf.data (14 samples) ] # perf report --mmaps # pid tid ppid comm 0 0 -1 |swapper 4281 4281 -1 |time 560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time 7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso] 4282 4282 4281 | sleep 560560dca000-560560fcf000 r-xp 00000000 3190458 /usr/bin/time 564b4de3c000-564b4e045000 r-xp 00000000 3147148 /usr/bin/sleep 7f6a5a716000-7f6a5aaf9000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7f6a5aaf9000-7f6a5ad23000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7fc175196000-7fc175579000 r-xp 00000000 3149795 /usr/lib64/libc-2.26.so 7fc175579000-7fc1757a3000 r-xp 00000000 3149715 /usr/lib64/ld-2.26.so 7ffc924f6000-7ffc924f8000 r-xp 00000000 0 [vdso] 7ffcec7e6000-7ffcec7e8000 r-xp 00000000 0 [vdso] # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-zulwdlg5rfowogr1qznorvvc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 9 +++++- tools/perf/builtin-report.c | 50 +++++++++++++++++++++++++++++--- 2 files changed, 54 insertions(+), 5 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 856c3c7e94fa..63d0db3184c9 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -457,6 +457,13 @@ include::itrace.txt[] will be printed. Each entry is function name or file/line. Enabled by default, disable with --no-inline. +--mmaps:: + Show --tasks output plus mmap information in a format similar to + /proc//maps. + + Please note that not all mmaps are stored, options affecting which ones + are include 'perf record --data', for instance. + --stats:: Display overall events statistics without any further processing. (like the one at the end of the perf report -D command) @@ -469,4 +476,4 @@ include::callchain-overhead-calculation.txt[] SEE ALSO -------- -linkperf:perf-stat[1], linkperf:perf-annotate[1] +linkperf:perf-stat[1], linkperf:perf-annotate[1], linkperf:perf-record[1] diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2c7bd85651dc..dd4df9a5cd06 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -52,6 +52,7 @@ #include #include #include +#include #define PTIME_RANGE_MAX 10 @@ -65,6 +66,7 @@ struct report { bool mem_mode; bool stats_mode; bool tasks_mode; + bool mmaps_mode; bool header; bool header_only; bool nonany_branch_mode; @@ -608,6 +610,10 @@ static int stats_print(struct report *rep) static void tasks_setup(struct report *rep) { memset(&rep->tool, 0, sizeof(rep->tool)); + if (rep->mmaps_mode) { + rep->tool.mmap = perf_event__process_mmap; + rep->tool.mmap2 = perf_event__process_mmap2; + } rep->tool.comm = perf_event__process_comm; rep->tool.exit = perf_event__process_exit; rep->tool.fork = perf_event__process_fork; @@ -642,14 +648,46 @@ static struct task *tasks_list(struct task *task, struct machine *machine) return tasks_list(parent_task, machine); } +static size_t maps__fprintf_task(struct maps *maps, int indent, FILE *fp) +{ + size_t printed = 0; + struct rb_node *nd; + + for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { + struct map *map = rb_entry(nd, struct map, rb_node); + + printed += fprintf(fp, "%*s %" PRIx64 "-%" PRIx64 " %c%c%c%c %08" PRIx64 " %" PRIu64 " %s\n", + indent, "", map->start, map->end, + map->prot & PROT_READ ? 'r' : '-', + map->prot & PROT_WRITE ? 'w' : '-', + map->prot & PROT_EXEC ? 'x' : '-', + map->flags & MAP_SHARED ? 's' : 'p', + map->pgoff, + map->ino, map->dso->name); + } + + return printed; +} + +static int map_groups__fprintf_task(struct map_groups *mg, int indent, FILE *fp) +{ + int printed = 0, i; + for (i = 0; i < MAP__NR_TYPES; ++i) + printed += maps__fprintf_task(&mg->maps[i], indent, fp); + return printed; +} + static void task__print_level(struct task *task, FILE *fp, int level) { struct thread *thread = task->thread; struct task *child; + int comm_indent = fprintf(fp, " %8d %8d %8d |%*s", + thread->pid_, thread->tid, thread->ppid, + level, ""); + + fprintf(fp, "%s\n", thread__comm_str(thread)); - fprintf(fp, " %8d %8d %8d |%*s%s\n", - thread->pid_, thread->tid, thread->ppid, - level, "", thread__comm_str(thread)); + map_groups__fprintf_task(thread->mg, comm_indent, fp); if (!list_empty(&task->children)) { list_for_each_entry(child, &task->children, list) @@ -930,6 +968,7 @@ int cmd_report(int argc, const char **argv) "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), OPT_BOOLEAN(0, "tasks", &report.tasks_mode, "Display recorded tasks"), + OPT_BOOLEAN(0, "mmaps", &report.mmaps_mode, "Display recorded tasks memory maps"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, @@ -1077,6 +1116,9 @@ int cmd_report(int argc, const char **argv) report.symbol_filter_str = argv[0]; } + if (report.mmaps_mode) + report.tasks_mode = true; + if (quiet) perf_quiet_option(); @@ -1194,7 +1236,7 @@ repeat: if (report.stats_mode || report.tasks_mode) use_browser = 0; if (report.stats_mode && report.tasks_mode) { - pr_err("Error: --tasks and --stats options cannot be used together\n"); + pr_err("Error: --tasks and --mmaps can't be used together with --stats\n"); goto error; } -- cgit v1.2.3-70-g09d2 From 5d64db2966e38bfd99114ecf0b54f97d33023dcd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Wed, 10 Jan 2018 11:36:07 -0300 Subject: tools headers: Synchronize kernel <-> tooling headers Two kernel headers got modified recently due to meltdown/spectre, in: a89f040fa34e ("x86/cpufeatures: Add X86_BUG_CPU_INSECURE") which are used by tooling as well: arch/x86/include/asm/cpufeatures.h arch/x86/include/asm/disabled-features.h None of those changes have an effect on tooling, so do a plain copy. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Gleixner Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qqzcs8ri3vks8cypg0puk0ae@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/x86/include/asm/cpufeatures.h | 4 +++- tools/arch/x86/include/asm/disabled-features.h | 8 +++++++- 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index 800104c8a3ed..21ac898df2d8 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -197,11 +197,12 @@ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ #define X86_FEATURE_CAT_L2 ( 7*32+ 5) /* Cache Allocation Technology L2 */ #define X86_FEATURE_CDP_L3 ( 7*32+ 6) /* Code and Data Prioritization L3 */ +#define X86_FEATURE_INVPCID_SINGLE ( 7*32+ 7) /* Effectively INVPCID && CR4.PCIDE=1 */ #define X86_FEATURE_HW_PSTATE ( 7*32+ 8) /* AMD HW-PState */ #define X86_FEATURE_PROC_FEEDBACK ( 7*32+ 9) /* AMD ProcFeedbackInterface */ #define X86_FEATURE_SME ( 7*32+10) /* AMD Secure Memory Encryption */ - +#define X86_FEATURE_PTI ( 7*32+11) /* Kernel Page Table Isolation enabled */ #define X86_FEATURE_INTEL_PPIN ( 7*32+14) /* Intel Processor Inventory Number */ #define X86_FEATURE_INTEL_PT ( 7*32+15) /* Intel Processor Trace */ #define X86_FEATURE_AVX512_4VNNIW ( 7*32+16) /* AVX-512 Neural Network Instructions */ @@ -340,5 +341,6 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ +#define X86_BUG_CPU_MELTDOWN X86_BUG(14) /* CPU is affected by meltdown attack and needs kernel page table isolation */ #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/tools/arch/x86/include/asm/disabled-features.h b/tools/arch/x86/include/asm/disabled-features.h index 14d6d5007314..b027633e7300 100644 --- a/tools/arch/x86/include/asm/disabled-features.h +++ b/tools/arch/x86/include/asm/disabled-features.h @@ -50,6 +50,12 @@ # define DISABLE_LA57 (1<<(X86_FEATURE_LA57 & 31)) #endif +#ifdef CONFIG_PAGE_TABLE_ISOLATION +# define DISABLE_PTI 0 +#else +# define DISABLE_PTI (1 << (X86_FEATURE_PTI & 31)) +#endif + /* * Make sure to add features to the correct mask */ @@ -60,7 +66,7 @@ #define DISABLED_MASK4 (DISABLE_PCID) #define DISABLED_MASK5 0 #define DISABLED_MASK6 0 -#define DISABLED_MASK7 0 +#define DISABLED_MASK7 (DISABLE_PTI) #define DISABLED_MASK8 0 #define DISABLED_MASK9 (DISABLE_MPX) #define DISABLED_MASK10 0 -- cgit v1.2.3-70-g09d2 From 2178790baa8639a1f516f91685ae64cc8d09fee7 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 10 Jan 2018 13:46:51 -0700 Subject: perf evsel: Fix incorrect handling of type _TERM_DRV_CFG Commit ("d0565132605f perf evsel: Enable type checking for perf_evsel_config_term types") assumes PERF_EVSEL__CONFIG_TERM_DRV_CFG isn't used and as such adds a BUG_ON(). Since the enumeration type is used in macro ADD_CONFIG_TERM() the change break CoreSight trace acquisition. This patch restores the original code. Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: d0565132605f ("perf evsel: Enable type checking for perf_evsel_config_term types") Link: http://lkml.kernel.org/r/1515617211-32024-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index d934f04e3110..4eea3b404507 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -781,7 +781,7 @@ static void apply_config_terms(struct perf_evsel *evsel, attr->write_backward = term->val.overwrite ? 1 : 0; break; case PERF_EVSEL__CONFIG_TERM_DRV_CFG: - BUG_ON(1); + break; default: break; } -- cgit v1.2.3-70-g09d2 From dd8bd53ab86133327412e74bf5ba31a8ec2826d4 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Thu, 11 Jan 2018 15:50:20 +0000 Subject: perf evlist: Remove trailing semicolon The trailing semicolon is an empty statement that does no operation. Removing it since it doesn't do anything. Signed-off-by: Luis de Bethencourt Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Joe Perches Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180111155020.9782-1-luisbg@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index f0a5e09c4071..120efd85f2c8 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -1760,7 +1760,7 @@ void perf_evlist__toggle_bkw_mmap(struct perf_evlist *evlist, switch (old_state) { case BKW_MMAP_NOTREADY: { if (state != BKW_MMAP_RUNNING) - goto state_err;; + goto state_err; break; } case BKW_MMAP_RUNNING: { -- cgit v1.2.3-70-g09d2 From 41013f0c095980775e0746272873891ca7c28fb1 Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Thu, 4 Jan 2018 12:59:55 -0800 Subject: perf script python: Add script to profile and resolve physical mem type There could be different types of memory in the system. E.g normal System Memory, Persistent Memory. To understand how the workload maps to those memories, it's important to know the I/O statistics of them. Perf can collect physical addresses, but those are raw data. It still needs extra work to resolve the physical addresses. Provide a script to facilitate the physical addresses resolving and I/O statistics. Profile with MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS event if any of them is available. Look up the /proc/iomem and resolve the physical address. Provide memory type summary. Here is an example output: # perf script report mem-phys-addr Event: mem_inst_retired.all_loads:P Memory type count percentage ---------------------------------------- ----------- ----------- System RAM 74 53.2% Persistent Memory 55 39.6% N/A --- Changes since V2: - Apply the new license rules. - Add comments for globals Changes since V1: - Do not mix DLA and Load Latency. Do not compare the loads and stores. Only profile the loads. - Use event name to replace the RAW event Signed-off-by: Kan Liang Reviewed-by: Andi Kleen Cc: Dan Williams Cc: Jiri Olsa Cc: Peter Zijlstra Cc: Philippe Ombredanne Cc: Stephane Eranian Link: https://lkml.kernel.org/r/1515099595-34770-1-git-send-email-kan.liang@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/scripts/python/bin/mem-phys-addr-record | 19 +++++ tools/perf/scripts/python/bin/mem-phys-addr-report | 3 + tools/perf/scripts/python/mem-phys-addr.py | 95 ++++++++++++++++++++++ .../util/scripting-engines/trace-event-python.c | 2 + 4 files changed, 119 insertions(+) create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr-record create mode 100644 tools/perf/scripts/python/bin/mem-phys-addr-report create mode 100644 tools/perf/scripts/python/mem-phys-addr.py (limited to 'tools') diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-record b/tools/perf/scripts/python/bin/mem-phys-addr-record new file mode 100644 index 000000000000..5a875122a904 --- /dev/null +++ b/tools/perf/scripts/python/bin/mem-phys-addr-record @@ -0,0 +1,19 @@ +#!/bin/bash + +# +# Profiling physical memory by all retired load instructions/uops event +# MEM_INST_RETIRED.ALL_LOADS or MEM_UOPS_RETIRED.ALL_LOADS +# + +load=`perf list | grep mem_inst_retired.all_loads` +if [ -z "$load" ]; then + load=`perf list | grep mem_uops_retired.all_loads` +fi +if [ -z "$load" ]; then + echo "There is no event to count all retired load instructions/uops." + exit 1 +fi + +arg=$(echo $load | tr -d ' ') +arg="$arg:P" +perf record --phys-data -e $arg $@ diff --git a/tools/perf/scripts/python/bin/mem-phys-addr-report b/tools/perf/scripts/python/bin/mem-phys-addr-report new file mode 100644 index 000000000000..3f2b847e2eab --- /dev/null +++ b/tools/perf/scripts/python/bin/mem-phys-addr-report @@ -0,0 +1,3 @@ +#!/bin/bash +# description: resolve physical address samples +perf script $@ -s "$PERF_EXEC_PATH"/scripts/python/mem-phys-addr.py diff --git a/tools/perf/scripts/python/mem-phys-addr.py b/tools/perf/scripts/python/mem-phys-addr.py new file mode 100644 index 000000000000..ebee2c5ae496 --- /dev/null +++ b/tools/perf/scripts/python/mem-phys-addr.py @@ -0,0 +1,95 @@ +# mem-phys-addr.py: Resolve physical address samples +# SPDX-License-Identifier: GPL-2.0 +# +# Copyright (c) 2018, Intel Corporation. + +from __future__ import division +import os +import sys +import struct +import re +import bisect +import collections + +sys.path.append(os.environ['PERF_EXEC_PATH'] + \ + '/scripts/python/Perf-Trace-Util/lib/Perf/Trace') + +#physical address ranges for System RAM +system_ram = [] +#physical address ranges for Persistent Memory +pmem = [] +#file object for proc iomem +f = None +#Count for each type of memory +load_mem_type_cnt = collections.Counter() +#perf event name +event_name = None + +def parse_iomem(): + global f + f = open('/proc/iomem', 'r') + for i, j in enumerate(f): + m = re.split('-|:',j,2) + if m[2].strip() == 'System RAM': + system_ram.append(long(m[0], 16)) + system_ram.append(long(m[1], 16)) + if m[2].strip() == 'Persistent Memory': + pmem.append(long(m[0], 16)) + pmem.append(long(m[1], 16)) + +def print_memory_type(): + print "Event: %s" % (event_name) + print "%-40s %10s %10s\n" % ("Memory type", "count", "percentage"), + print "%-40s %10s %10s\n" % ("----------------------------------------", \ + "-----------", "-----------"), + total = sum(load_mem_type_cnt.values()) + for mem_type, count in sorted(load_mem_type_cnt.most_common(), \ + key = lambda(k, v): (v, k), reverse = True): + print "%-40s %10d %10.1f%%\n" % (mem_type, count, 100 * count / total), + +def trace_begin(): + parse_iomem() + +def trace_end(): + print_memory_type() + f.close() + +def is_system_ram(phys_addr): + #/proc/iomem is sorted + position = bisect.bisect(system_ram, phys_addr) + if position % 2 == 0: + return False + return True + +def is_persistent_mem(phys_addr): + position = bisect.bisect(pmem, phys_addr) + if position % 2 == 0: + return False + return True + +def find_memory_type(phys_addr): + if phys_addr == 0: + return "N/A" + if is_system_ram(phys_addr): + return "System RAM" + + if is_persistent_mem(phys_addr): + return "Persistent Memory" + + #slow path, search all + f.seek(0, 0) + for j in f: + m = re.split('-|:',j,2) + if long(m[0], 16) <= phys_addr <= long(m[1], 16): + return m[2] + return "N/A" + +def process_event(param_dict): + name = param_dict["ev_name"] + sample = param_dict["sample"] + phys_addr = sample["phys_addr"] + + global event_name + if event_name == None: + event_name = name + load_mem_type_cnt[find_memory_type(phys_addr)] += 1 diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index c1848b543f27..ea070883c593 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -499,6 +499,8 @@ static PyObject *get_perf_sample_dict(struct perf_sample *sample, PyLong_FromUnsignedLongLong(sample->time)); pydict_set_item_string_decref(dict_sample, "period", PyLong_FromUnsignedLongLong(sample->period)); + pydict_set_item_string_decref(dict_sample, "phys_addr", + PyLong_FromUnsignedLongLong(sample->phys_addr)); set_sample_read_in_dict(dict_sample, sample, evsel); pydict_set_item_string_decref(dict, "sample", dict_sample); -- cgit v1.2.3-70-g09d2 From 236d812c55c2f38665663f0af61606b1cecea825 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Jan 2018 13:14:52 -0300 Subject: perf trace: No need to set PERF_SAMPLE_IDENTIFIER explicitely Since 75562573bab3 ("perf tools: Add support for PERF_SAMPLE_IDENTIFIER") we don't need explicitely set PERF_SAMPLE_IDENTIFIER, as perf_evlist__config() will do this for us, i.e. when there are more than one evsel in an evlist, it will check if some evsel has a sample_type different than the one on the first evsel in the list, setting PERF_SAMPLE_IDENTIFIER in that case. So, to simplify 'perf trace' codebase, ditch that check. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-12xq6orhwttee2tdtu96ucrp@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 71e64bdca86f..e84816d02117 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2348,40 +2348,17 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__config(evlist, &trace->opts, NULL); if (callchain_param.enabled) { - bool use_identifier = false; - if (trace->syscalls.events.sys_exit) { perf_evsel__config_callchain(trace->syscalls.events.sys_exit, &trace->opts, &callchain_param); - use_identifier = true; } if (pgfault_maj) { perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); - use_identifier = true; } if (pgfault_min) { perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); - use_identifier = true; - } - - if (use_identifier) { - /* - * Now we have evsels with different sample_ids, use - * PERF_SAMPLE_IDENTIFIER to map from sample to evsel - * from a fixed position in each ring buffer record. - * - * As of this the changeset introducing this comment, this - * isn't strictly needed, as the fields that can come before - * PERF_SAMPLE_ID are all used, but we'll probably disable - * some of those for things like copying the payload of - * pointer syscall arguments, and for vfs_getname we don't - * need PERF_SAMPLE_ADDR and PERF_SAMPLE_IP, so do this - * here as a warning we need to use PERF_SAMPLE_IDENTIFIER. - */ - perf_evlist__set_sample_bit(evlist, IDENTIFIER); - perf_evlist__reset_sample_bit(evlist, ID); } } -- cgit v1.2.3-70-g09d2 From fa1195ccc0af2d121abe0fe266a1caee8c265eea Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jan 2018 14:39:23 +0100 Subject: perf tools: Fix copyfile_offset update of output offset We need to increase output offset in each iteration, not decrease it as we currently do. I guess we were lucky to finish in most cases in first iteration, so the bug never showed. However it shows a lot when working with big (~4GB) size data. Signed-off-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Namhyung Kim Cc: Peter Zijlstra Fixes: 9c9f5a2f1944 ("perf tools: Introduce copyfile_offset() function") Link: http://lkml.kernel.org/r/20180109133923.25406-1-jolsa@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/util.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index a789f952b3e9..443892dabedb 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -210,7 +210,7 @@ static int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size -= ret; off_in += ret; - off_out -= ret; + off_out += ret; } munmap(ptr, off_in + size); -- cgit v1.2.3-70-g09d2 From 1688c2fdf6e72633c7f463da3878b58eef01ba56 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Jan 2018 16:21:04 -0300 Subject: perf evsel: Check if callchain is enabled before setting it up The construct: if (callchain_param) perf_evsel__config_callchain(evsel, opts, &callchain_param); happens in several places, so make perf_evsel__config_callchain() work just like free(NULL), do nothing if param->enabled is not set. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ykk0qzxnxwx3o611ctjnmxav@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 4eea3b404507..efa2e629a669 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -651,9 +651,9 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) return ret; } -void perf_evsel__config_callchain(struct perf_evsel *evsel, - struct record_opts *opts, - struct callchain_param *param) +static void __perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; @@ -699,6 +699,14 @@ void perf_evsel__config_callchain(struct perf_evsel *evsel, } } +void perf_evsel__config_callchain(struct perf_evsel *evsel, + struct record_opts *opts, + struct callchain_param *param) +{ + if (param->enabled) + return __perf_evsel__config_callchain(evsel, opts, param); +} + static void perf_evsel__reset_callgraph(struct perf_evsel *evsel, struct callchain_param *param) -- cgit v1.2.3-70-g09d2 From 08e26396c6f29642fecfb7ca083459264a033a89 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Fri, 12 Jan 2018 13:29:05 -0300 Subject: perf trace: Fix setting of --call-graph/--max-stack for non-syscall events The raw_syscalls:sys_{enter,exit} were first supported in 'perf trace', together with minor and major page faults, then we supported --call-graph, then --max-stack, but when the other tracepoints got supported, and bpf, etc, I forgot to make those global call-graph settings apply to them. Fix it by realizing that the global --max-stack and --call-graph settings are done via: OPT_CALLBACK(0, "call-graph", &trace.opts, "record_mode[,record_size]", record_callchain_help, &record_parse_callchain_opt), And then, when we go to parse the events in -e via: OPT_CALLBACK('e', "event", &trace, "event", "event/syscall selector. use 'perf list' to list available events", trace__parse_events_option), And trace__parse_sevents_option() calls: struct option o = OPT_CALLBACK('e', "event", &trace->evlist, "event", "event selector. use 'perf list' to list available events", parse_events_option); err = parse_events_option(&o, lists[0], 0); parse_events_option() will override the global --call-graph and --max-stack if the "call-graph" and/or "max-stack" terms are in the event definition, such as in the probe_libc:inet_pton event in one of the examples below (-e probe_libc:inet_pton/max-stack=2). Before: # perf trace --mmap 1024 --call-graph dwarf -e sendto,probe_libc:inet_pton ping -6 -c 1 ::1 1.525 ( ): probe_libc:inet_pton:(7f77f3ac9350)) PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.071 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.071/0.071/0.071/0.000 ms 1.677 ( 0.081 ms): ping/31296 sendto(fd: 3, buff: 0x55681b652720, len: 64, addr: 0x55681b650640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa97e4bc9cef] (/usr/bin/ping) [0xffffaa97e4bc656d] (/usr/bin/ping) [0xffffaa97e4bc7d0a] (/usr/bin/ping) [0xffffaa97e4bca447] (/usr/bin/ping) [0xffffaa97e4bc2f91] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa97e4bc3379] (/usr/bin/ping) # After: # perf trace --mmap 1024 --call-graph dwarf -e sendto,probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.089 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.089/0.089/0.089/0.000 ms 1.955 ( ): probe_libc:inet_pton:(7f383a311350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa5d91444f3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa5d91445379] (/usr/bin/ping) 2.140 ( 0.101 ms): ping/32047 sendto(fd: 3, buff: 0x55a26edd0720, len: 64, addr: 0x55a26edce640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa5d9144bcef] (/usr/bin/ping) [0xffffaa5d9144856d] (/usr/bin/ping) [0xffffaa5d91449d0a] (/usr/bin/ping) [0xffffaa5d9144c447] (/usr/bin/ping) [0xffffaa5d91444f91] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa5d91445379] (/usr/bin/ping) # Same thing for --max-stack, the global one: # perf trace --max-stack 3 -e sendto,probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.097 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.097/0.097/0.097/0.000 ms 1.577 ( ): probe_libc:inet_pton:(7f32f3957350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) 1.738 ( 0.108 ms): ping/32103 sendto(fd: 3, buff: 0x55c3132d7720, len: 64, addr: 0x55c3132d5640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa3cecf44cef] (/usr/bin/ping) [0xffffaa3cecf4156d] (/usr/bin/ping) # And then setting up a global setting (dwarf, max-stack=4), that will affect the raw_syscall:sys_enter for the 'sendto' syscall and that will be overriden in the probe_libc:inet_pton call to just one entry. # perf trace --max-stack=4 --call-graph dwarf -e sendto -e probe_libc:inet_pton/max-stack=1/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.090 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.090/0.090/0.090/0.000 ms 2.140 ( ): probe_libc:inet_pton:(7f9fe9337350)) __GI___inet_pton (/usr/lib64/libc-2.26.so) 2.283 ( 0.103 ms): ping/31804 sendto(fd: 3, buff: 0x55c7f3e19720, len: 64, addr: 0x55c7f3e17640, addr_len: 28) = 64 __libc_sendto (/usr/lib64/libc-2.26.so) [0xffffaa380c402cef] (/usr/bin/ping) [0xffffaa380c3ff56d] (/usr/bin/ping) [0xffffaa380c400d0a] (/usr/bin/ping) # Install iputils-debuginfo to get those /usr/bin/ping addresses resolved, those routines are not on its .dymsym nor .symtab :-) Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-qgl2gse8elhh9zztw4ajopg3@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 20 +++++--------------- 1 file changed, 5 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index e84816d02117..0362974854e9 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2222,6 +2222,9 @@ static int trace__add_syscall_newtp(struct trace *trace) if (perf_evsel__init_sc_tp_uint_field(sys_exit, ret)) goto out_delete_sys_exit; + perf_evsel__config_callchain(sys_enter, &trace->opts, &callchain_param); + perf_evsel__config_callchain(sys_exit, &trace->opts, &callchain_param); + perf_evlist__add(evlist, sys_enter); perf_evlist__add(evlist, sys_exit); @@ -2318,6 +2321,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) pgfault_maj = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MAJ); if (pgfault_maj == NULL) goto out_error_mem; + perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); perf_evlist__add(evlist, pgfault_maj); } @@ -2325,6 +2329,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) pgfault_min = perf_evsel__new_pgfault(PERF_COUNT_SW_PAGE_FAULTS_MIN); if (pgfault_min == NULL) goto out_error_mem; + perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); perf_evlist__add(evlist, pgfault_min); } @@ -2347,21 +2352,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) perf_evlist__config(evlist, &trace->opts, NULL); - if (callchain_param.enabled) { - if (trace->syscalls.events.sys_exit) { - perf_evsel__config_callchain(trace->syscalls.events.sys_exit, - &trace->opts, &callchain_param); - } - - if (pgfault_maj) { - perf_evsel__config_callchain(pgfault_maj, &trace->opts, &callchain_param); - } - - if (pgfault_min) { - perf_evsel__config_callchain(pgfault_min, &trace->opts, &callchain_param); - } - } - signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); -- cgit v1.2.3-70-g09d2 From 952a99ccfa9db2f9a32810fc9c0084f532dd871a Mon Sep 17 00:00:00 2001 From: Michael Sartain Date: Thu, 11 Jan 2018 19:47:42 -0500 Subject: tools lib traceevent: Fix bad force_token escape sequence Older kernels have a bug that creates invalid symbols. event-parse.c handles them by replacing them with a "%s" token. But the fix included an extra backslash, and "\%s" was added incorrectly. Signed-off-by: Michael Sartain Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004821.827168881@goodmis.org Link: http://lkml.kernel.org/r/d320000d37c10ce0912851e1fb78d1e0c946bcd9.1497486273.git.mikesart@fastmail.com Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 7ce724fc0544..0bc1a6df8a27 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1094,7 +1094,7 @@ static enum event_type __read_token(char **tok) if (strcmp(*tok, "LOCAL_PR_FMT") == 0) { free(*tok); *tok = NULL; - return force_token("\"\%s\" ", tok); + return force_token("\"%s\" ", tok); } else if (strcmp(*tok, "STA_PR_FMT") == 0) { free(*tok); *tok = NULL; -- cgit v1.2.3-70-g09d2 From 3df76c9a8167ffff1588516fc74b980cde664efe Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:43 -0500 Subject: tools lib traceevent: Show value of flags that have not been parsed If the value contains bits that are not defined by print_flags() helper, then show the remaining bits. This aligns with the functionality of the kernel. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/e60c889f-55e7-4ee8-0e50-151e435ffd8c@siemens.com Link: http://lkml.kernel.org/r/20180112004821.976225232@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 0bc1a6df8a27..96c9c0b33423 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3970,6 +3970,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, val &= ~fval; } } + if (val) { + if (print && arg->flags.delim) + trace_seq_puts(s, arg->flags.delim); + trace_seq_printf(s, "0x%llx", val); + } break; case PRINT_SYMBOL: val = eval_num_arg(data, size, event, arg->symbol.field); -- cgit v1.2.3-70-g09d2 From d63444739bee6acfa9a834515da17f9cec544505 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Thu, 11 Jan 2018 19:47:44 -0500 Subject: tools lib traceevent: Print value of unknown symbolic fields Aligns trace-cmd with the behavior of the kernel. Signed-off-by: Jan Kiszka Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/e60c889f-55e7-4ee8-0e50-151e435ffd8c@siemens.com Link: http://lkml.kernel.org/r/20180112004822.118332436@goodmis.org Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 96c9c0b33423..87757eabbb08 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -3985,6 +3985,8 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, break; } } + if (!flag) + trace_seq_printf(s, "0x%llx", val); break; case PRINT_HEX: case PRINT_HEX_STR: -- cgit v1.2.3-70-g09d2 From 38d70b7ca1769f26c0b79f3c08ff2cc949712b59 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:45 -0500 Subject: tools lib traceevent: Simplify pointer print logic and fix %pF When processing %pX in pretty_print(), simplify the logic slightly by incrementing the ptr to the format string if isalnum(ptr[1]) is true. This follows the logic a bit more closely to what is in the kernel. Also, this fixes a small bug where %pF was not giving the offset of the function. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004822.260262257@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 87757eabbb08..8757dd64e42c 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4956,21 +4956,22 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event else ls = 2; - if (*(ptr+1) == 'F' || *(ptr+1) == 'f' || - *(ptr+1) == 'S' || *(ptr+1) == 's') { + if (isalnum(ptr[1])) ptr++; + + if (*ptr == 'F' || *ptr == 'f' || + *ptr == 'S' || *ptr == 's') { show_func = *ptr; - } else if (*(ptr+1) == 'M' || *(ptr+1) == 'm') { - print_mac_arg(s, *(ptr+1), data, size, event, arg); - ptr++; + } else if (*ptr == 'M' || *ptr == 'm') { + print_mac_arg(s, *ptr, data, size, event, arg); arg = arg->next; break; - } else if (*(ptr+1) == 'I' || *(ptr+1) == 'i') { + } else if (*ptr == 'I' || *ptr == 'i') { int n; - n = print_ip_arg(s, ptr+1, data, size, event, arg); + n = print_ip_arg(s, ptr, data, size, event, arg); if (n > 0) { - ptr += n; + ptr += n - 1; arg = arg->next; break; } -- cgit v1.2.3-70-g09d2 From 37db96bb49629681cb839d7304a70524fe10f969 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:46 -0500 Subject: tools lib traceevent: Handle new pointer processing of bprint strings The Linux kernel printf() has some extended use cases that dereference the pointer. This is dangerouse for tracing because the pointer that is dereferenced can change or even be unmapped. It also causes issues when the trace data is extracted, because user space does not have access to the contents of the pointer even if it still exists. To handle this, the kernel was updated to process these dereferenced pointers at the time they are recorded, and not post processed. Now they exist in the tracing buffer, and no dereference is needed at the time of reading the trace. The event parsing library needs to handle this new case. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004822.403349289@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 8757dd64e42c..344a034a8fbc 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -4300,6 +4300,26 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc goto process_again; case 'p': ls = 1; + if (isalnum(ptr[1])) { + ptr++; + /* Check for special pointers */ + switch (*ptr) { + case 's': + case 'S': + case 'f': + case 'F': + break; + default: + /* + * Older kernels do not process + * dereferenced pointers. + * Only process if the pointer + * value is a printable. + */ + if (isprint(*(char *)bptr)) + goto process_string; + } + } /* fall through */ case 'd': case 'u': @@ -4352,6 +4372,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc break; case 's': + process_string: arg = alloc_arg(); if (!arg) { do_warning_event(event, "%s(%d): not enough memory!", @@ -4959,6 +4980,11 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct event if (isalnum(ptr[1])) ptr++; + if (arg->type == PRINT_BSTRING) { + trace_seq_puts(s, arg->string.string); + break; + } + if (*ptr == 'F' || *ptr == 'f' || *ptr == 'S' || *ptr == 's') { show_func = *ptr; -- cgit v1.2.3-70-g09d2 From e877372880f72399323e433187cce2bfbea40263 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:47 -0500 Subject: tools lib traceevent: Show contents (in hex) of data of unrecognized type records When a record has an unrecognized type, an error message is reported, but it would also be helpful to see the contents of that record. At least show what it is in hex, instead of just showing a blank line. Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004822.542204577@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-parse.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 344a034a8fbc..e5f2acbb70cc 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -5566,8 +5566,14 @@ void pevent_print_event(struct pevent *pevent, struct trace_seq *s, event = pevent_find_event_by_record(pevent, record); if (!event) { - do_warning("ug! no event found for type %d", - trace_parse_common_type(pevent, record->data)); + int i; + int type = trace_parse_common_type(pevent, record->data); + + do_warning("ug! no event found for type %d", type); + trace_seq_printf(s, "[UNKNOWN TYPE %d]", type); + for (i = 0; i < record->size; i++) + trace_seq_printf(s, " %02x", + ((unsigned char *)record->data)[i]); return; } -- cgit v1.2.3-70-g09d2 From 67dfc376f3dfdc39b9125f32d5b24053a4da264f Mon Sep 17 00:00:00 2001 From: Federico Vaga Date: Thu, 11 Jan 2018 19:47:48 -0500 Subject: tools lib traceevent: Use asprintf when possible It makes the code clearer and less error prone. clearer: - less code - the code is now using the same format to create strings dynamically less error prone: - no magic number +2 +9 +5 to compute the size - no copy&paste of the strings to compute the size and to concatenate The function `asprintf` is not POSIX standard but the program was already using it. Later it can be decided to use only POSIX functions, then we can easly replace all the `asprintf(3)` with a local implementation of that function. Signed-off-by: Federico Vaga Acked-by: Namhyung Kim Cc: Andrew Morton Cc: Federico Vaga Link: http://lkml.kernel.org/r/20170802221558.9684-2-federico.vaga@vaga.pv.it Link: http://lkml.kernel.org/r/20180112004822.686281649@goodmis.org Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/event-plugin.c | 24 +++++++++--------------- tools/lib/traceevent/parse-filter.c | 11 ++++------- 2 files changed, 13 insertions(+), 22 deletions(-) (limited to 'tools') diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index a16756ae3526..d542cb60ca1a 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -120,12 +120,12 @@ char **traceevent_plugin_list_options(void) for (op = reg->options; op->name; op++) { char *alias = op->plugin_alias ? op->plugin_alias : op->file; char **temp = list; + int ret; - name = malloc(strlen(op->name) + strlen(alias) + 2); - if (!name) + ret = asprintf(&name, "%s:%s", alias, op->name); + if (ret < 0) goto err; - sprintf(name, "%s:%s", alias, op->name); list = realloc(list, count + 2); if (!list) { list = temp; @@ -290,17 +290,14 @@ load_plugin(struct pevent *pevent, const char *path, const char *alias; char *plugin; void *handle; + int ret; - plugin = malloc(strlen(path) + strlen(file) + 2); - if (!plugin) { + ret = asprintf(&plugin, "%s/%s", path, file); + if (ret < 0) { warning("could not allocate plugin memory\n"); return; } - strcpy(plugin, path); - strcat(plugin, "/"); - strcat(plugin, file); - handle = dlopen(plugin, RTLD_NOW | RTLD_GLOBAL); if (!handle) { warning("could not load plugin '%s'\n%s\n", @@ -391,6 +388,7 @@ load_plugins(struct pevent *pevent, const char *suffix, char *home; char *path; char *envdir; + int ret; if (pevent->flags & PEVENT_DISABLE_PLUGINS) return; @@ -421,16 +419,12 @@ load_plugins(struct pevent *pevent, const char *suffix, if (!home) return; - path = malloc(strlen(home) + strlen(LOCAL_PLUGIN_DIR) + 2); - if (!path) { + ret = asprintf(&path, "%s/%s", home, LOCAL_PLUGIN_DIR); + if (ret < 0) { warning("could not allocate plugin memory\n"); return; } - strcpy(path, home); - strcat(path, "/"); - strcat(path, LOCAL_PLUGIN_DIR); - load_plugins_dir(pevent, suffix, path, load_plugin, data); free(path); diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 315df0a70265..2410afdcbcfe 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -287,12 +287,10 @@ find_event(struct pevent *pevent, struct event_list **events, sys_name = NULL; } - reg = malloc(strlen(event_name) + 3); - if (reg == NULL) + ret = asprintf(®, "^%s$", event_name); + if (ret < 0) return PEVENT_ERRNO__MEM_ALLOC_FAILED; - sprintf(reg, "^%s$", event_name); - ret = regcomp(&ereg, reg, REG_ICASE|REG_NOSUB); free(reg); @@ -300,13 +298,12 @@ find_event(struct pevent *pevent, struct event_list **events, return PEVENT_ERRNO__INVALID_EVENT_NAME; if (sys_name) { - reg = malloc(strlen(sys_name) + 3); - if (reg == NULL) { + ret = asprintf(®, "^%s$", sys_name); + if (ret < 0) { regfree(&ereg); return PEVENT_ERRNO__MEM_ALLOC_FAILED; } - sprintf(reg, "^%s$", sys_name); ret = regcomp(&sreg, reg, REG_ICASE|REG_NOSUB); free(reg); if (ret) { -- cgit v1.2.3-70-g09d2 From 6d36ce261614fbac3557cc58ba6a33424944c8a2 Mon Sep 17 00:00:00 2001 From: Michael Sartain Date: Thu, 11 Jan 2018 19:47:49 -0500 Subject: tools lib traceevent: Add UL suffix to MISSING_EVENTS Add UL suffix to MISSING_EVENTS since ints shouldn't be left shifted by 31. Signed-off-by: Michael Sartain Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20171016165542.13038-4-mikesart@fastmail.com Link: http://lkml.kernel.org/r/20180112004822.829533885@goodmis.org Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/kbuffer-parse.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index c94e3641b046..ca424b157e46 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -24,8 +24,8 @@ #include "kbuffer.h" -#define MISSING_EVENTS (1 << 31) -#define MISSING_STORED (1 << 30) +#define MISSING_EVENTS (1UL << 31) +#define MISSING_STORED (1UL << 30) #define COMMIT_MASK ((1 << 27) - 1) -- cgit v1.2.3-70-g09d2 From 806efaed3cacab1521895d20bb3b5ed610909299 Mon Sep 17 00:00:00 2001 From: Taeung Song Date: Thu, 11 Jan 2018 19:47:50 -0500 Subject: tools lib traceevent: Fix missing break in FALSE case of pevent_filter_clear_trivial() Currently the FILTER_TRIVIAL_FALSE case has a missing break statement, if the trivial type is FALSE, it will also run into the TRUE case, and always be skipped as the TRUE statement will continue the loop on the inverse condition of the FALSE statement. Reported-by: Namhyung Kim Acked-by: Namhyung Kim Signed-off-by: Taeung Song Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004823.012918807@goodmis.org Link: http://lkml.kernel.org/r/1493218540-12296-1-git-send-email-treeze.taeung@gmail.com Signed-off-by: Steven Rostedt Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-filter.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 2410afdcbcfe..2b9048f90bae 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1631,6 +1631,7 @@ int pevent_filter_clear_trivial(struct event_filter *filter, case FILTER_TRIVIAL_FALSE: if (filter_type->filter->boolean.value) continue; + break; case FILTER_TRIVIAL_TRUE: if (!filter_type->filter->boolean.value) continue; -- cgit v1.2.3-70-g09d2 From d777f8de99b05d399c0e4e51cdce016f26bd971b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 11 Jan 2018 19:47:51 -0500 Subject: tools lib traceevent: Fix get_field_str() for dynamic strings If a field is a dynamic string, get_field_str() returned just the offset/size value and not the string. Have it parse the offset/size correctly to return the actual string. Otherwise filtering fails when trying to filter fields that are dynamic strings. Reported-by: Gopanapalli Pradeep Signed-off-by: Steven Rostedt Acked-by: Namhyung Kim Cc: Andrew Morton Link: http://lkml.kernel.org/r/20180112004823.146333275@goodmis.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/lib/traceevent/parse-filter.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 2b9048f90bae..431e8b309f6e 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1877,17 +1877,25 @@ static const char *get_field_str(struct filter_arg *arg, struct pevent_record *r struct pevent *pevent; unsigned long long addr; const char *val = NULL; + unsigned int size; char hex[64]; /* If the field is not a string convert it */ if (arg->str.field->flags & FIELD_IS_STRING) { val = record->data + arg->str.field->offset; + size = arg->str.field->size; + + if (arg->str.field->flags & FIELD_IS_DYNAMIC) { + addr = *(unsigned int *)val; + val = record->data + (addr & 0xffff); + size = addr >> 16; + } /* * We need to copy the data since we can't be sure the field * is null terminated. */ - if (*(val + arg->str.field->size - 1)) { + if (*(val + size - 1)) { /* copy it */ memcpy(arg->str.buffer, val, arg->str.field->size); /* the buffer is already NULL terminated */ -- cgit v1.2.3-70-g09d2 From ffd3d18c20b8df281a18940ee80a99b28114d4b7 Mon Sep 17 00:00:00 2001 From: Kim Phillips Date: Sun, 14 Jan 2018 13:28:50 -0600 Subject: perf tools: Add ARM Statistical Profiling Extensions (SPE) support 'perf record' and 'perf report --dump-raw-trace' supported in this release. Example usage: # perf record -e arm_spe/ts_enable=1,pa_enable=1/ dd if=/dev/zero of=/dev/null count=10000 # perf report --dump-raw-trace Note that the perf.data file is portable, so the report can be run on another architecture host if necessary. Output will contain raw SPE data and its textual representation, such as: 0x5c8 [0x30]: PERF_RECORD_AUXTRACE size: 0x200000 offset: 0 ref: 0x1891ad0e idx: 1 tid: 2227 cpu: 1 . . ... ARM SPE data: size 2097152 bytes . 00000000: 49 00 LD . 00000002: b2 c0 3b 29 0f 00 00 ff ff VA 0xffff00000f293bc0 . 0000000b: b3 c0 eb 24 fb 00 00 00 80 PA 0xfb24ebc0 ns=1 . 00000014: 9a 00 00 LAT 0 XLAT . 00000017: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000019: b0 00 c4 15 08 00 00 ff ff PC 0xff00000815c400 el3 ns=1 . 00000022: 98 00 00 LAT 0 TOT . 00000025: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000002e: 49 00 LD . 00000030: b2 80 3c 29 0f 00 00 ff ff VA 0xffff00000f293c80 . 00000039: b3 80 ec 24 fb 00 00 00 80 PA 0xfb24ec80 ns=1 . 00000042: 9a 00 00 LAT 0 XLAT . 00000045: 42 16 EV RETIRED L1D-ACCESS TLB-ACCESS . 00000047: b0 f4 11 16 08 00 00 ff ff PC 0xff0000081611f4 el3 ns=1 . 00000050: 98 00 00 LAT 0 TOT . 00000053: 71 36 6c 21 2c 09 00 00 00 TS 39395093558 . 0000005c: 48 00 INSN-OTHER . 0000005e: 42 02 EV RETIRED . 00000060: b0 2c ef 7f 08 00 00 ff ff PC 0xff0000087fef2c el3 ns=1 . 00000069: 98 00 00 LAT 0 TOT . 0000006c: 71 d1 6f 21 2c 09 00 00 00 TS 39395094481 ... Other release notes: - applies to acme's perf/{core,urgent} branches, likely elsewhere - Report is self-contained within the tool. Record requires enabling the kernel SPE driver by setting CONFIG_ARM_SPE_PMU. - The intel-bts implementation was used as a starting point; its min/default/max buffer sizes and power of 2 pages granularity need to be revisited for ARM SPE - Recording across multiple SPE clusters/domains not supported - Snapshot support (record -S), and conversion to native perf events (e.g., via 'perf inject --itrace'), are also not supported - Technically both cs-etm and spe can be used simultaneously, however disabled for simplicity in this release Signed-off-by: Kim Phillips Reviewed-by: Dongjiu Geng Acked-by: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: linux-arm-kernel@lists.infradead.org Cc: Marc Zyngier Cc: Mark Rutland Cc: Mathieu Poirier Cc: Pawel Moll Cc: Peter Zijlstra Cc: Rob Herring Cc: Suzuki Poulouse Cc: Thomas Gleixner Cc: Wang Nan Cc: Will Deacon Link: http://lkml.kernel.org/r/20180114132850.0b127434b704a26bad13268f@arm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/arm/util/auxtrace.c | 77 +++++- tools/perf/arch/arm/util/pmu.c | 6 + tools/perf/arch/arm64/util/Build | 3 +- tools/perf/arch/arm64/util/arm-spe.c | 225 +++++++++++++++++ tools/perf/util/Build | 2 + tools/perf/util/arm-spe-pkt-decoder.c | 462 ++++++++++++++++++++++++++++++++++ tools/perf/util/arm-spe-pkt-decoder.h | 43 ++++ tools/perf/util/arm-spe.c | 231 +++++++++++++++++ tools/perf/util/arm-spe.h | 31 +++ tools/perf/util/auxtrace.c | 3 + tools/perf/util/auxtrace.h | 1 + 11 files changed, 1077 insertions(+), 7 deletions(-) create mode 100644 tools/perf/arch/arm64/util/arm-spe.c create mode 100644 tools/perf/util/arm-spe-pkt-decoder.c create mode 100644 tools/perf/util/arm-spe-pkt-decoder.h create mode 100644 tools/perf/util/arm-spe.c create mode 100644 tools/perf/util/arm-spe.h (limited to 'tools') diff --git a/tools/perf/arch/arm/util/auxtrace.c b/tools/perf/arch/arm/util/auxtrace.c index 8edf2cb71564..2323581b157d 100644 --- a/tools/perf/arch/arm/util/auxtrace.c +++ b/tools/perf/arch/arm/util/auxtrace.c @@ -22,6 +22,42 @@ #include "../../util/evlist.h" #include "../../util/pmu.h" #include "cs-etm.h" +#include "arm-spe.h" + +static struct perf_pmu **find_all_arm_spe_pmus(int *nr_spes, int *err) +{ + struct perf_pmu **arm_spe_pmus = NULL; + int ret, i, nr_cpus = sysconf(_SC_NPROCESSORS_CONF); + /* arm_spe_xxxxxxxxx\0 */ + char arm_spe_pmu_name[sizeof(ARM_SPE_PMU_NAME) + 10]; + + arm_spe_pmus = zalloc(sizeof(struct perf_pmu *) * nr_cpus); + if (!arm_spe_pmus) { + pr_err("spes alloc failed\n"); + *err = -ENOMEM; + return NULL; + } + + for (i = 0; i < nr_cpus; i++) { + ret = sprintf(arm_spe_pmu_name, "%s%d", ARM_SPE_PMU_NAME, i); + if (ret < 0) { + pr_err("sprintf failed\n"); + *err = -ENOMEM; + return NULL; + } + + arm_spe_pmus[*nr_spes] = perf_pmu__find(arm_spe_pmu_name); + if (arm_spe_pmus[*nr_spes]) { + pr_debug2("%s %d: arm_spe_pmu %d type %d name %s\n", + __func__, __LINE__, *nr_spes, + arm_spe_pmus[*nr_spes]->type, + arm_spe_pmus[*nr_spes]->name); + (*nr_spes)++; + } + } + + return arm_spe_pmus; +} struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, int *err) @@ -29,22 +65,51 @@ struct auxtrace_record struct perf_pmu *cs_etm_pmu; struct perf_evsel *evsel; bool found_etm = false; + bool found_spe = false; + static struct perf_pmu **arm_spe_pmus = NULL; + static int nr_spes = 0; + int i; + + if (!evlist) + return NULL; cs_etm_pmu = perf_pmu__find(CORESIGHT_ETM_PMU_NAME); - if (evlist) { - evlist__for_each_entry(evlist, evsel) { - if (cs_etm_pmu && - evsel->attr.type == cs_etm_pmu->type) - found_etm = true; + if (!arm_spe_pmus) + arm_spe_pmus = find_all_arm_spe_pmus(&nr_spes, err); + + evlist__for_each_entry(evlist, evsel) { + if (cs_etm_pmu && + evsel->attr.type == cs_etm_pmu->type) + found_etm = true; + + if (!nr_spes) + continue; + + for (i = 0; i < nr_spes; i++) { + if (evsel->attr.type == arm_spe_pmus[i]->type) { + found_spe = true; + break; + } } } + if (found_etm && found_spe) { + pr_err("Concurrent ARM Coresight ETM and SPE operation not currently supported\n"); + *err = -EOPNOTSUPP; + return NULL; + } + if (found_etm) return cs_etm_record_init(err); +#if defined(__aarch64__) + if (found_spe) + return arm_spe_recording_init(err, arm_spe_pmus[i]); +#endif + /* - * Clear 'err' even if we haven't found a cs_etm event - that way perf + * Clear 'err' even if we haven't found an event - that way perf * record can still be used even if tracers aren't present. The NULL * return value will take care of telling the infrastructure HW tracing * isn't available. diff --git a/tools/perf/arch/arm/util/pmu.c b/tools/perf/arch/arm/util/pmu.c index 98d67399a0d6..ac4dffc807b8 100644 --- a/tools/perf/arch/arm/util/pmu.c +++ b/tools/perf/arch/arm/util/pmu.c @@ -20,6 +20,7 @@ #include #include "cs-etm.h" +#include "arm-spe.h" #include "../../util/pmu.h" struct perf_event_attr @@ -30,7 +31,12 @@ struct perf_event_attr /* add ETM default config here */ pmu->selectable = true; pmu->set_drv_config = cs_etm_set_drv_config; +#if defined(__aarch64__) + } else if (strstarts(pmu->name, ARM_SPE_PMU_NAME)) { + return arm_spe_pmu_default_config(pmu); +#endif } + #endif return NULL; } diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build index e04f6cdd6f32..c0b8dfef98ba 100644 --- a/tools/perf/arch/arm64/util/Build +++ b/tools/perf/arch/arm64/util/Build @@ -5,4 +5,5 @@ libperf-$(CONFIG_LOCAL_LIBUNWIND) += unwind-libunwind.o libperf-$(CONFIG_AUXTRACE) += ../../arm/util/pmu.o \ ../../arm/util/auxtrace.o \ - ../../arm/util/cs-etm.o + ../../arm/util/cs-etm.o \ + arm-spe.o diff --git a/tools/perf/arch/arm64/util/arm-spe.c b/tools/perf/arch/arm64/util/arm-spe.c new file mode 100644 index 000000000000..1120e39c1b00 --- /dev/null +++ b/tools/perf/arch/arm64/util/arm-spe.c @@ -0,0 +1,225 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include +#include +#include +#include +#include + +#include "../../util/cpumap.h" +#include "../../util/evsel.h" +#include "../../util/evlist.h" +#include "../../util/session.h" +#include "../../util/util.h" +#include "../../util/pmu.h" +#include "../../util/debug.h" +#include "../../util/auxtrace.h" +#include "../../util/arm-spe.h" + +#define KiB(x) ((x) * 1024) +#define MiB(x) ((x) * 1024 * 1024) + +struct arm_spe_recording { + struct auxtrace_record itr; + struct perf_pmu *arm_spe_pmu; + struct perf_evlist *evlist; +}; + +static size_t +arm_spe_info_priv_size(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused) +{ + return ARM_SPE_AUXTRACE_PRIV_SIZE; +} + +static int arm_spe_info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + + if (priv_size != ARM_SPE_AUXTRACE_PRIV_SIZE) + return -EINVAL; + + if (!session->evlist->nr_mmaps) + return -EINVAL; + + auxtrace_info->type = PERF_AUXTRACE_ARM_SPE; + auxtrace_info->priv[ARM_SPE_PMU_TYPE] = arm_spe_pmu->type; + + return 0; +} + +static int arm_spe_recording_options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_pmu *arm_spe_pmu = sper->arm_spe_pmu; + struct perf_evsel *evsel, *arm_spe_evsel = NULL; + bool privileged = geteuid() == 0 || perf_event_paranoid() < 0; + struct perf_evsel *tracking_evsel; + int err; + + sper->evlist = evlist; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == arm_spe_pmu->type) { + if (arm_spe_evsel) { + pr_err("There may be only one " ARM_SPE_PMU_NAME "x event\n"); + return -EINVAL; + } + evsel->attr.freq = 0; + evsel->attr.sample_period = 1; + arm_spe_evsel = evsel; + opts->full_auxtrace = true; + } + } + + if (!opts->full_auxtrace) + return 0; + + /* We are in full trace mode but '-m,xyz' wasn't specified */ + if (opts->full_auxtrace && !opts->auxtrace_mmap_pages) { + if (privileged) { + opts->auxtrace_mmap_pages = MiB(4) / page_size; + } else { + opts->auxtrace_mmap_pages = KiB(128) / page_size; + if (opts->mmap_pages == UINT_MAX) + opts->mmap_pages = KiB(256) / page_size; + } + } + + /* Validate auxtrace_mmap_pages */ + if (opts->auxtrace_mmap_pages) { + size_t sz = opts->auxtrace_mmap_pages * (size_t)page_size; + size_t min_sz = KiB(8); + + if (sz < min_sz || !is_power_of_2(sz)) { + pr_err("Invalid mmap size for ARM SPE: must be at least %zuKiB and a power of 2\n", + min_sz / 1024); + return -EINVAL; + } + } + + + /* + * To obtain the auxtrace buffer file descriptor, the auxtrace event + * must come first. + */ + perf_evlist__to_front(evlist, arm_spe_evsel); + + perf_evsel__set_sample_bit(arm_spe_evsel, CPU); + perf_evsel__set_sample_bit(arm_spe_evsel, TIME); + perf_evsel__set_sample_bit(arm_spe_evsel, TID); + + /* Add dummy event to keep tracking */ + err = parse_events(evlist, "dummy:u", NULL); + if (err) + return err; + + tracking_evsel = perf_evlist__last(evlist); + perf_evlist__set_tracking_event(evlist, tracking_evsel); + + tracking_evsel->attr.freq = 0; + tracking_evsel->attr.sample_period = 1; + perf_evsel__set_sample_bit(tracking_evsel, TIME); + perf_evsel__set_sample_bit(tracking_evsel, CPU); + perf_evsel__reset_sample_bit(tracking_evsel, BRANCH_STACK); + + return 0; +} + +static u64 arm_spe_reference(struct auxtrace_record *itr __maybe_unused) +{ + struct timespec ts; + + clock_gettime(CLOCK_MONOTONIC_RAW, &ts); + + return ts.tv_sec ^ ts.tv_nsec; +} + +static void arm_spe_recording_free(struct auxtrace_record *itr) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + + free(sper); +} + +static int arm_spe_read_finish(struct auxtrace_record *itr, int idx) +{ + struct arm_spe_recording *sper = + container_of(itr, struct arm_spe_recording, itr); + struct perf_evsel *evsel; + + evlist__for_each_entry(sper->evlist, evsel) { + if (evsel->attr.type == sper->arm_spe_pmu->type) + return perf_evlist__enable_event_idx(sper->evlist, + evsel, idx); + } + return -EINVAL; +} + +struct auxtrace_record *arm_spe_recording_init(int *err, + struct perf_pmu *arm_spe_pmu) +{ + struct arm_spe_recording *sper; + + if (!arm_spe_pmu) { + *err = -ENODEV; + return NULL; + } + + sper = zalloc(sizeof(struct arm_spe_recording)); + if (!sper) { + *err = -ENOMEM; + return NULL; + } + + sper->arm_spe_pmu = arm_spe_pmu; + sper->itr.recording_options = arm_spe_recording_options; + sper->itr.info_priv_size = arm_spe_info_priv_size; + sper->itr.info_fill = arm_spe_info_fill; + sper->itr.free = arm_spe_recording_free; + sper->itr.reference = arm_spe_reference; + sper->itr.read_finish = arm_spe_read_finish; + sper->itr.alignment = 0; + + return &sper->itr; +} + +struct perf_event_attr +*arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu) +{ + struct perf_event_attr *attr; + + attr = zalloc(sizeof(struct perf_event_attr)); + if (!attr) { + pr_err("arm_spe default config cannot allocate a perf_event_attr\n"); + return NULL; + } + + /* + * If kernel driver doesn't advertise a minimum, + * use max allowable by PMSIDR_EL1.INTERVAL + */ + if (perf_pmu__scan_file(arm_spe_pmu, "caps/min_interval", "%llu", + &attr->sample_period) != 1) { + pr_debug("arm_spe driver doesn't advertise a min. interval. Using 4096\n"); + attr->sample_period = 4096; + } + + arm_spe_pmu->selectable = true; + arm_spe_pmu->is_uncore = false; + + return attr; +} diff --git a/tools/perf/util/Build b/tools/perf/util/Build index a3de7916fe63..7c6a8b461e24 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -86,6 +86,8 @@ libperf-$(CONFIG_AUXTRACE) += auxtrace.o libperf-$(CONFIG_AUXTRACE) += intel-pt-decoder/ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o +libperf-$(CONFIG_AUXTRACE) += arm-spe.o +libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o libperf-y += parse-branch-options.o libperf-y += dump-insn.o libperf-y += parse-regs-options.o diff --git a/tools/perf/util/arm-spe-pkt-decoder.c b/tools/perf/util/arm-spe-pkt-decoder.c new file mode 100644 index 000000000000..b94001b756c7 --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.c @@ -0,0 +1,462 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include +#include +#include +#include + +#include "arm-spe-pkt-decoder.h" + +#define BIT(n) (1ULL << (n)) + +#define NS_FLAG BIT(63) +#define EL_FLAG (BIT(62) | BIT(61)) + +#define SPE_HEADER0_PAD 0x0 +#define SPE_HEADER0_END 0x1 +#define SPE_HEADER0_ADDRESS 0x30 /* address packet (short) */ +#define SPE_HEADER0_ADDRESS_MASK 0x38 +#define SPE_HEADER0_COUNTER 0x18 /* counter packet (short) */ +#define SPE_HEADER0_COUNTER_MASK 0x38 +#define SPE_HEADER0_TIMESTAMP 0x71 +#define SPE_HEADER0_TIMESTAMP 0x71 +#define SPE_HEADER0_EVENTS 0x2 +#define SPE_HEADER0_EVENTS_MASK 0xf +#define SPE_HEADER0_SOURCE 0x3 +#define SPE_HEADER0_SOURCE_MASK 0xf +#define SPE_HEADER0_CONTEXT 0x24 +#define SPE_HEADER0_CONTEXT_MASK 0x3c +#define SPE_HEADER0_OP_TYPE 0x8 +#define SPE_HEADER0_OP_TYPE_MASK 0x3c +#define SPE_HEADER1_ALIGNMENT 0x0 +#define SPE_HEADER1_ADDRESS 0xb0 /* address packet (extended) */ +#define SPE_HEADER1_ADDRESS_MASK 0xf8 +#define SPE_HEADER1_COUNTER 0x98 /* counter packet (extended) */ +#define SPE_HEADER1_COUNTER_MASK 0xf8 + +#if __BYTE_ORDER == __BIG_ENDIAN +#define le16_to_cpu bswap_16 +#define le32_to_cpu bswap_32 +#define le64_to_cpu bswap_64 +#define memcpy_le64(d, s, n) do { \ + memcpy((d), (s), (n)); \ + *(d) = le64_to_cpu(*(d)); \ +} while (0) +#else +#define le16_to_cpu +#define le32_to_cpu +#define le64_to_cpu +#define memcpy_le64 memcpy +#endif + +static const char * const arm_spe_packet_name[] = { + [ARM_SPE_PAD] = "PAD", + [ARM_SPE_END] = "END", + [ARM_SPE_TIMESTAMP] = "TS", + [ARM_SPE_ADDRESS] = "ADDR", + [ARM_SPE_COUNTER] = "LAT", + [ARM_SPE_CONTEXT] = "CONTEXT", + [ARM_SPE_OP_TYPE] = "OP-TYPE", + [ARM_SPE_EVENTS] = "EVENTS", + [ARM_SPE_DATA_SOURCE] = "DATA-SOURCE", +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type type) +{ + return arm_spe_packet_name[type]; +} + +/* return ARM SPE payload size from its encoding, + * which is in bits 5:4 of the byte. + * 00 : byte + * 01 : halfword (2) + * 10 : word (4) + * 11 : doubleword (8) + */ +static int payloadlen(unsigned char byte) +{ + return 1 << ((byte & 0x30) >> 4); +} + +static int arm_spe_get_payload(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + size_t payload_len = payloadlen(buf[0]); + + if (len < 1 + payload_len) + return ARM_SPE_NEED_MORE_BYTES; + + buf++; + + switch (payload_len) { + case 1: packet->payload = *(uint8_t *)buf; break; + case 2: packet->payload = le16_to_cpu(*(uint16_t *)buf); break; + case 4: packet->payload = le32_to_cpu(*(uint32_t *)buf); break; + case 8: packet->payload = le64_to_cpu(*(uint64_t *)buf); break; + default: return ARM_SPE_BAD_PACKET; + } + + return 1 + payload_len; +} + +static int arm_spe_get_pad(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_PAD; + return 1; +} + +static int arm_spe_get_alignment(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int alignment = 1 << ((buf[0] & 0xf) + 1); + + if (len < alignment) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_PAD; + return alignment - (((uintptr_t)buf) & (alignment - 1)); +} + +static int arm_spe_get_end(struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_END; + return 1; +} + +static int arm_spe_get_timestamp(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_TIMESTAMP; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_events(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + int ret = arm_spe_get_payload(buf, len, packet); + + packet->type = ARM_SPE_EVENTS; + + /* we use index to identify Events with a less number of + * comparisons in arm_spe_pkt_desc(): E.g., the LLC-ACCESS, + * LLC-REFILL, and REMOTE-ACCESS events are identified iff + * index > 1. + */ + packet->index = ret - 1; + + return ret; +} + +static int arm_spe_get_data_source(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_DATA_SOURCE; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_context(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_CONTEXT; + packet->index = buf[0] & 0x3; + + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_op_type(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + packet->type = ARM_SPE_OP_TYPE; + packet->index = buf[0] & 0x3; + return arm_spe_get_payload(buf, len, packet); +} + +static int arm_spe_get_counter(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + if (len < 2) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_COUNTER; + if (ext_hdr) + packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + else + packet->index = buf[0] & 0x7; + + packet->payload = le16_to_cpu(*(uint16_t *)(buf + 1)); + + return 1 + ext_hdr + 2; +} + +static int arm_spe_get_addr(const unsigned char *buf, size_t len, + const unsigned char ext_hdr, struct arm_spe_pkt *packet) +{ + if (len < 8) + return ARM_SPE_NEED_MORE_BYTES; + + packet->type = ARM_SPE_ADDRESS; + if (ext_hdr) + packet->index = ((buf[0] & 0x3) << 3) | (buf[1] & 0x7); + else + packet->index = buf[0] & 0x7; + + memcpy_le64(&packet->payload, buf + 1, 8); + + return 1 + ext_hdr + 8; +} + +static int arm_spe_do_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + unsigned int byte; + + memset(packet, 0, sizeof(struct arm_spe_pkt)); + + if (!len) + return ARM_SPE_NEED_MORE_BYTES; + + byte = buf[0]; + if (byte == SPE_HEADER0_PAD) + return arm_spe_get_pad(packet); + else if (byte == SPE_HEADER0_END) /* no timestamp at end of record */ + return arm_spe_get_end(packet); + else if (byte & 0xc0 /* 0y11xxxxxx */) { + if (byte & 0x80) { + if ((byte & SPE_HEADER0_ADDRESS_MASK) == SPE_HEADER0_ADDRESS) + return arm_spe_get_addr(buf, len, 0, packet); + if ((byte & SPE_HEADER0_COUNTER_MASK) == SPE_HEADER0_COUNTER) + return arm_spe_get_counter(buf, len, 0, packet); + } else + if (byte == SPE_HEADER0_TIMESTAMP) + return arm_spe_get_timestamp(buf, len, packet); + else if ((byte & SPE_HEADER0_EVENTS_MASK) == SPE_HEADER0_EVENTS) + return arm_spe_get_events(buf, len, packet); + else if ((byte & SPE_HEADER0_SOURCE_MASK) == SPE_HEADER0_SOURCE) + return arm_spe_get_data_source(buf, len, packet); + else if ((byte & SPE_HEADER0_CONTEXT_MASK) == SPE_HEADER0_CONTEXT) + return arm_spe_get_context(buf, len, packet); + else if ((byte & SPE_HEADER0_OP_TYPE_MASK) == SPE_HEADER0_OP_TYPE) + return arm_spe_get_op_type(buf, len, packet); + } else if ((byte & 0xe0) == 0x20 /* 0y001xxxxx */) { + /* 16-bit header */ + byte = buf[1]; + if (byte == SPE_HEADER1_ALIGNMENT) + return arm_spe_get_alignment(buf, len, packet); + else if ((byte & SPE_HEADER1_ADDRESS_MASK) == SPE_HEADER1_ADDRESS) + return arm_spe_get_addr(buf, len, 1, packet); + else if ((byte & SPE_HEADER1_COUNTER_MASK) == SPE_HEADER1_COUNTER) + return arm_spe_get_counter(buf, len, 1, packet); + } + + return ARM_SPE_BAD_PACKET; +} + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet) +{ + int ret; + + ret = arm_spe_do_get_packet(buf, len, packet); + /* put multiple consecutive PADs on the same line, up to + * the fixed-width output format of 16 bytes per line. + */ + if (ret > 0 && packet->type == ARM_SPE_PAD) { + while (ret < 16 && len > (size_t)ret && !buf[ret]) + ret += 1; + } + return ret; +} + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, + size_t buf_len) +{ + int ret, ns, el, idx = packet->index; + unsigned long long payload = packet->payload; + const char *name = arm_spe_pkt_name(packet->type); + + switch (packet->type) { + case ARM_SPE_BAD: + case ARM_SPE_PAD: + case ARM_SPE_END: + return snprintf(buf, buf_len, "%s", name); + case ARM_SPE_EVENTS: { + size_t blen = buf_len; + + ret = 0; + ret = snprintf(buf, buf_len, "EV"); + buf += ret; + blen -= ret; + if (payload & 0x1) { + ret = snprintf(buf, buf_len, " EXCEPTION-GEN"); + buf += ret; + blen -= ret; + } + if (payload & 0x2) { + ret = snprintf(buf, buf_len, " RETIRED"); + buf += ret; + blen -= ret; + } + if (payload & 0x4) { + ret = snprintf(buf, buf_len, " L1D-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x8) { + ret = snprintf(buf, buf_len, " L1D-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x10) { + ret = snprintf(buf, buf_len, " TLB-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x20) { + ret = snprintf(buf, buf_len, " TLB-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x40) { + ret = snprintf(buf, buf_len, " NOT-TAKEN"); + buf += ret; + blen -= ret; + } + if (payload & 0x80) { + ret = snprintf(buf, buf_len, " MISPRED"); + buf += ret; + blen -= ret; + } + if (idx > 1) { + if (payload & 0x100) { + ret = snprintf(buf, buf_len, " LLC-ACCESS"); + buf += ret; + blen -= ret; + } + if (payload & 0x200) { + ret = snprintf(buf, buf_len, " LLC-REFILL"); + buf += ret; + blen -= ret; + } + if (payload & 0x400) { + ret = snprintf(buf, buf_len, " REMOTE-ACCESS"); + buf += ret; + blen -= ret; + } + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case ARM_SPE_OP_TYPE: + switch (idx) { + case 0: return snprintf(buf, buf_len, "%s", payload & 0x1 ? + "COND-SELECT" : "INSN-OTHER"); + case 1: { + size_t blen = buf_len; + + if (payload & 0x1) + ret = snprintf(buf, buf_len, "ST"); + else + ret = snprintf(buf, buf_len, "LD"); + buf += ret; + blen -= ret; + if (payload & 0x2) { + if (payload & 0x4) { + ret = snprintf(buf, buf_len, " AT"); + buf += ret; + blen -= ret; + } + if (payload & 0x8) { + ret = snprintf(buf, buf_len, " EXCL"); + buf += ret; + blen -= ret; + } + if (payload & 0x10) { + ret = snprintf(buf, buf_len, " AR"); + buf += ret; + blen -= ret; + } + } else if (payload & 0x4) { + ret = snprintf(buf, buf_len, " SIMD-FP"); + buf += ret; + blen -= ret; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + case 2: { + size_t blen = buf_len; + + ret = snprintf(buf, buf_len, "B"); + buf += ret; + blen -= ret; + if (payload & 0x1) { + ret = snprintf(buf, buf_len, " COND"); + buf += ret; + blen -= ret; + } + if (payload & 0x2) { + ret = snprintf(buf, buf_len, " IND"); + buf += ret; + blen -= ret; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + default: return 0; + } + case ARM_SPE_DATA_SOURCE: + case ARM_SPE_TIMESTAMP: + return snprintf(buf, buf_len, "%s %lld", name, payload); + case ARM_SPE_ADDRESS: + switch (idx) { + case 0: + case 1: ns = !!(packet->payload & NS_FLAG); + el = (packet->payload & EL_FLAG) >> 61; + payload &= ~(0xffULL << 56); + return snprintf(buf, buf_len, "%s 0x%llx el%d ns=%d", + (idx == 1) ? "TGT" : "PC", payload, el, ns); + case 2: return snprintf(buf, buf_len, "VA 0x%llx", payload); + case 3: ns = !!(packet->payload & NS_FLAG); + payload &= ~(0xffULL << 56); + return snprintf(buf, buf_len, "PA 0x%llx ns=%d", + payload, ns); + default: return 0; + } + case ARM_SPE_CONTEXT: + return snprintf(buf, buf_len, "%s 0x%lx el%d", name, + (unsigned long)payload, idx + 1); + case ARM_SPE_COUNTER: { + size_t blen = buf_len; + + ret = snprintf(buf, buf_len, "%s %d ", name, + (unsigned short)payload); + buf += ret; + blen -= ret; + switch (idx) { + case 0: ret = snprintf(buf, buf_len, "TOT"); break; + case 1: ret = snprintf(buf, buf_len, "ISSUE"); break; + case 2: ret = snprintf(buf, buf_len, "XLAT"); break; + default: ret = 0; + } + if (ret < 0) + return ret; + blen -= ret; + return buf_len - blen; + } + default: + break; + } + + return snprintf(buf, buf_len, "%s 0x%llx (%d)", + name, payload, packet->index); +} diff --git a/tools/perf/util/arm-spe-pkt-decoder.h b/tools/perf/util/arm-spe-pkt-decoder.h new file mode 100644 index 000000000000..d786ef65113f --- /dev/null +++ b/tools/perf/util/arm-spe-pkt-decoder.h @@ -0,0 +1,43 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#ifndef INCLUDE__ARM_SPE_PKT_DECODER_H__ +#define INCLUDE__ARM_SPE_PKT_DECODER_H__ + +#include +#include + +#define ARM_SPE_PKT_DESC_MAX 256 + +#define ARM_SPE_NEED_MORE_BYTES -1 +#define ARM_SPE_BAD_PACKET -2 + +enum arm_spe_pkt_type { + ARM_SPE_BAD, + ARM_SPE_PAD, + ARM_SPE_END, + ARM_SPE_TIMESTAMP, + ARM_SPE_ADDRESS, + ARM_SPE_COUNTER, + ARM_SPE_CONTEXT, + ARM_SPE_OP_TYPE, + ARM_SPE_EVENTS, + ARM_SPE_DATA_SOURCE, +}; + +struct arm_spe_pkt { + enum arm_spe_pkt_type type; + unsigned char index; + uint64_t payload; +}; + +const char *arm_spe_pkt_name(enum arm_spe_pkt_type); + +int arm_spe_get_packet(const unsigned char *buf, size_t len, + struct arm_spe_pkt *packet); + +int arm_spe_pkt_desc(const struct arm_spe_pkt *packet, char *buf, size_t len); +#endif diff --git a/tools/perf/util/arm-spe.c b/tools/perf/util/arm-spe.c new file mode 100644 index 000000000000..6067267cc76c --- /dev/null +++ b/tools/perf/util/arm-spe.c @@ -0,0 +1,231 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "cpumap.h" +#include "color.h" +#include "evsel.h" +#include "evlist.h" +#include "machine.h" +#include "session.h" +#include "util.h" +#include "thread.h" +#include "debug.h" +#include "auxtrace.h" +#include "arm-spe.h" +#include "arm-spe-pkt-decoder.h" + +struct arm_spe { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + u32 auxtrace_type; + struct perf_session *session; + struct machine *machine; + u32 pmu_type; +}; + +struct arm_spe_queue { + struct arm_spe *spe; + unsigned int queue_nr; + struct auxtrace_buffer *buffer; + bool on_heap; + bool done; + pid_t pid; + pid_t tid; + int cpu; +}; + +static void arm_spe_dump(struct arm_spe *spe __maybe_unused, + unsigned char *buf, size_t len) +{ + struct arm_spe_pkt packet; + size_t pos = 0; + int ret, pkt_len, i; + char desc[ARM_SPE_PKT_DESC_MAX]; + const char *color = PERF_COLOR_BLUE; + + color_fprintf(stdout, color, + ". ... ARM SPE data: size %zu bytes\n", + len); + + while (len) { + ret = arm_spe_get_packet(buf, len, &packet); + if (ret > 0) + pkt_len = ret; + else + pkt_len = 1; + printf("."); + color_fprintf(stdout, color, " %08x: ", pos); + for (i = 0; i < pkt_len; i++) + color_fprintf(stdout, color, " %02x", buf[i]); + for (; i < 16; i++) + color_fprintf(stdout, color, " "); + if (ret > 0) { + ret = arm_spe_pkt_desc(&packet, desc, + ARM_SPE_PKT_DESC_MAX); + if (ret > 0) + color_fprintf(stdout, color, " %s\n", desc); + } else { + color_fprintf(stdout, color, " Bad packet!\n"); + } + pos += pkt_len; + buf += pkt_len; + len -= pkt_len; + } +} + +static void arm_spe_dump_event(struct arm_spe *spe, unsigned char *buf, + size_t len) +{ + printf(".\n"); + arm_spe_dump(spe, buf, len); +} + +static int arm_spe_process_event(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static int arm_spe_process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool __maybe_unused) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data__fd(session->data); + int err; + + if (perf_data__is_pipe(session->data)) { + data_offset = 0; + } else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&spe->queues, session, event, + data_offset, &buffer); + if (err) + return err; + + /* Dump here now we have copied a piped trace out of the pipe */ + if (dump_trace) { + if (auxtrace_buffer__get_data(buffer, fd)) { + arm_spe_dump_event(spe, buffer->data, + buffer->size); + auxtrace_buffer__put_data(buffer); + } + } + + return 0; +} + +static int arm_spe_flush(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static void arm_spe_free_queue(void *priv) +{ + struct arm_spe_queue *speq = priv; + + if (!speq) + return; + free(speq); +} + +static void arm_spe_free_events(struct perf_session *session) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + struct auxtrace_queues *queues = &spe->queues; + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + arm_spe_free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + auxtrace_queues__free(queues); +} + +static void arm_spe_free(struct perf_session *session) +{ + struct arm_spe *spe = container_of(session->auxtrace, struct arm_spe, + auxtrace); + + auxtrace_heap__free(&spe->heap); + arm_spe_free_events(session); + session->auxtrace = NULL; + free(spe); +} + +static const char * const arm_spe_info_fmts[] = { + [ARM_SPE_PMU_TYPE] = " PMU Type %"PRId64"\n", +}; + +static void arm_spe_print_info(u64 *arr) +{ + if (!dump_trace) + return; + + fprintf(stdout, arm_spe_info_fmts[ARM_SPE_PMU_TYPE], arr[ARM_SPE_PMU_TYPE]); +} + +int arm_spe_process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + size_t min_sz = sizeof(u64) * ARM_SPE_PMU_TYPE; + struct arm_spe *spe; + int err; + + if (auxtrace_info->header.size < sizeof(struct auxtrace_info_event) + + min_sz) + return -EINVAL; + + spe = zalloc(sizeof(struct arm_spe)); + if (!spe) + return -ENOMEM; + + err = auxtrace_queues__init(&spe->queues); + if (err) + goto err_free; + + spe->session = session; + spe->machine = &session->machines.host; /* No kvm support */ + spe->auxtrace_type = auxtrace_info->type; + spe->pmu_type = auxtrace_info->priv[ARM_SPE_PMU_TYPE]; + + spe->auxtrace.process_event = arm_spe_process_event; + spe->auxtrace.process_auxtrace_event = arm_spe_process_auxtrace_event; + spe->auxtrace.flush_events = arm_spe_flush; + spe->auxtrace.free_events = arm_spe_free_events; + spe->auxtrace.free = arm_spe_free; + session->auxtrace = &spe->auxtrace; + + arm_spe_print_info(&auxtrace_info->priv[0]); + + return 0; + +err_free: + free(spe); + return err; +} diff --git a/tools/perf/util/arm-spe.h b/tools/perf/util/arm-spe.h new file mode 100644 index 000000000000..98d3235781c3 --- /dev/null +++ b/tools/perf/util/arm-spe.h @@ -0,0 +1,31 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* + * Arm Statistical Profiling Extensions (SPE) support + * Copyright (c) 2017-2018, Arm Ltd. + */ + +#ifndef INCLUDE__PERF_ARM_SPE_H__ +#define INCLUDE__PERF_ARM_SPE_H__ + +#define ARM_SPE_PMU_NAME "arm_spe_" + +enum { + ARM_SPE_PMU_TYPE, + ARM_SPE_PER_CPU_MMAPS, + ARM_SPE_AUXTRACE_PRIV_MAX, +}; + +#define ARM_SPE_AUXTRACE_PRIV_SIZE (ARM_SPE_AUXTRACE_PRIV_MAX * sizeof(u64)) + +union perf_event; +struct perf_session; +struct perf_pmu; + +struct auxtrace_record *arm_spe_recording_init(int *err, + struct perf_pmu *arm_spe_pmu); + +int arm_spe_process_auxtrace_info(union perf_event *event, + struct perf_session *session); + +struct perf_event_attr *arm_spe_pmu_default_config(struct perf_pmu *arm_spe_pmu); +#endif diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index c76687e42344..3bba9947ab7f 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -54,6 +54,7 @@ #include "intel-pt.h" #include "intel-bts.h" +#include "arm-spe.h" #include "sane_ctype.h" #include "symbol/kallsyms.h" @@ -910,6 +911,8 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, return intel_pt_process_auxtrace_info(event, session); case PERF_AUXTRACE_INTEL_BTS: return intel_bts_process_auxtrace_info(event, session); + case PERF_AUXTRACE_ARM_SPE: + return arm_spe_process_auxtrace_info(event, session); case PERF_AUXTRACE_CS_ETM: case PERF_AUXTRACE_UNKNOWN: default: diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index d19e11b68de7..453c148d2158 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -43,6 +43,7 @@ enum auxtrace_type { PERF_AUXTRACE_INTEL_PT, PERF_AUXTRACE_INTEL_BTS, PERF_AUXTRACE_CS_ETM, + PERF_AUXTRACE_ARM_SPE, }; enum itrace_period_type { -- cgit v1.2.3-70-g09d2 From 249d98e567e25dd03e015e2d31e1b7b9648f34df Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 11:07:58 -0300 Subject: perf callchain: Fix attr.sample_max_stack setting When setting the "dwarf" unwinder for a specific event and not specifying the max-stack, the attr.sample_max_stack ended up using an uninitialized callchain_param.max_stack, fix it by using designated initializers for that callchain_param variable, zeroing all non explicitely initialized struct members. Here is what happened: # perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 callchain: type DWARF callchain: stack dump size 8192 perf_event_attr: type 2 size 112 config 0x730 { sample_period, sample_freq } 1 sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC exclude_callchain_user 1 { wakeup_events, wakeup_watermark } 1 sample_regs_user 0xff0fff sample_stack_user 8192 sample_max_stack 50656 sys_perf_event_open failed, error -75 Value too large for defined data type # perf trace -vv --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 callchain: type DWARF callchain: stack dump size 8192 perf_event_attr: type 2 size 112 config 0x730 sample_type IP|TID|TIME|ADDR|CALLCHAIN|CPU|PERIOD|RAW|REGS_USER|STACK_USER|DATA_SRC exclude_callchain_user 1 sample_regs_user 0xff0fff sample_stack_user 8192 sample_max_stack 30448 sys_perf_event_open failed, error -75 Value too large for defined data type # Now the attr.sample_max_stack is set to zero and the above works as expected: # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms 0.000 probe_libc:inet_pton:(7feb7a998350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa39b6108f3f] (/usr/bin/ping) # Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-is9tramondqa9jlxxsgcm9iz@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index efa2e629a669..8f971a2301d1 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -731,14 +731,14 @@ static void apply_config_terms(struct perf_evsel *evsel, struct perf_evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; struct perf_event_attr *attr = &evsel->attr; - struct callchain_param param; + /* callgraph default */ + struct callchain_param param = { + .record_mode = callchain_param.record_mode, + }; u32 dump_size = 0; int max_stack = 0; const char *callgraph_buf = NULL; - /* callgraph default */ - param.record_mode = callchain_param.record_mode; - list_for_each_entry(term, config_terms, list) { switch (term->type) { case PERF_EVSEL__CONFIG_TERM_PERIOD: -- cgit v1.2.3-70-g09d2 From eabad8c6856f185f876b54c426c2cc69fe0f0a7d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 16:48:46 -0300 Subject: perf unwind: Do not look just at the global callchain_param.record_mode When setting up DWARF callchains on specific events, without using 'record' or 'trace' --call-graph, but instead doing it like: perf trace -e cycles/call-graph=dwarf/ The unwind__prepare_access() call in thread__insert_map() when we process PERF_RECORD_MMAP(2) metadata events were not being performed, precluding us from using per-event DWARF callchains, handling them just when we asked for all events to be DWARF, using "--call-graph dwarf". We do it in the PERF_RECORD_MMAP because we have to look at one of the executable maps to figure out the executable type (64-bit, 32-bit) of the DSO laid out in that mmap. Also to look at the architecture where the perf.data file was recorded. All this probably should be deferred to when we process a sample for some thread that has callchains, so that we do this processing only for the threads with samples, not for all of them. For now, fix using DWARF on specific events. Before: # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.048 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.048/0.048/0.048/0.000 ms 0.000 probe_libc:inet_pton:(7fe9597bb350)) Problem processing probe_libc:inet_pton callchain, skipping... # After: # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.060 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.060/0.060/0.060/0.000 ms 0.000 probe_libc:inet_pton:(7fd4aa930350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa804e51af3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa804e51b379] (/usr/bin/ping) # # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.057 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.057/0.057/0.057/0.000 ms 0.000 probe_libc:inet_pton:(7f9363b9e350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffa9e8a14e0f3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffa9e8a14e1379] (/usr/bin/ping) # # perf trace --call-graph=fp --no-syscalls -e probe_libc:inet_pton/call-graph=dwarf/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.077 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.077/0.077/0.077/0.000 ms 0.000 probe_libc:inet_pton:(7f4947e1c350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa716d88ef3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa716d88f379] (/usr/bin/ping) # # perf trace --no-syscalls -e probe_libc:inet_pton/call-graph=fp/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.078 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.078/0.078/0.078/0.000 ms 0.000 probe_libc:inet_pton:(7fa157696350)) __GI___inet_pton (/usr/lib64/libc-2.26.so) getaddrinfo (/usr/lib64/libc-2.26.so) [0xffffa9ba39c74f40] (/usr/bin/ping) # Acked-by: Namhyung Kim Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/r/20180116182650.GE16107@kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-c2c.c | 5 +++-- tools/perf/builtin-report.c | 5 +++-- tools/perf/builtin-script.c | 5 +++-- tools/perf/tests/dwarf-unwind.c | 1 + tools/perf/util/callchain.c | 10 ++++++++++ tools/perf/util/callchain.h | 2 ++ tools/perf/util/unwind-libunwind-local.c | 9 +++------ 7 files changed, 25 insertions(+), 12 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-c2c.c b/tools/perf/builtin-c2c.c index c0debc3f79b6..c0815a37fdb5 100644 --- a/tools/perf/builtin-c2c.c +++ b/tools/perf/builtin-c2c.c @@ -2390,9 +2390,10 @@ static int setup_callchain(struct perf_evlist *evlist) enum perf_call_graph_mode mode = CALLCHAIN_NONE; if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) + (sample_type & PERF_SAMPLE_STACK_USER)) { mode = CALLCHAIN_DWARF; - else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) mode = CALLCHAIN_LBR; else if (sample_type & PERF_SAMPLE_CALLCHAIN) mode = CALLCHAIN_FP; diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index dd4df9a5cd06..6593779224d5 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -338,9 +338,10 @@ static int report__setup_sample_type(struct report *rep) if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) + (sample_type & PERF_SAMPLE_STACK_USER)) { callchain_param.record_mode = CALLCHAIN_DWARF; - else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index c1cce474c0f1..08bc818f371b 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2919,9 +2919,10 @@ static void script__setup_sample_type(struct perf_script *script) if (symbol_conf.use_callchain || symbol_conf.cumulate_callchain) { if ((sample_type & PERF_SAMPLE_REGS_USER) && - (sample_type & PERF_SAMPLE_STACK_USER)) + (sample_type & PERF_SAMPLE_STACK_USER)) { callchain_param.record_mode = CALLCHAIN_DWARF; - else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + dwarf_callchain_users = true; + } else if (sample_type & PERF_SAMPLE_BRANCH_STACK) callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index ac40e05bcab4..260418969120 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -173,6 +173,7 @@ int test__dwarf_unwind(struct test *test __maybe_unused, int subtest __maybe_unu } callchain_param.record_mode = CALLCHAIN_DWARF; + dwarf_callchain_users = true; if (init_live_machine(machine)) { pr_err("Could not init machine\n"); diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 082505d08d72..32ef7bdca1cf 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -37,6 +37,15 @@ struct callchain_param callchain_param = { CALLCHAIN_PARAM_DEFAULT }; +/* + * Are there any events usind DWARF callchains? + * + * I.e. + * + * -e cycles/call-graph=dwarf/ + */ +bool dwarf_callchain_users; + struct callchain_param callchain_param_default = { CALLCHAIN_PARAM_DEFAULT }; @@ -265,6 +274,7 @@ int parse_callchain_record(const char *arg, struct callchain_param *param) ret = 0; param->record_mode = CALLCHAIN_DWARF; param->dump_size = default_stack_dump_size; + dwarf_callchain_users = true; tok = strtok_r(NULL, ",", &saveptr); if (tok) { diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index b79ef2478a57..154560b1eb65 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -89,6 +89,8 @@ enum chain_value { CCVAL_COUNT, }; +extern bool dwarf_callchain_users; + struct callchain_param { bool enabled; enum perf_call_graph_mode record_mode; diff --git a/tools/perf/util/unwind-libunwind-local.c b/tools/perf/util/unwind-libunwind-local.c index 7a42f703e858..af873044d33a 100644 --- a/tools/perf/util/unwind-libunwind-local.c +++ b/tools/perf/util/unwind-libunwind-local.c @@ -631,9 +631,8 @@ static unw_accessors_t accessors = { static int _unwind__prepare_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return 0; - thread->addr_space = unw_create_addr_space(&accessors, 0); if (!thread->addr_space) { pr_err("unwind: Can't create unwind address space.\n"); @@ -646,17 +645,15 @@ static int _unwind__prepare_access(struct thread *thread) static void _unwind__flush_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return; - unw_flush_cache(thread->addr_space, 0, 0); } static void _unwind__finish_access(struct thread *thread) { - if (callchain_param.record_mode != CALLCHAIN_DWARF) + if (!dwarf_callchain_users) return; - unw_destroy_addr_space(thread->addr_space); } -- cgit v1.2.3-70-g09d2 From 75d501171462d8624fd14a2baa474476d6745e95 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 10:39:55 -0300 Subject: perf trace: Setup DWARF callchains for non-syscall events when --max-stack is used If we use: perf trace --max-stack=4 then the syscall events will use DWARF callchains, when available (libunwind enabled in the build) and the printing will stop at 4 levels. When we introduced support for tracepoint events this ended up not applying for them, fix it. Before: # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.058 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.058/0.058/0.058/0.000 ms 0.000 probe_libc:inet_pton:(7fc6c2a16350)) # After: # perf trace --call-graph=dwarf --no-syscalls -e probe_libc:inet_pton ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.087 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.087/0.087/0.087/0.000 ms 0.000 probe_libc:inet_pton:(7fbf9a041350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa947cb67f3f] (/usr/bin/ping) __libc_start_main (/usr/lib64/libc-2.26.so) [0xffffaa947cb68379] (/usr/bin/ping) # Reported-by: Thomas Richter Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-afsu9eegd43ppihiuafhh9qv@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 0362974854e9..ee85c29dbf70 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -2350,7 +2350,7 @@ static int trace__run(struct trace *trace, int argc, const char **argv) goto out_delete_evlist; } - perf_evlist__config(evlist, &trace->opts, NULL); + perf_evlist__config(evlist, &trace->opts, &callchain_param); signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); @@ -3065,8 +3065,9 @@ int cmd_trace(int argc, const char **argv) } #ifdef HAVE_DWARF_UNWIND_SUPPORT - if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled && trace.trace_syscalls) + if ((trace.min_stack || max_stack_user_set) && !callchain_param.enabled) { record_opts__parse_callchain(&trace.opts, &callchain_param, "dwarf", false); + } #endif if (callchain_param.enabled) { -- cgit v1.2.3-70-g09d2 From bd3dda9ab0fbdb8a91a2e869d93a0c9692b8444f Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 15 Jan 2018 11:33:53 -0300 Subject: perf trace: Allow overriding global --max-stack per event The per-event max-stack setting wasn't overriding the global --max-stack setting: # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.072 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.072/0.072/0.072/0.000 ms 0.000 probe_libc:inet_pton:(7feb7a998350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) __GI_getaddrinfo (inlined) [0xffffaa39b6108f3f] (/usr/bin/ping) # Fix it: # perf trace --no-syscalls --max-stack 4 -e probe_libc:inet_pton/call-graph=dwarf,max-stack=2/ ping -6 -c 1 ::1 PING ::1(::1) 56 data bytes 64 bytes from ::1: icmp_seq=1 ttl=64 time=0.073 ms --- ::1 ping statistics --- 1 packets transmitted, 1 received, 0% packet loss, time 0ms rtt min/avg/max/mdev = 0.073/0.073/0.073/0.000 ms 0.000 probe_libc:inet_pton:(7f1083221350)) __inet_pton (inlined) gaih_inet.constprop.7 (/usr/lib64/libc-2.26.so) # Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-ic3g837xg8ob3kcpkspxwz0g@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ee85c29dbf70..531d43bf57e1 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -1644,7 +1644,7 @@ static int trace__resolve_callchain(struct trace *trace, struct perf_evsel *evse struct addr_location al; if (machine__resolve(trace->host, &al, sample) < 0 || - thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, trace->max_stack)) + thread__resolve_callchain(al.thread, cursor, evsel, sample, NULL, NULL, evsel->attr.sample_max_stack)) return -1; return 0; @@ -2423,6 +2423,18 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || evlist->threads->nr > 1 || perf_evlist__first(evlist)->attr.inherit; + + /* + * Now that we already used evsel->attr to ask the kernel to setup the + * events, lets reuse evsel->attr.sample_max_stack as the limit in + * trace__resolve_callchain(), allowing per-event max-stack settings + * to override an explicitely set --max-stack global setting. + */ + evlist__for_each_entry(evlist, evsel) { + if ((evsel->attr.sample_type & PERF_SAMPLE_CALLCHAIN) && + evsel->attr.sample_max_stack == 0) + evsel->attr.sample_max_stack = trace->max_stack; + } again: before = trace->nr_events; -- cgit v1.2.3-70-g09d2 From 0d3dcc0ef13d9d78132d7d6ae068c17ed858e65e Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Tue, 16 Jan 2018 11:16:25 -0300 Subject: perf callchains: Ask for PERF_RECORD_MMAP for data mmaps for DWARF unwinding When we use a global DWARF setting as in: perf record --call-graph dwarf According to 5c0cf22477ea ("perf record: Store data mmaps for dwarf unwind") we need to set up some extra perf_event_attr bits. But when we instead do a per event dwarf setting: perf record -e cycles/call-graph=dwarf/ This was not being done, make them equivalent. This didn't produce any output changes in my tests while fixing up loose ends in the per-event settings, I found it just by comparing the perf_event_attr fields trying to find an explanation for those problems. Cc: Adrian Hunter Cc: David Ahern Cc: Hendrick Brueckner Cc: Jiri Olsa Cc: Namhyung Kim Cc: Noel Grandin Cc: Thomas Richter Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-6476r53h2o38skbs9qa4ust4@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 8f971a2301d1..85eb84dfdf91 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -726,7 +726,7 @@ perf_evsel__reset_callgraph(struct perf_evsel *evsel, } static void apply_config_terms(struct perf_evsel *evsel, - struct record_opts *opts) + struct record_opts *opts, bool track) { struct perf_evsel_config_term *term; struct list_head *config_terms = &evsel->config_terms; @@ -797,6 +797,8 @@ static void apply_config_terms(struct perf_evsel *evsel, /* User explicitly set per-event callgraph, clear the old setting and reset. */ if ((callgraph_buf != NULL) || (dump_size > 0) || max_stack) { + bool sample_address = false; + if (max_stack) { param.max_stack = max_stack; if (callgraph_buf == NULL) @@ -816,6 +818,8 @@ static void apply_config_terms(struct perf_evsel *evsel, evsel->name); return; } + if (param.record_mode == CALLCHAIN_DWARF) + sample_address = true; } } if (dump_size > 0) { @@ -828,8 +832,14 @@ static void apply_config_terms(struct perf_evsel *evsel, perf_evsel__reset_callgraph(evsel, &callchain_param); /* set perf-event callgraph */ - if (param.enabled) + if (param.enabled) { + if (sample_address) { + perf_evsel__set_sample_bit(evsel, ADDR); + perf_evsel__set_sample_bit(evsel, DATA_SRC); + evsel->attr.mmap_data = track; + } perf_evsel__config_callchain(evsel, opts, ¶m); + } } } @@ -1060,7 +1070,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts, * Apply event specific term settings, * it overloads any global configuration. */ - apply_config_terms(evsel, opts); + apply_config_terms(evsel, opts, track); evsel->ignore_missing_thread = opts->ignore_missing_thread; } -- cgit v1.2.3-70-g09d2 From eb0b419eff8cf51af8e16cc8c5d2a92d19824266 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:26 +0800 Subject: perf report: Improve error msg when no first/last sample time found The following message will be returned to user when executing 'perf report --time' if perf data file doesn't contain the first/last sample time. "HINT: no first/last sample time found in perf data. Please use latest perf binary to execute 'perf record' (if '--buildid-all' is enabled, needs to set '--timestamp-boundary')." Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-2-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 6593779224d5..7d4f0a5de326 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -1300,7 +1300,9 @@ repeat: if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { - pr_err("No first/last sample time in perf data\n"); + pr_err("HINT: no first/last sample time found in perf data.\n" + "Please use latest perf binary to execute 'perf record'\n" + "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); return -EINVAL; } -- cgit v1.2.3-70-g09d2 From 1e2778e91616086177a255f3fc8c72ecaa564ae6 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:27 +0800 Subject: perf script: Improve error msg when no first/last sample time found The following message will be returned to user when executing 'perf script --time' if perf data file doesn't contain the first/last sample time. "HINT: no first/last sample time found in perf data. Please use latest perf binary to execute 'perf record' (if '--buildid-all' is enabled, needs to set '--timestamp-boundary')." Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-3-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-script.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 08bc818f371b..ac781916e51e 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -3449,7 +3449,9 @@ int cmd_script(int argc, const char **argv) if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { - pr_err("No first/last sample time in perf data\n"); + pr_err("HINT: no first/last sample time found in perf data.\n" + "Please use latest perf binary to execute 'perf record'\n" + "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); err = -EINVAL; goto out_delete; } -- cgit v1.2.3-70-g09d2 From 6e761cbc9127fb8fc609aea2265ee8279b8d6c55 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:28 +0800 Subject: perf util: Improve error checking for time percent input The command line like 'perf report --stdio --time 1abc%/1' could be accepted by perf. It looks not very good. This patch uses strtod() to replace original atof() and check the entire string. Now for the same command line, it would return error message "Invalid time string". root@skl:/tmp# perf report --stdio --time 1abc%/1 Invalid time string Signed-off-by: Jin Yao Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-4-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 3f7f18f06982..88510ab6450e 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -116,7 +116,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr) static int parse_percent(double *pcnt, char *str) { - char *c; + char *c, *endptr; + double d; c = strchr(str, '%'); if (c) @@ -124,8 +125,11 @@ static int parse_percent(double *pcnt, char *str) else return -1; - *pcnt = atof(str) / 100.0; + d = strtod(str, &endptr); + if (endptr != str + strlen(str)) + return -1; + *pcnt = d / 100.0; return 0; } -- cgit v1.2.3-70-g09d2 From 3002812e602d3f991a5b8cdc0499e63e13ff65c4 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:29 +0800 Subject: perf util: Support no index time percent slice Previously, the time percent slice needs an index to specify which one the user wants. It may be easier to use if the index can be omitted. So with this patch, for example, perf report --stdio --time 10%/1 should be equivalent to perf report --stdio --time 10% Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-5-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 88510ab6450e..5769f972c23e 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -261,6 +261,37 @@ static int percent_comma_split(struct perf_time_interval *ptime_buf, int num, return i; } +static int one_percent_convert(struct perf_time_interval *ptime_buf, + const char *ostr, u64 start, u64 end, char *c) +{ + char *str; + int len = strlen(ostr), ret; + + /* + * c points to '%'. + * '%' should be the last character + */ + if (ostr + len - 1 != c) + return -1; + + /* + * Construct a string like "xx%/1" + */ + str = malloc(len + 3); + if (str == NULL) + return -ENOMEM; + + memcpy(str, ostr, len); + strcpy(str + len, "/1"); + + ret = percent_slash_split(str, ptime_buf, start, end); + if (ret == 0) + ret = 1; + + free(str); + return ret; +} + int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, const char *ostr, u64 start, u64 end) { @@ -270,6 +301,7 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, * ostr example: * 10%/2,10%/3: select the second 10% slice and the third 10% slice * 0%-10%,30%-40%: multiple time range + * 50%: just one percent */ memset(ptime_buf, 0, sizeof(*ptime_buf) * num); @@ -286,6 +318,10 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, end, percent_dash_split); } + c = strchr(ostr, '%'); + if (c) + return one_percent_convert(ptime_buf, ostr, start, end, c); + return -1; } -- cgit v1.2.3-70-g09d2 From 7425664bbd3174814500c7ab8740cbb9bb25396c Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:30 +0800 Subject: perf report: Add an indication of what time slices are used Add a time slices indication to the perf report header. For example, # perf report --stdio --time 10% # Total Lost Samples: 0 # # Samples: 9K of event 'cycles:ppp' (time slices: 10%) # Event count (approx.): 8951288803 Signed-off-by: Jin Yao Suggested--by: Arnaldo Carvalho de Melo Tested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-6-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-report.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'tools') diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 7d4f0a5de326..4aaaa37262a8 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -404,6 +404,9 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (evname != NULL) ret += fprintf(fp, " of event '%s'", evname); + if (rep->time_str) + ret += fprintf(fp, " (time slices: %s)", rep->time_str); + if (symbol_conf.show_ref_callgraph && strstr(evname, "call-graph=no")) { ret += fprintf(fp, ", show reference callgraph"); -- cgit v1.2.3-70-g09d2 From 5a031f887cb8d60fe87d21159c3cf82c38f55679 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:31 +0800 Subject: perf util: Allocate time slices buffer according to number of comma Previously we use a magic number 10 to limit the number of time slices. It's not very good. This patch creates a new function perf_time__range_alloc() to allocate time slices buffer. The number of buffer entries is determined by the number of comma in string but at least it will allocate one entry even if no comma is found. Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-7-git-send-email-yao.jin@linux.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/time-utils.c | 28 ++++++++++++++++++++++++++++ tools/perf/util/time-utils.h | 2 ++ 2 files changed, 30 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/time-utils.c b/tools/perf/util/time-utils.c index 5769f972c23e..6193b46050a5 100644 --- a/tools/perf/util/time-utils.c +++ b/tools/perf/util/time-utils.c @@ -325,6 +325,34 @@ int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, return -1; } +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size) +{ + const char *p1, *p2; + int i = 1; + struct perf_time_interval *ptime; + + /* + * At least allocate one time range. + */ + if (!ostr) + goto alloc; + + p1 = ostr; + while (p1 < ostr + strlen(ostr)) { + p2 = strchr(p1, ','); + if (!p2) + break; + + p1 = p2 + 1; + i++; + } + +alloc: + *size = i; + ptime = calloc(i, sizeof(*ptime)); + return ptime; +} + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp) { /* if time is not set don't drop sample */ diff --git a/tools/perf/util/time-utils.h b/tools/perf/util/time-utils.h index 34d5eba26bf5..70b177d2b98c 100644 --- a/tools/perf/util/time-utils.h +++ b/tools/perf/util/time-utils.h @@ -16,6 +16,8 @@ int perf_time__parse_str(struct perf_time_interval *ptime, const char *ostr); int perf_time__percent_parse_str(struct perf_time_interval *ptime_buf, int num, const char *ostr, u64 start, u64 end); +struct perf_time_interval *perf_time__range_alloc(const char *ostr, int *size); + bool perf_time__skip_sample(struct perf_time_interval *ptime, u64 timestamp); bool perf_time__ranges_skip_sample(struct perf_time_interval *ptime_buf, -- cgit v1.2.3-70-g09d2 From 0a3cc3ae05c363dabd891ed5f918c62197de8c7f Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:32 +0800 Subject: perf report: Remove the time slices number limitation Previously it was only allowed to use at most 10 time slices in 'perf report --time'. This patch removes this limitation. For example, following command line is OK (12 time slices) perf report --stdio --time 1%/1,1%/2,1%/3,1%/4,1%/5,1%/6,1%/7,1%/8,1%/9,1%/10,1%/11,1%/12 Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Jiri Olsa Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-8-git-send-email-yao.jin@linux.intel.com [ No need to check for NULL to call free, use zfree ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-report.txt | 2 +- tools/perf/builtin-report.c | 22 ++++++++++++++++------ 2 files changed, 17 insertions(+), 7 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 63d0db3184c9..907e505b6309 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -403,7 +403,7 @@ OPTIONS to end of file. Also support time percent with multiple time range. Time string is - 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. For example: Select the second 10% time slice: diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 4aaaa37262a8..42a52dcc41cd 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -54,8 +54,6 @@ #include #include -#define PTIME_RANGE_MAX 10 - struct report { struct perf_tool tool; struct perf_session *session; @@ -76,7 +74,8 @@ struct report { const char *cpu_list; const char *symbol_filter_str; const char *time_str; - struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + struct perf_time_interval *ptime_range; + int range_size; int range_num; float min_percent; u64 nr_entries; @@ -1300,24 +1299,33 @@ repeat: if (symbol__init(&session->header.env) < 0) goto error; + report.ptime_range = perf_time__range_alloc(report.time_str, + &report.range_size); + if (!report.ptime_range) { + ret = -ENOMEM; + goto error; + } + if (perf_time__parse_str(report.ptime_range, report.time_str) != 0) { if (session->evlist->first_sample_time == 0 && session->evlist->last_sample_time == 0) { pr_err("HINT: no first/last sample time found in perf data.\n" "Please use latest perf binary to execute 'perf record'\n" "(if '--buildid-all' is enabled, please set '--timestamp-boundary').\n"); - return -EINVAL; + ret = -EINVAL; + goto error; } report.range_num = perf_time__percent_parse_str( - report.ptime_range, PTIME_RANGE_MAX, + report.ptime_range, report.range_size, report.time_str, session->evlist->first_sample_time, session->evlist->last_sample_time); if (report.range_num < 0) { pr_err("Invalid time string\n"); - return -EINVAL; + ret = -EINVAL; + goto error; } } else { report.range_num = 1; @@ -1333,6 +1341,8 @@ repeat: ret = 0; error: + zfree(&report.ptime_range); + perf_session__delete(session); return ret; } -- cgit v1.2.3-70-g09d2 From cc2ef584a863b7c8033b78723cd253ca47e9a589 Mon Sep 17 00:00:00 2001 From: Jin Yao Date: Wed, 10 Jan 2018 23:00:33 +0800 Subject: perf script: Remove the time slices number limitation Previously it was only allowed to use at most 10 time slices in 'perf script --time'. This patch removes this limitation. For example, following command line is OK (12 time slices) perf script --time 1%/1,1%/2,1%/3,1%/4,1%/5,1%/6,1%/7,1%/8,1%/9,1%/10,1%/11,1%/12 Signed-off-by: Jin Yao Suggested-by: Arnaldo Carvalho de Melo Reviewed-by: Jiri Olsa Cc: Alexander Shishkin Cc: Andi Kleen Cc: Kan Liang Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1515596433-24653-9-git-send-email-yao.jin@linux.intel.com [ No need to check for NULL to call free, use zfree ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-script.txt | 10 +++++----- tools/perf/builtin-script.c | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 9 deletions(-) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index 806ec6391fd6..7730c1d2b5d3 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -351,19 +351,19 @@ include::itrace.txt[] to end of file. Also support time percent with multipe time range. Time string is - 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. The maximum number of slices is 10. + 'a%/n,b%/m,...' or 'a%-b%,c%-%d,...'. For example: - Select the second 10% time slice + Select the second 10% time slice: perf script --time 10%/2 - Select from 0% to 10% time slice + Select from 0% to 10% time slice: perf script --time 0%-10% - Select the first and second 10% time slices + Select the first and second 10% time slices: perf script --time 10%/1,10%/2 - Select from 0% to 10% and 30% to 40% slices + Select from 0% to 10% and 30% to 40% slices: perf script --time 0%-10%,30%-40% --max-blocks:: diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ac781916e51e..3499d68e1d70 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -1480,8 +1480,6 @@ static int perf_sample__fprintf_synth(struct perf_sample *sample, return 0; } -#define PTIME_RANGE_MAX 10 - struct perf_script { struct perf_tool tool; struct perf_session *session; @@ -1496,7 +1494,8 @@ struct perf_script { struct thread_map *threads; int name_width; const char *time_str; - struct perf_time_interval ptime_range[PTIME_RANGE_MAX]; + struct perf_time_interval *ptime_range; + int range_size; int range_num; }; @@ -3445,6 +3444,13 @@ int cmd_script(int argc, const char **argv) if (err < 0) goto out_delete; + script.ptime_range = perf_time__range_alloc(script.time_str, + &script.range_size); + if (!script.ptime_range) { + err = -ENOMEM; + goto out_delete; + } + /* needs to be parsed after looking up reference time */ if (perf_time__parse_str(script.ptime_range, script.time_str) != 0) { if (session->evlist->first_sample_time == 0 && @@ -3457,7 +3463,7 @@ int cmd_script(int argc, const char **argv) } script.range_num = perf_time__percent_parse_str( - script.ptime_range, PTIME_RANGE_MAX, + script.ptime_range, script.range_size, script.time_str, session->evlist->first_sample_time, session->evlist->last_sample_time); @@ -3476,6 +3482,8 @@ int cmd_script(int argc, const char **argv) flush_scripting(); out_delete: + zfree(&script.ptime_range); + perf_evlist__free_stats(session->evlist); perf_session__delete(session); -- cgit v1.2.3-70-g09d2 From 81fccd6ca507d3b2012eaf1edeb9b1dbf4bd22db Mon Sep 17 00:00:00 2001 From: Thomas Richter Date: Wed, 17 Jan 2018 14:16:11 +0100 Subject: perf record: Fix failed memory allocation for get_cpuid_str In x86 architecture dependend part function get_cpuid_str() mallocs a 128 byte buffer, but does not check if the memory allocation succeeded or not. When the memory allocation fails, function __get_cpuid() is called with first parameter being a NULL pointer. However this function references its first parameter and operates on a NULL pointer which might cause core dumps. Signed-off-by: Thomas Richter Cc: Heiko Carstens Cc: Hendrik Brueckner Cc: Martin Schwidefsky Link: http://lkml.kernel.org/r/20180117131611.34319-1-tmricht@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/arch/x86/util/header.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/arch/x86/util/header.c b/tools/perf/arch/x86/util/header.c index b626d2bad9f1..fb0d71afee8b 100644 --- a/tools/perf/arch/x86/util/header.c +++ b/tools/perf/arch/x86/util/header.c @@ -70,7 +70,7 @@ get_cpuid_str(struct perf_pmu *pmu __maybe_unused) { char *buf = malloc(128); - if (__get_cpuid(buf, 128, "%s-%u-%X$") < 0) { + if (buf && __get_cpuid(buf, 128, "%s-%u-%X$") < 0) { free(buf); return NULL; } -- cgit v1.2.3-70-g09d2 From a10eb530ae497e2411525fc1f5ec73f39eb11c11 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Jan 2018 15:14:50 +0200 Subject: perf intel-pt/bts: Do not swap when synthesizing samples Both 'perf inject' and internal tools consume cpu endian samples, so there is never a need to do any swapping when synthesizing samples. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1516108492-21401-2-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/intel-bts.c | 6 +----- tools/perf/util/intel-pt.c | 11 +++-------- 2 files changed, 4 insertions(+), 13 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 5325e65f9711..7077bebc2fb0 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -67,7 +67,6 @@ struct intel_bts { u64 branches_sample_type; u64 branches_id; size_t branches_event_size; - bool synth_needs_swap; unsigned long num_events; }; @@ -303,8 +302,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, bts->branches_sample_type, - 0, &sample, - bts->synth_needs_swap); + 0, &sample, false); if (ret) return ret; } @@ -841,8 +839,6 @@ static int intel_bts_synth_events(struct intel_bts *bts, __perf_evsel__sample_size(attr.sample_type); } - bts->synth_needs_swap = evsel->needs_swap; - return 0; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 23f9ba676df0..2daf641beb85 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -104,8 +104,6 @@ struct intel_pt { u64 pwrx_id; u64 cbr_id; - bool synth_needs_swap; - u64 tsc_bit; u64 mtc_bit; u64 mtc_freq_bits; @@ -1101,11 +1099,10 @@ static void intel_pt_prep_b_sample(struct intel_pt *pt, } static int intel_pt_inject_event(union perf_event *event, - struct perf_sample *sample, u64 type, - bool swapped) + struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); - return perf_event__synthesize_sample(event, type, 0, sample, swapped); + return perf_event__synthesize_sample(event, type, 0, sample, false); } static inline int intel_pt_opt_inject(struct intel_pt *pt, @@ -1115,7 +1112,7 @@ static inline int intel_pt_opt_inject(struct intel_pt *pt, if (!pt->synth_opts.inject) return 0; - return intel_pt_inject_event(event, sample, type, pt->synth_needs_swap); + return intel_pt_inject_event(event, sample, type); } static int intel_pt_deliver_synth_b_event(struct intel_pt *pt, @@ -2329,8 +2326,6 @@ static int intel_pt_synth_events(struct intel_pt *pt, id += 1; } - pt->synth_needs_swap = evsel->needs_swap; - return 0; } -- cgit v1.2.3-70-g09d2 From 59a87fdad1467d228acc5cb1303b0b568a9e86a8 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Jan 2018 15:14:51 +0200 Subject: perf evsel: Ensure reserved member of PERF_SAMPLE_CPU is zero in perf_event__synthesize_sample() PERF_SAMPLE_CPU contains the cpu number in the first 4 bytes and the second 4 bytes are reserved. Ensure the reserved bytes are zero in perf_event__synthesize_sample(). Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1516108492-21401-3-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/evsel.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools') diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 85eb84dfdf91..44032679180f 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2533,6 +2533,7 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_CPU) { u.val32[0] = sample->cpu; + u.val32[1] = 0; if (swapped) { /* * Inverse of what is done in perf_evsel__parse_sample -- cgit v1.2.3-70-g09d2 From 936f1f30bb7892f010670f1edebc419d47b139b1 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Tue, 16 Jan 2018 15:14:52 +0200 Subject: perf tools: Get rid of unused 'swapped' parameter from perf_event__synthesize_sample() There is never a need to synthesize a 'swapped' sample, so all callers to perf_event__synthesize_sample() pass 'false' as the value to 'swapped'. So get rid of the unused 'swapped' parameter. Signed-off-by: Adrian Hunter Acked-by: Jiri Olsa Link: http://lkml.kernel.org/r/1516108492-21401-4-git-send-email-adrian.hunter@intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-inject.c | 3 +-- tools/perf/tests/sample-parsing.c | 2 +- tools/perf/util/event.h | 3 +-- tools/perf/util/evsel.c | 28 +--------------------------- tools/perf/util/intel-bts.c | 2 +- tools/perf/util/intel-pt.c | 2 +- 6 files changed, 6 insertions(+), 34 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index 16a28547ca86..40fe919bbcf3 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -536,8 +536,7 @@ found: sample_sw.period = sample->period; sample_sw.time = sample->time; perf_event__synthesize_sample(event_sw, evsel->attr.sample_type, - evsel->attr.read_format, &sample_sw, - false); + evsel->attr.read_format, &sample_sw); build_id__mark_dso_hit(tool, event_sw, &sample_sw, evsel, machine); return perf_event__repipe(tool, event_sw, &sample_sw, machine); } diff --git a/tools/perf/tests/sample-parsing.c b/tools/perf/tests/sample-parsing.c index 3ec6302b6498..0e2d00d69e6e 100644 --- a/tools/perf/tests/sample-parsing.c +++ b/tools/perf/tests/sample-parsing.c @@ -248,7 +248,7 @@ static int do_test(u64 sample_type, u64 sample_regs, u64 read_format) event->header.size = sz; err = perf_event__synthesize_sample(event, sample_type, read_format, - &sample, false); + &sample); if (err) { pr_debug("%s failed for sample_type %#"PRIx64", error %d\n", "perf_event__synthesize_sample", sample_type, err); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index e5fbd6dd1b01..0f794744919c 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -775,8 +775,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, u64 read_format); int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, - const struct perf_sample *sample, - bool swapped); + const struct perf_sample *sample); pid_t perf_event__synthesize_comm(struct perf_tool *tool, union perf_event *event, pid_t pid, diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 44032679180f..66fa45198a11 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -2472,8 +2472,7 @@ size_t perf_event__sample_event_size(const struct perf_sample *sample, u64 type, int perf_event__synthesize_sample(union perf_event *event, u64 type, u64 read_format, - const struct perf_sample *sample, - bool swapped) + const struct perf_sample *sample) { u64 *array; size_t sz; @@ -2498,15 +2497,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_TID) { u.val32[0] = sample->pid; u.val32[1] = sample->tid; - if (swapped) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val32[1] = bswap_32(u.val32[1]); - u.val64 = bswap_64(u.val64); - } - *array = u.val64; array++; } @@ -2534,13 +2524,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_CPU) { u.val32[0] = sample->cpu; u.val32[1] = 0; - if (swapped) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val64 = bswap_64(u.val64); - } *array = u.val64; array++; } @@ -2587,15 +2570,6 @@ int perf_event__synthesize_sample(union perf_event *event, u64 type, if (type & PERF_SAMPLE_RAW) { u.val32[0] = sample->raw_size; - if (WARN_ONCE(swapped, - "Endianness of raw data not corrected!\n")) { - /* - * Inverse of what is done in perf_evsel__parse_sample - */ - u.val32[0] = bswap_32(u.val32[0]); - u.val32[1] = bswap_32(u.val32[1]); - u.val64 = bswap_64(u.val64); - } *array = u.val64; array = (void *)array + sizeof(u32); diff --git a/tools/perf/util/intel-bts.c b/tools/perf/util/intel-bts.c index 7077bebc2fb0..72db2744876d 100644 --- a/tools/perf/util/intel-bts.c +++ b/tools/perf/util/intel-bts.c @@ -302,7 +302,7 @@ static int intel_bts_synth_branch_sample(struct intel_bts_queue *btsq, event.sample.header.size = bts->branches_event_size; ret = perf_event__synthesize_sample(&event, bts->branches_sample_type, - 0, &sample, false); + 0, &sample); if (ret) return ret; } diff --git a/tools/perf/util/intel-pt.c b/tools/perf/util/intel-pt.c index 2daf641beb85..3773d9c54f45 100644 --- a/tools/perf/util/intel-pt.c +++ b/tools/perf/util/intel-pt.c @@ -1102,7 +1102,7 @@ static int intel_pt_inject_event(union perf_event *event, struct perf_sample *sample, u64 type) { event->header.size = perf_event__sample_event_size(sample, type, 0); - return perf_event__synthesize_sample(event, type, 0, sample, false); + return perf_event__synthesize_sample(event, type, 0, sample); } static inline int intel_pt_opt_inject(struct intel_pt *pt, -- cgit v1.2.3-70-g09d2 From 56271170438df39c1b9a39c7aaf69010e6a4b59a Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Jan 2018 10:28:14 -0300 Subject: perf tools: Use ui__error() for reporting --fields errors So that we can get it working for TUI, where using just pr_err() would end up making the message emitted to stderr to be erased by the TUI exit routine restoring the terminal to its previous state. Now we can see that trying to use a tracepoint field as one of the --field entries isn't working: # perf top --stdio --no-children -e syscalls:sys_enter_write --fields pid,sym,count Error: Unknown --fields key: `count' Usage: perf top [] --fields output field(s): overhead, period, sample plus all of sort keys # Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-usy9hhy7umdd4bbblkn63t8w@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/sort.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 211e7f326b9f..2da4d0456a03 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -2887,10 +2887,10 @@ static int setup_output_list(struct perf_hpp_list *list, char *str) tok; tok = strtok_r(NULL, ", ", &tmp)) { ret = output_field_add(list, tok); if (ret == -EINVAL) { - pr_err("Invalid --fields key: `%s'", tok); + ui__error("Invalid --fields key: `%s'", tok); break; } else if (ret == -ESRCH) { - pr_err("Unknown --fields key: `%s'", tok); + ui__error("Unknown --fields key: `%s'", tok); break; } } -- cgit v1.2.3-70-g09d2 From 872523233d640c21ce13ea51269c5c031ebb2f78 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Thu, 18 Jan 2018 13:07:00 -0300 Subject: perf bpf: Don't warn about unavailability of builtin clang, just fallback When clang is not linked with 'perf' we should just add a debug message about that before doing the fallback to calling the external compiler. I.e. just the "-95" warning below gets turned into a debug message: # cat sys_enter_open.c #include "bpf.h" SEC("syscalls:sys_enter_open") int func(void *ctx) { struct { char *ptr; char path[256]; } filename = { .ptr = *((char **)(ctx + 16)), }; int len = bpf_probe_read_str(filename.path, sizeof(filename.path), filename.ptr); if (len > 0) { if (len == 1) perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr)); else if (len < 256) perf_event_output(ctx, &__bpf_stdout__, BPF_F_CURRENT_CPU, &filename, len + sizeof(filename.ptr)); } return 0; } # trace -e open,sys_enter_open.c bpf: builtin compilation failed: -95, try external compiler 0.000 ( ): __bpf_stdout__:@......./proc/self/task/11160/comm..) 0.014 ( 0.116 ms): qemu-system-x8/6721 open(filename: /proc/self/task/11160/comm, flags: RDWR) = 91 2335.411 ( ): __bpf_stdout__:FB..~.../etc/resolv.conf....) 2335.421 ( 0.030 ms): chronyd/883 open(filename: /etc/resolv.conf, flags: CLOEXEC) = 5 ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-z5aak9oay448ffj37giz94yr@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index 72c107fcbc5a..ab2598af91eb 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -94,7 +94,7 @@ struct bpf_object *bpf__prepare_load(const char *filename, bool source) err = perf_clang__compile_bpf(filename, &obj_buf, &obj_buf_sz); perf_clang__cleanup(); if (err) { - pr_warning("bpf: builtin compilation failed: %d, try external compiler\n", err); + pr_debug("bpf: builtin compilation failed: %d, try external compiler\n", err); err = llvm__compile_bpf(filename, &obj_buf, &obj_buf_sz); if (err) return ERR_PTR(-BPF_LOADER_ERRNO__COMPILE); -- cgit v1.2.3-70-g09d2 From 5c61d70e55ab1cb917752f4a0d5f4683b4869e32 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 17:11:30 -0300 Subject: perf tools: Move conditional O_CLOEXEC to util.h To be more generally available and get the build on centos:5 to work after we use O_CLOEXEC in the next patch, in the util/dso.c file. Cc: Adrian Hunter Cc: Alexander Shishkin Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Cc: Wang YanQing Link: https://lkml.kernel.org/n/tip-vsjbiydh15pfqomxw1kx64ex@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/data.c | 10 ---------- tools/perf/util/util.h | 10 ++++++++++ 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/data.c b/tools/perf/util/data.c index 48094fde0a68..d8cfc19ddb10 100644 --- a/tools/perf/util/data.c +++ b/tools/perf/util/data.c @@ -12,16 +12,6 @@ #include "util.h" #include "debug.h" -#ifndef O_CLOEXEC -#ifdef __sparc__ -#define O_CLOEXEC 0x400000 -#elif defined(__alpha__) || defined(__hppa__) -#define O_CLOEXEC 010000000 -#else -#define O_CLOEXEC 02000000 -#endif -#endif - static bool check_pipe(struct perf_data *data) { struct stat st; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 01434509c2e9..9496365da3d7 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -68,4 +68,14 @@ extern bool perf_singlethreaded; void perf_set_singlethreaded(void); void perf_set_multithreaded(void); +#ifndef O_CLOEXEC +#ifdef __sparc__ +#define O_CLOEXEC 0x400000 +#elif defined(__alpha__) || defined(__hppa__) +#define O_CLOEXEC 010000000 +#else +#define O_CLOEXEC 02000000 +#endif +#endif + #endif /* GIT_COMPAT_UTIL_H */ -- cgit v1.2.3-70-g09d2 From 4c0d8d27954d9efb2a02ec9fc16f39b02f248bb7 Mon Sep 17 00:00:00 2001 From: Wang YanQing Date: Mon, 15 Jan 2018 13:04:48 +0800 Subject: perf symbols: Using O_CLOEXEC in do_open I've meet a strange behavior with these commands on my gentoo box: 1: perf kmem record 2: CTRL-C to stop 1 3: perf report 4: "Enter", "Enter", "Run scripts for all samples", "event_analyzing_sample". Then 'perf report' says: " No kallsyms or vmlinux with build-id xxxx was found /lib/modules/4.10.0+/build/vmlinux with build id xxxx not found, continuing without symbols ". It is strange because I am sure /lib/modules/4.10.0+/build/vmlinux is right for perf.data. After digging, I found out the reason is that "perf report" generates many open fds, then "script_browse" uses popen to run "perf script" which run out of open files. The gentoo box has a small default value for "max open files", 1024. Yes, "ulimit -n " with a bigger number could fix it, but I think that using O_CLOEXEC in do_open is a better way. Signed-off-by: Wang YanQing Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180115050448.GA20759@udknight [ Make sure O_CLOEXEC is available in old systems by adding a patch just before this one, to keep this bisectable in such systems ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/dso.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index d5b6f7f5baff..36ef45b2e89d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -446,7 +446,7 @@ static int do_open(char *name) char sbuf[STRERR_BUFSIZE]; do { - fd = open(name, O_RDONLY); + fd = open(name, O_RDONLY|O_CLOEXEC); if (fd >= 0) return fd; -- cgit v1.2.3-70-g09d2 From 631e8f0a9748d7ef1eb6a84d0d5b9e81a79433ef Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 10 Jan 2018 13:31:52 -0700 Subject: perf report: Fix regression when decoding intel_pt traces Commit (93d10af26bb7 perf tools: Optimize sample parsing for ordered events) breaks intelPT trace decoding by invariably returning an error if the event type isn't a PERF_SAMPLE_TIME. With this patch the timestamp is initialised and processing is allowed to continue if the error returned by function perf_evlist__parse_sample_timestamp() is not a fault. Signed-off-by: Mathieu Poirier Acked-by: Adrian Hunter Acked-by: Jiri Olsa Cc: Alexander Shishkin Cc: Peter Zijlstra Fixes: 93d10af26bb7 ("perf tools: Optimize sample parsing for ordered events") Link: http://lkml.kernel.org/r/1515616312-27645-1-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/session.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 8d0fa2f8da16..c71ced7db152 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1508,10 +1508,10 @@ static s64 perf_session__process_event(struct perf_session *session, return perf_session__process_user_event(session, event, file_offset); if (tool->ordered_events) { - u64 timestamp; + u64 timestamp = -1ULL; ret = perf_evlist__parse_sample_timestamp(evlist, event, ×tamp); - if (ret) + if (ret && ret != -1) return ret; ret = perf_session__queue_event(session, event, timestamp, file_offset); -- cgit v1.2.3-70-g09d2 From 99402e0683ecea11db44fd1c59a65b4eb3bd2672 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 9 Jan 2018 10:26:46 +0100 Subject: perf build: Display EXTRA features for VF=1 build Display the state of the rest of the features (FEATURE_TESTS_EXTRA) on a 'make VF=1' build. These features are detected manually by perf's Makefile.config so they can't be displayed with the main list, but only after we're done in Makefile.config. $ make VF=1 BUILD: Doing 'make -j4' parallel build Auto-detecting system features: ... dwarf: [ on ] ... dwarf_getlocations: [ on ] ... glibc: [ on ] ... gtk2: [ on ] SNIP ... timerfd: [ on ] ... sched_getcpu: [ on ] ... sdt: [ on ] ... setns: [ on ] extra features: ... bionic: [ OFF ] ... compile-32: [ on ] ... compile-x32: [ OFF ] ... cplus-demangle: [ on ] ... hello: [ OFF ] ... libbabeltrace: [ on ] ... liberty: [ on ] ... liberty-z: [ on ] ... libunwind-debug-frame: [ OFF ] ... libunwind-debug-frame-arm: [ OFF ] ... libunwind-debug-frame-aarch64: [ OFF ] SNIP Signed-off-by: Jiri Olsa Tested-by: Arnaldo Carvalho de Melo Cc: Alexander Shishkin Cc: Andi Kleen Cc: David Ahern Cc: Mathieu Poirier Cc: Namhyung Kim Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20180109092646.GB11520@krava Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.config | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools') diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 12dec6ea5ed2..92265b32dddd 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -947,6 +947,10 @@ define print_var_code endef ifeq ($(VF),1) + # Display EXTRA features which are detected manualy + # from here with feature_check call and thus cannot + # be partof global state output. + $(foreach feat,$(FEATURE_TESTS_EXTRA),$(call feature_print_status,$(feat),)) $(call print_var,prefix) $(call print_var,bindir) $(call print_var,libdir) -- cgit v1.2.3-70-g09d2 From 95f28190aa012b18eab14799b905b6db3cf31529 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:13 +0100 Subject: tools include arch: Grab a copy of errno.h for arch's supported by perf For each arch in tools/perf/arch, grab a copy of errno.h. This is a pre-req to generate an architecture specific mapping of errno numbers to their names. This errno mapping can be used by perf trace to support cross-architecture trace reports and to get rid of the audit-libs dependency. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-2-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-73azjhrzpjsskwi129020i2u@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/arch/alpha/include/uapi/asm/errno.h | 128 +++++++++++++++++++++++++++ tools/arch/mips/include/asm/errno.h | 17 ++++ tools/arch/mips/include/uapi/asm/errno.h | 130 ++++++++++++++++++++++++++++ tools/arch/parisc/include/uapi/asm/errno.h | 128 +++++++++++++++++++++++++++ tools/arch/powerpc/include/uapi/asm/errno.h | 10 +++ tools/arch/sparc/include/uapi/asm/errno.h | 118 +++++++++++++++++++++++++ tools/arch/x86/include/uapi/asm/errno.h | 1 + tools/perf/check-headers.sh | 7 ++ 8 files changed, 539 insertions(+) create mode 100644 tools/arch/alpha/include/uapi/asm/errno.h create mode 100644 tools/arch/mips/include/asm/errno.h create mode 100644 tools/arch/mips/include/uapi/asm/errno.h create mode 100644 tools/arch/parisc/include/uapi/asm/errno.h create mode 100644 tools/arch/powerpc/include/uapi/asm/errno.h create mode 100644 tools/arch/sparc/include/uapi/asm/errno.h create mode 100644 tools/arch/x86/include/uapi/asm/errno.h (limited to 'tools') diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h new file mode 100644 index 000000000000..3d265f6babaf --- /dev/null +++ b/tools/arch/alpha/include/uapi/asm/errno.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ALPHA_ERRNO_H +#define _ALPHA_ERRNO_H + +#include + +#undef EAGAIN /* 11 in errno-base.h */ + +#define EDEADLK 11 /* Resource deadlock would occur */ + +#define EAGAIN 35 /* Try again */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Network dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ + +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale file handle */ +#define EREMOTE 71 /* Object is remote */ + +#define ENOLCK 77 /* No record locks available */ +#define ENOSYS 78 /* Function not implemented */ + +#define ENOMSG 80 /* No message of desired type */ +#define EIDRM 81 /* Identifier removed */ +#define ENOSR 82 /* Out of streams resources */ +#define ETIME 83 /* Timer expired */ +#define EBADMSG 84 /* Not a data message */ +#define EPROTO 85 /* Protocol error */ +#define ENODATA 86 /* No data available */ +#define ENOSTR 87 /* Device not a stream */ + +#define ENOPKG 92 /* Package not installed */ + +#define EILSEQ 116 /* Illegal byte sequence */ + +/* The following are just random noise.. */ +#define ECHRNG 88 /* Channel number out of range */ +#define EL2NSYNC 89 /* Level 2 not synchronized */ +#define EL3HLT 90 /* Level 3 halted */ +#define EL3RST 91 /* Level 3 reset */ + +#define ELNRNG 93 /* Link number out of range */ +#define EUNATCH 94 /* Protocol driver not attached */ +#define ENOCSI 95 /* No CSI structure available */ +#define EL2HLT 96 /* Level 2 halted */ +#define EBADE 97 /* Invalid exchange */ +#define EBADR 98 /* Invalid request descriptor */ +#define EXFULL 99 /* Exchange full */ +#define ENOANO 100 /* No anode */ +#define EBADRQC 101 /* Invalid request code */ +#define EBADSLT 102 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 104 /* Bad font file format */ +#define ENONET 105 /* Machine is not on the network */ +#define ENOLINK 106 /* Link has been severed */ +#define EADV 107 /* Advertise error */ +#define ESRMNT 108 /* Srmount error */ +#define ECOMM 109 /* Communication error on send */ +#define EMULTIHOP 110 /* Multihop attempted */ +#define EDOTDOT 111 /* RFS specific error */ +#define EOVERFLOW 112 /* Value too large for defined data type */ +#define ENOTUNIQ 113 /* Name not unique on network */ +#define EBADFD 114 /* File descriptor in bad state */ +#define EREMCHG 115 /* Remote address changed */ + +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ + +#define ELIBACC 122 /* Can not access a needed shared library */ +#define ELIBBAD 123 /* Accessing a corrupted shared library */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ +#define ELIBMAX 125 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 126 /* Cannot exec a shared library directly */ +#define ERESTART 127 /* Interrupted system call should be restarted */ +#define ESTRPIPE 128 /* Streams pipe error */ + +#define ENOMEDIUM 129 /* No medium found */ +#define EMEDIUMTYPE 130 /* Wrong medium type */ +#define ECANCELED 131 /* Operation Cancelled */ +#define ENOKEY 132 /* Required key not available */ +#define EKEYEXPIRED 133 /* Key has expired */ +#define EKEYREVOKED 134 /* Key has been revoked */ +#define EKEYREJECTED 135 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 136 /* Owner died */ +#define ENOTRECOVERABLE 137 /* State not recoverable */ + +#define ERFKILL 138 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 139 /* Memory page has hardware error */ + +#endif diff --git a/tools/arch/mips/include/asm/errno.h b/tools/arch/mips/include/asm/errno.h new file mode 100644 index 000000000000..21d91cdfe3c9 --- /dev/null +++ b/tools/arch/mips/include/asm/errno.h @@ -0,0 +1,17 @@ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995, 1999, 2001, 2002 by Ralf Baechle + */ +#ifndef _ASM_ERRNO_H +#define _ASM_ERRNO_H + +#include + + +/* The biggest error number defined here or in . */ +#define EMAXERRNO 1133 + +#endif /* _ASM_ERRNO_H */ diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h new file mode 100644 index 000000000000..2fb714e2d6d8 --- /dev/null +++ b/tools/arch/mips/include/uapi/asm/errno.h @@ -0,0 +1,130 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +/* + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 1995, 1999, 2001, 2002 by Ralf Baechle + */ +#ifndef _UAPI_ASM_ERRNO_H +#define _UAPI_ASM_ERRNO_H + +/* + * These error numbers are intended to be MIPS ABI compatible + */ + +#include + +#define ENOMSG 35 /* No message of desired type */ +#define EIDRM 36 /* Identifier removed */ +#define ECHRNG 37 /* Channel number out of range */ +#define EL2NSYNC 38 /* Level 2 not synchronized */ +#define EL3HLT 39 /* Level 3 halted */ +#define EL3RST 40 /* Level 3 reset */ +#define ELNRNG 41 /* Link number out of range */ +#define EUNATCH 42 /* Protocol driver not attached */ +#define ENOCSI 43 /* No CSI structure available */ +#define EL2HLT 44 /* Level 2 halted */ +#define EDEADLK 45 /* Resource deadlock would occur */ +#define ENOLCK 46 /* No record locks available */ +#define EBADE 50 /* Invalid exchange */ +#define EBADR 51 /* Invalid request descriptor */ +#define EXFULL 52 /* Exchange full */ +#define ENOANO 53 /* No anode */ +#define EBADRQC 54 /* Invalid request code */ +#define EBADSLT 55 /* Invalid slot */ +#define EDEADLOCK 56 /* File locking deadlock error */ +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EDOTDOT 73 /* RFS specific error */ +#define EMULTIHOP 74 /* Multihop attempted */ +#define EBADMSG 77 /* Not a data message */ +#define ENAMETOOLONG 78 /* File name too long */ +#define EOVERFLOW 79 /* Value too large for defined data type */ +#define ENOTUNIQ 80 /* Name not unique on network */ +#define EBADFD 81 /* File descriptor in bad state */ +#define EREMCHG 82 /* Remote address changed */ +#define ELIBACC 83 /* Can not access a needed shared library */ +#define ELIBBAD 84 /* Accessing a corrupted shared library */ +#define ELIBSCN 85 /* .lib section in a.out corrupted */ +#define ELIBMAX 86 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 87 /* Cannot exec a shared library directly */ +#define EILSEQ 88 /* Illegal byte sequence */ +#define ENOSYS 89 /* Function not implemented */ +#define ELOOP 90 /* Too many symbolic links encountered */ +#define ERESTART 91 /* Interrupted system call should be restarted */ +#define ESTRPIPE 92 /* Streams pipe error */ +#define ENOTEMPTY 93 /* Directory not empty */ +#define EUSERS 94 /* Too many users */ +#define ENOTSOCK 95 /* Socket operation on non-socket */ +#define EDESTADDRREQ 96 /* Destination address required */ +#define EMSGSIZE 97 /* Message too long */ +#define EPROTOTYPE 98 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 99 /* Protocol not available */ +#define EPROTONOSUPPORT 120 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 121 /* Socket type not supported */ +#define EOPNOTSUPP 122 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 123 /* Protocol family not supported */ +#define EAFNOSUPPORT 124 /* Address family not supported by protocol */ +#define EADDRINUSE 125 /* Address already in use */ +#define EADDRNOTAVAIL 126 /* Cannot assign requested address */ +#define ENETDOWN 127 /* Network is down */ +#define ENETUNREACH 128 /* Network is unreachable */ +#define ENETRESET 129 /* Network dropped connection because of reset */ +#define ECONNABORTED 130 /* Software caused connection abort */ +#define ECONNRESET 131 /* Connection reset by peer */ +#define ENOBUFS 132 /* No buffer space available */ +#define EISCONN 133 /* Transport endpoint is already connected */ +#define ENOTCONN 134 /* Transport endpoint is not connected */ +#define EUCLEAN 135 /* Structure needs cleaning */ +#define ENOTNAM 137 /* Not a XENIX named type file */ +#define ENAVAIL 138 /* No XENIX semaphores available */ +#define EISNAM 139 /* Is a named type file */ +#define EREMOTEIO 140 /* Remote I/O error */ +#define EINIT 141 /* Reserved */ +#define EREMDEV 142 /* Error 142 */ +#define ESHUTDOWN 143 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 144 /* Too many references: cannot splice */ +#define ETIMEDOUT 145 /* Connection timed out */ +#define ECONNREFUSED 146 /* Connection refused */ +#define EHOSTDOWN 147 /* Host is down */ +#define EHOSTUNREACH 148 /* No route to host */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EALREADY 149 /* Operation already in progress */ +#define EINPROGRESS 150 /* Operation now in progress */ +#define ESTALE 151 /* Stale file handle */ +#define ECANCELED 158 /* AIO operation canceled */ + +/* + * These error are Linux extensions. + */ +#define ENOMEDIUM 159 /* No medium found */ +#define EMEDIUMTYPE 160 /* Wrong medium type */ +#define ENOKEY 161 /* Required key not available */ +#define EKEYEXPIRED 162 /* Key has expired */ +#define EKEYREVOKED 163 /* Key has been revoked */ +#define EKEYREJECTED 164 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 165 /* Owner died */ +#define ENOTRECOVERABLE 166 /* State not recoverable */ + +#define ERFKILL 167 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 168 /* Memory page has hardware error */ + +#define EDQUOT 1133 /* Quota exceeded */ + + +#endif /* _UAPI_ASM_ERRNO_H */ diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h new file mode 100644 index 000000000000..fc0df353ff0d --- /dev/null +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -0,0 +1,128 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _PARISC_ERRNO_H +#define _PARISC_ERRNO_H + +#include + +#define ENOMSG 35 /* No message of desired type */ +#define EIDRM 36 /* Identifier removed */ +#define ECHRNG 37 /* Channel number out of range */ +#define EL2NSYNC 38 /* Level 2 not synchronized */ +#define EL3HLT 39 /* Level 3 halted */ +#define EL3RST 40 /* Level 3 reset */ +#define ELNRNG 41 /* Link number out of range */ +#define EUNATCH 42 /* Protocol driver not attached */ +#define ENOCSI 43 /* No CSI structure available */ +#define EL2HLT 44 /* Level 2 halted */ +#define EDEADLK 45 /* Resource deadlock would occur */ +#define EDEADLOCK EDEADLK +#define ENOLCK 46 /* No record locks available */ +#define EILSEQ 47 /* Illegal byte sequence */ + +#define ENONET 50 /* Machine is not on the network */ +#define ENODATA 51 /* No data available */ +#define ETIME 52 /* Timer expired */ +#define ENOSR 53 /* Out of streams resources */ +#define ENOSTR 54 /* Device not a stream */ +#define ENOPKG 55 /* Package not installed */ + +#define ENOLINK 57 /* Link has been severed */ +#define EADV 58 /* Advertise error */ +#define ESRMNT 59 /* Srmount error */ +#define ECOMM 60 /* Communication error on send */ +#define EPROTO 61 /* Protocol error */ + +#define EMULTIHOP 64 /* Multihop attempted */ + +#define EDOTDOT 66 /* RFS specific error */ +#define EBADMSG 67 /* Not a data message */ +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale file handle */ +#define EREMOTE 71 /* Object is remote */ +#define EOVERFLOW 72 /* Value too large for defined data type */ + +/* these errnos are defined by Linux but not HPUX. */ + +#define EBADE 160 /* Invalid exchange */ +#define EBADR 161 /* Invalid request descriptor */ +#define EXFULL 162 /* Exchange full */ +#define ENOANO 163 /* No anode */ +#define EBADRQC 164 /* Invalid request code */ +#define EBADSLT 165 /* Invalid slot */ +#define EBFONT 166 /* Bad font file format */ +#define ENOTUNIQ 167 /* Name not unique on network */ +#define EBADFD 168 /* File descriptor in bad state */ +#define EREMCHG 169 /* Remote address changed */ +#define ELIBACC 170 /* Can not access a needed shared library */ +#define ELIBBAD 171 /* Accessing a corrupted shared library */ +#define ELIBSCN 172 /* .lib section in a.out corrupted */ +#define ELIBMAX 173 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 174 /* Cannot exec a shared library directly */ +#define ERESTART 175 /* Interrupted system call should be restarted */ +#define ESTRPIPE 176 /* Streams pipe error */ +#define EUCLEAN 177 /* Structure needs cleaning */ +#define ENOTNAM 178 /* Not a XENIX named type file */ +#define ENAVAIL 179 /* No XENIX semaphores available */ +#define EISNAM 180 /* Is a named type file */ +#define EREMOTEIO 181 /* Remote I/O error */ +#define ENOMEDIUM 182 /* No medium found */ +#define EMEDIUMTYPE 183 /* Wrong medium type */ +#define ENOKEY 184 /* Required key not available */ +#define EKEYEXPIRED 185 /* Key has expired */ +#define EKEYREVOKED 186 /* Key has been revoked */ +#define EKEYREJECTED 187 /* Key was rejected by service */ + +/* We now return you to your regularly scheduled HPUX. */ + +#define ENOSYM 215 /* symbol does not exist in executable */ +#define ENOTSOCK 216 /* Socket operation on non-socket */ +#define EDESTADDRREQ 217 /* Destination address required */ +#define EMSGSIZE 218 /* Message too long */ +#define EPROTOTYPE 219 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 220 /* Protocol not available */ +#define EPROTONOSUPPORT 221 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 222 /* Socket type not supported */ +#define EOPNOTSUPP 223 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 224 /* Protocol family not supported */ +#define EAFNOSUPPORT 225 /* Address family not supported by protocol */ +#define EADDRINUSE 226 /* Address already in use */ +#define EADDRNOTAVAIL 227 /* Cannot assign requested address */ +#define ENETDOWN 228 /* Network is down */ +#define ENETUNREACH 229 /* Network is unreachable */ +#define ENETRESET 230 /* Network dropped connection because of reset */ +#define ECONNABORTED 231 /* Software caused connection abort */ +#define ECONNRESET 232 /* Connection reset by peer */ +#define ENOBUFS 233 /* No buffer space available */ +#define EISCONN 234 /* Transport endpoint is already connected */ +#define ENOTCONN 235 /* Transport endpoint is not connected */ +#define ESHUTDOWN 236 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 237 /* Too many references: cannot splice */ +#define ETIMEDOUT 238 /* Connection timed out */ +#define ECONNREFUSED 239 /* Connection refused */ +#define EREFUSED ECONNREFUSED /* for HP's NFS apparently */ +#define EREMOTERELEASE 240 /* Remote peer released connection */ +#define EHOSTDOWN 241 /* Host is down */ +#define EHOSTUNREACH 242 /* No route to host */ + +#define EALREADY 244 /* Operation already in progress */ +#define EINPROGRESS 245 /* Operation now in progress */ +#define EWOULDBLOCK EAGAIN /* Operation would block (Not HPUX compliant) */ +#define ENOTEMPTY 247 /* Directory not empty */ +#define ENAMETOOLONG 248 /* File name too long */ +#define ELOOP 249 /* Too many symbolic links encountered */ +#define ENOSYS 251 /* Function not implemented */ + +#define ENOTSUP 252 /* Function not implemented (POSIX.4 / HPUX) */ +#define ECANCELLED 253 /* aio request was canceled before complete (POSIX.4 / HPUX) */ +#define ECANCELED ECANCELLED /* SuSv3 and Solaris wants one 'L' */ + +/* for robust mutexes */ +#define EOWNERDEAD 254 /* Owner died */ +#define ENOTRECOVERABLE 255 /* State not recoverable */ + +#define ERFKILL 256 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 257 /* Memory page has hardware error */ + +#endif diff --git a/tools/arch/powerpc/include/uapi/asm/errno.h b/tools/arch/powerpc/include/uapi/asm/errno.h new file mode 100644 index 000000000000..cc79856896a1 --- /dev/null +++ b/tools/arch/powerpc/include/uapi/asm/errno.h @@ -0,0 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_POWERPC_ERRNO_H +#define _ASM_POWERPC_ERRNO_H + +#include + +#undef EDEADLOCK +#define EDEADLOCK 58 /* File locking deadlock error */ + +#endif /* _ASM_POWERPC_ERRNO_H */ diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h new file mode 100644 index 000000000000..81a732b902ee --- /dev/null +++ b/tools/arch/sparc/include/uapi/asm/errno.h @@ -0,0 +1,118 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _SPARC_ERRNO_H +#define _SPARC_ERRNO_H + +/* These match the SunOS error numbering scheme. */ + +#include + +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define EINPROGRESS 36 /* Operation now in progress */ +#define EALREADY 37 /* Operation already in progress */ +#define ENOTSOCK 38 /* Socket operation on non-socket */ +#define EDESTADDRREQ 39 /* Destination address required */ +#define EMSGSIZE 40 /* Message too long */ +#define EPROTOTYPE 41 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 42 /* Protocol not available */ +#define EPROTONOSUPPORT 43 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 44 /* Socket type not supported */ +#define EOPNOTSUPP 45 /* Op not supported on transport endpoint */ +#define EPFNOSUPPORT 46 /* Protocol family not supported */ +#define EAFNOSUPPORT 47 /* Address family not supported by protocol */ +#define EADDRINUSE 48 /* Address already in use */ +#define EADDRNOTAVAIL 49 /* Cannot assign requested address */ +#define ENETDOWN 50 /* Network is down */ +#define ENETUNREACH 51 /* Network is unreachable */ +#define ENETRESET 52 /* Net dropped connection because of reset */ +#define ECONNABORTED 53 /* Software caused connection abort */ +#define ECONNRESET 54 /* Connection reset by peer */ +#define ENOBUFS 55 /* No buffer space available */ +#define EISCONN 56 /* Transport endpoint is already connected */ +#define ENOTCONN 57 /* Transport endpoint is not connected */ +#define ESHUTDOWN 58 /* No send after transport endpoint shutdown */ +#define ETOOMANYREFS 59 /* Too many references: cannot splice */ +#define ETIMEDOUT 60 /* Connection timed out */ +#define ECONNREFUSED 61 /* Connection refused */ +#define ELOOP 62 /* Too many symbolic links encountered */ +#define ENAMETOOLONG 63 /* File name too long */ +#define EHOSTDOWN 64 /* Host is down */ +#define EHOSTUNREACH 65 /* No route to host */ +#define ENOTEMPTY 66 /* Directory not empty */ +#define EPROCLIM 67 /* SUNOS: Too many processes */ +#define EUSERS 68 /* Too many users */ +#define EDQUOT 69 /* Quota exceeded */ +#define ESTALE 70 /* Stale file handle */ +#define EREMOTE 71 /* Object is remote */ +#define ENOSTR 72 /* Device not a stream */ +#define ETIME 73 /* Timer expired */ +#define ENOSR 74 /* Out of streams resources */ +#define ENOMSG 75 /* No message of desired type */ +#define EBADMSG 76 /* Not a data message */ +#define EIDRM 77 /* Identifier removed */ +#define EDEADLK 78 /* Resource deadlock would occur */ +#define ENOLCK 79 /* No record locks available */ +#define ENONET 80 /* Machine is not on the network */ +#define ERREMOTE 81 /* SunOS: Too many lvls of remote in path */ +#define ENOLINK 82 /* Link has been severed */ +#define EADV 83 /* Advertise error */ +#define ESRMNT 84 /* Srmount error */ +#define ECOMM 85 /* Communication error on send */ +#define EPROTO 86 /* Protocol error */ +#define EMULTIHOP 87 /* Multihop attempted */ +#define EDOTDOT 88 /* RFS specific error */ +#define EREMCHG 89 /* Remote address changed */ +#define ENOSYS 90 /* Function not implemented */ + +/* The rest have no SunOS equivalent. */ +#define ESTRPIPE 91 /* Streams pipe error */ +#define EOVERFLOW 92 /* Value too large for defined data type */ +#define EBADFD 93 /* File descriptor in bad state */ +#define ECHRNG 94 /* Channel number out of range */ +#define EL2NSYNC 95 /* Level 2 not synchronized */ +#define EL3HLT 96 /* Level 3 halted */ +#define EL3RST 97 /* Level 3 reset */ +#define ELNRNG 98 /* Link number out of range */ +#define EUNATCH 99 /* Protocol driver not attached */ +#define ENOCSI 100 /* No CSI structure available */ +#define EL2HLT 101 /* Level 2 halted */ +#define EBADE 102 /* Invalid exchange */ +#define EBADR 103 /* Invalid request descriptor */ +#define EXFULL 104 /* Exchange full */ +#define ENOANO 105 /* No anode */ +#define EBADRQC 106 /* Invalid request code */ +#define EBADSLT 107 /* Invalid slot */ +#define EDEADLOCK 108 /* File locking deadlock error */ +#define EBFONT 109 /* Bad font file format */ +#define ELIBEXEC 110 /* Cannot exec a shared library directly */ +#define ENODATA 111 /* No data available */ +#define ELIBBAD 112 /* Accessing a corrupted shared library */ +#define ENOPKG 113 /* Package not installed */ +#define ELIBACC 114 /* Can not access a needed shared library */ +#define ENOTUNIQ 115 /* Name not unique on network */ +#define ERESTART 116 /* Interrupted syscall should be restarted */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EILSEQ 122 /* Illegal byte sequence */ +#define ELIBMAX 123 /* Atmpt to link in too many shared libs */ +#define ELIBSCN 124 /* .lib section in a.out corrupted */ + +#define ENOMEDIUM 125 /* No medium found */ +#define EMEDIUMTYPE 126 /* Wrong medium type */ +#define ECANCELED 127 /* Operation Cancelled */ +#define ENOKEY 128 /* Required key not available */ +#define EKEYEXPIRED 129 /* Key has expired */ +#define EKEYREVOKED 130 /* Key has been revoked */ +#define EKEYREJECTED 131 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 132 /* Owner died */ +#define ENOTRECOVERABLE 133 /* State not recoverable */ + +#define ERFKILL 134 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 135 /* Memory page has hardware error */ + +#endif diff --git a/tools/arch/x86/include/uapi/asm/errno.h b/tools/arch/x86/include/uapi/asm/errno.h new file mode 100644 index 000000000000..4c82b503d92f --- /dev/null +++ b/tools/arch/x86/include/uapi/asm/errno.h @@ -0,0 +1 @@ +#include diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index f81ca508700c..83a65d305298 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -36,6 +36,13 @@ arch/s390/include/uapi/asm/sie.h arch/s390/include/uapi/asm/unistd.h arch/arm/include/uapi/asm/kvm.h arch/arm64/include/uapi/asm/kvm.h +arch/alpha/include/uapi/asm/errno.h +arch/mips/include/asm/errno.h +arch/mips/include/uapi/asm/errno.h +arch/parisc/include/uapi/asm/errno.h +arch/powerpc/include/uapi/asm/errno.h +arch/sparc/include/uapi/asm/errno.h +arch/x86/include/uapi/asm/errno.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/__fls.h -- cgit v1.2.3-70-g09d2 From 28b8f954003e59dbf0b56be6df0d81f83e64f36b Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:14 +0100 Subject: tools include asm-generic: Grab errno.h and errno-base.h This is a pre-req to generate an architecture specific mapping of errno numbers to their names. This errno mapping can be used by perf trace to support cross-architecture trace reports and to get rid of the audit-libs dependency. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-3-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-q13ystrw4sjz4wyvd3654cnm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/uapi/asm-generic/errno-base.h | 40 +++++++++ tools/include/uapi/asm-generic/errno.h | 123 ++++++++++++++++++++++++++++ tools/perf/check-headers.sh | 2 + 3 files changed, 165 insertions(+) create mode 100644 tools/include/uapi/asm-generic/errno-base.h create mode 100644 tools/include/uapi/asm-generic/errno.h (limited to 'tools') diff --git a/tools/include/uapi/asm-generic/errno-base.h b/tools/include/uapi/asm-generic/errno-base.h new file mode 100644 index 000000000000..9653140bff92 --- /dev/null +++ b/tools/include/uapi/asm-generic/errno-base.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_GENERIC_ERRNO_BASE_H +#define _ASM_GENERIC_ERRNO_BASE_H + +#define EPERM 1 /* Operation not permitted */ +#define ENOENT 2 /* No such file or directory */ +#define ESRCH 3 /* No such process */ +#define EINTR 4 /* Interrupted system call */ +#define EIO 5 /* I/O error */ +#define ENXIO 6 /* No such device or address */ +#define E2BIG 7 /* Argument list too long */ +#define ENOEXEC 8 /* Exec format error */ +#define EBADF 9 /* Bad file number */ +#define ECHILD 10 /* No child processes */ +#define EAGAIN 11 /* Try again */ +#define ENOMEM 12 /* Out of memory */ +#define EACCES 13 /* Permission denied */ +#define EFAULT 14 /* Bad address */ +#define ENOTBLK 15 /* Block device required */ +#define EBUSY 16 /* Device or resource busy */ +#define EEXIST 17 /* File exists */ +#define EXDEV 18 /* Cross-device link */ +#define ENODEV 19 /* No such device */ +#define ENOTDIR 20 /* Not a directory */ +#define EISDIR 21 /* Is a directory */ +#define EINVAL 22 /* Invalid argument */ +#define ENFILE 23 /* File table overflow */ +#define EMFILE 24 /* Too many open files */ +#define ENOTTY 25 /* Not a typewriter */ +#define ETXTBSY 26 /* Text file busy */ +#define EFBIG 27 /* File too large */ +#define ENOSPC 28 /* No space left on device */ +#define ESPIPE 29 /* Illegal seek */ +#define EROFS 30 /* Read-only file system */ +#define EMLINK 31 /* Too many links */ +#define EPIPE 32 /* Broken pipe */ +#define EDOM 33 /* Math argument out of domain of func */ +#define ERANGE 34 /* Math result not representable */ + +#endif diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h new file mode 100644 index 000000000000..cf9c51ac49f9 --- /dev/null +++ b/tools/include/uapi/asm-generic/errno.h @@ -0,0 +1,123 @@ +/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ +#ifndef _ASM_GENERIC_ERRNO_H +#define _ASM_GENERIC_ERRNO_H + +#include + +#define EDEADLK 35 /* Resource deadlock would occur */ +#define ENAMETOOLONG 36 /* File name too long */ +#define ENOLCK 37 /* No record locks available */ + +/* + * This error code is special: arch syscall entry code will return + * -ENOSYS if users try to call a syscall that doesn't exist. To keep + * failures of syscalls that really do exist distinguishable from + * failures due to attempts to use a nonexistent syscall, syscall + * implementations should refrain from returning -ENOSYS. + */ +#define ENOSYS 38 /* Invalid system call number */ + +#define ENOTEMPTY 39 /* Directory not empty */ +#define ELOOP 40 /* Too many symbolic links encountered */ +#define EWOULDBLOCK EAGAIN /* Operation would block */ +#define ENOMSG 42 /* No message of desired type */ +#define EIDRM 43 /* Identifier removed */ +#define ECHRNG 44 /* Channel number out of range */ +#define EL2NSYNC 45 /* Level 2 not synchronized */ +#define EL3HLT 46 /* Level 3 halted */ +#define EL3RST 47 /* Level 3 reset */ +#define ELNRNG 48 /* Link number out of range */ +#define EUNATCH 49 /* Protocol driver not attached */ +#define ENOCSI 50 /* No CSI structure available */ +#define EL2HLT 51 /* Level 2 halted */ +#define EBADE 52 /* Invalid exchange */ +#define EBADR 53 /* Invalid request descriptor */ +#define EXFULL 54 /* Exchange full */ +#define ENOANO 55 /* No anode */ +#define EBADRQC 56 /* Invalid request code */ +#define EBADSLT 57 /* Invalid slot */ + +#define EDEADLOCK EDEADLK + +#define EBFONT 59 /* Bad font file format */ +#define ENOSTR 60 /* Device not a stream */ +#define ENODATA 61 /* No data available */ +#define ETIME 62 /* Timer expired */ +#define ENOSR 63 /* Out of streams resources */ +#define ENONET 64 /* Machine is not on the network */ +#define ENOPKG 65 /* Package not installed */ +#define EREMOTE 66 /* Object is remote */ +#define ENOLINK 67 /* Link has been severed */ +#define EADV 68 /* Advertise error */ +#define ESRMNT 69 /* Srmount error */ +#define ECOMM 70 /* Communication error on send */ +#define EPROTO 71 /* Protocol error */ +#define EMULTIHOP 72 /* Multihop attempted */ +#define EDOTDOT 73 /* RFS specific error */ +#define EBADMSG 74 /* Not a data message */ +#define EOVERFLOW 75 /* Value too large for defined data type */ +#define ENOTUNIQ 76 /* Name not unique on network */ +#define EBADFD 77 /* File descriptor in bad state */ +#define EREMCHG 78 /* Remote address changed */ +#define ELIBACC 79 /* Can not access a needed shared library */ +#define ELIBBAD 80 /* Accessing a corrupted shared library */ +#define ELIBSCN 81 /* .lib section in a.out corrupted */ +#define ELIBMAX 82 /* Attempting to link in too many shared libraries */ +#define ELIBEXEC 83 /* Cannot exec a shared library directly */ +#define EILSEQ 84 /* Illegal byte sequence */ +#define ERESTART 85 /* Interrupted system call should be restarted */ +#define ESTRPIPE 86 /* Streams pipe error */ +#define EUSERS 87 /* Too many users */ +#define ENOTSOCK 88 /* Socket operation on non-socket */ +#define EDESTADDRREQ 89 /* Destination address required */ +#define EMSGSIZE 90 /* Message too long */ +#define EPROTOTYPE 91 /* Protocol wrong type for socket */ +#define ENOPROTOOPT 92 /* Protocol not available */ +#define EPROTONOSUPPORT 93 /* Protocol not supported */ +#define ESOCKTNOSUPPORT 94 /* Socket type not supported */ +#define EOPNOTSUPP 95 /* Operation not supported on transport endpoint */ +#define EPFNOSUPPORT 96 /* Protocol family not supported */ +#define EAFNOSUPPORT 97 /* Address family not supported by protocol */ +#define EADDRINUSE 98 /* Address already in use */ +#define EADDRNOTAVAIL 99 /* Cannot assign requested address */ +#define ENETDOWN 100 /* Network is down */ +#define ENETUNREACH 101 /* Network is unreachable */ +#define ENETRESET 102 /* Network dropped connection because of reset */ +#define ECONNABORTED 103 /* Software caused connection abort */ +#define ECONNRESET 104 /* Connection reset by peer */ +#define ENOBUFS 105 /* No buffer space available */ +#define EISCONN 106 /* Transport endpoint is already connected */ +#define ENOTCONN 107 /* Transport endpoint is not connected */ +#define ESHUTDOWN 108 /* Cannot send after transport endpoint shutdown */ +#define ETOOMANYREFS 109 /* Too many references: cannot splice */ +#define ETIMEDOUT 110 /* Connection timed out */ +#define ECONNREFUSED 111 /* Connection refused */ +#define EHOSTDOWN 112 /* Host is down */ +#define EHOSTUNREACH 113 /* No route to host */ +#define EALREADY 114 /* Operation already in progress */ +#define EINPROGRESS 115 /* Operation now in progress */ +#define ESTALE 116 /* Stale file handle */ +#define EUCLEAN 117 /* Structure needs cleaning */ +#define ENOTNAM 118 /* Not a XENIX named type file */ +#define ENAVAIL 119 /* No XENIX semaphores available */ +#define EISNAM 120 /* Is a named type file */ +#define EREMOTEIO 121 /* Remote I/O error */ +#define EDQUOT 122 /* Quota exceeded */ + +#define ENOMEDIUM 123 /* No medium found */ +#define EMEDIUMTYPE 124 /* Wrong medium type */ +#define ECANCELED 125 /* Operation Canceled */ +#define ENOKEY 126 /* Required key not available */ +#define EKEYEXPIRED 127 /* Key has expired */ +#define EKEYREVOKED 128 /* Key has been revoked */ +#define EKEYREJECTED 129 /* Key was rejected by service */ + +/* for robust mutexes */ +#define EOWNERDEAD 130 /* Owner died */ +#define ENOTRECOVERABLE 131 /* State not recoverable */ + +#define ERFKILL 132 /* Operation not possible due to RF-kill */ + +#define EHWPOISON 133 /* Memory page has hardware error */ + +#endif diff --git a/tools/perf/check-headers.sh b/tools/perf/check-headers.sh index 83a65d305298..51abdb0a4047 100755 --- a/tools/perf/check-headers.sh +++ b/tools/perf/check-headers.sh @@ -49,6 +49,8 @@ include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h include/asm-generic/bitops/fls64.h include/linux/coresight-pmu.h +include/uapi/asm-generic/errno.h +include/uapi/asm-generic/errno-base.h include/uapi/asm-generic/ioctls.h include/uapi/asm-generic/mman-common.h ' -- cgit v1.2.3-70-g09d2 From 0337cf74ccf2a43437bff2e23b278e4f2dc4c6e2 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:15 +0100 Subject: perf util: Introduce architecture specific errno/name mapping Introduce a script that generates a mapping of errno numbers to their names for each architecture that is supported by perf (i.e. has a subdirectory in tools/perf/arch/). The errno mapping is generated as part of the trace beautifiers and can be used by including the trace/beauty/arch_errno_names.c file. Then, use arch_syscalls__strerrno() to look up an errno value to obtain the errno name (e.g. ENOENT) for a particular architecture. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Suggested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-4-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-8zlsjnuoep2ww39aq5z41fno@git.kernel.org [ Make x86 be the first arch, most common, add newline to last line, fixing build on centos:5 ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Makefile.perf | 13 +++- tools/perf/trace/beauty/arch_errno_names.c | 1 + tools/perf/trace/beauty/arch_errno_names.sh | 100 ++++++++++++++++++++++++++++ tools/perf/trace/beauty/beauty.h | 2 + 4 files changed, 114 insertions(+), 2 deletions(-) create mode 100644 tools/perf/trace/beauty/arch_errno_names.c create mode 100755 tools/perf/trace/beauty/arch_errno_names.sh (limited to 'tools') diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9fdefd748e2e..9a9b528a88bb 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -462,6 +462,13 @@ prctl_option_tbl := $(srctree)/tools/perf/trace/beauty/prctl_option.sh $(prctl_option_array): $(prctl_hdr_dir)/prctl.h $(prctl_option_tbl) $(Q)$(SHELL) '$(prctl_option_tbl)' $(prctl_hdr_dir) > $@ +arch_errno_name_array := $(beauty_outdir)/arch_errno_name_array.c +arch_errno_hdr_dir := $(srctree)/tools +arch_errno_tbl := $(srctree)/tools/perf/trace/beauty/arch_errno_names.sh + +$(arch_errno_name_array): $(arch_errno_tbl) + $(Q)$(SHELL) '$(arch_errno_tbl)' $(CC) $(arch_errno_hdr_dir) > $@ + all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) $(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) @@ -565,7 +572,8 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(vhost_virtio_ioctl_array) \ $(madvise_behavior_array) \ $(perf_ioctl_array) \ - $(prctl_option_array) + $(prctl_option_array) \ + $(arch_errno_name_array) $(OUTPUT)%.o: %.c prepare FORCE $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ @@ -847,7 +855,8 @@ clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clea $(OUTPUT)$(kcmp_type_array) \ $(OUTPUT)$(vhost_virtio_ioctl_array) \ $(OUTPUT)$(perf_ioctl_array) \ - $(OUTPUT)$(prctl_option_array) + $(OUTPUT)$(prctl_option_array) \ + $(OUTPUT)$(arch_errno_name_array) $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean # diff --git a/tools/perf/trace/beauty/arch_errno_names.c b/tools/perf/trace/beauty/arch_errno_names.c new file mode 100644 index 000000000000..ede031c3a9e0 --- /dev/null +++ b/tools/perf/trace/beauty/arch_errno_names.c @@ -0,0 +1 @@ +#include "trace/beauty/generated/arch_errno_name_array.c" diff --git a/tools/perf/trace/beauty/arch_errno_names.sh b/tools/perf/trace/beauty/arch_errno_names.sh new file mode 100755 index 000000000000..22c9fc900c84 --- /dev/null +++ b/tools/perf/trace/beauty/arch_errno_names.sh @@ -0,0 +1,100 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0 +# +# Generate C file mapping errno codes to errno names. +# +# Copyright IBM Corp. 2018 +# Author(s): Hendrik Brueckner + +gcc="$1" +toolsdir="$2" +include_path="-I$toolsdir/include/uapi" + +arch_string() +{ + echo "$1" |sed -e 'y/- /__/' |tr '[[:upper:]]' '[[:lower:]]' +} + +asm_errno_file() +{ + local arch="$1" + local header + + header="$toolsdir/arch/$arch/include/uapi/asm/errno.h" + if test -r "$header"; then + echo "$header" + else + echo "$toolsdir/include/uapi/asm-generic/errno.h" + fi +} + +create_errno_lookup_func() +{ + local arch=$(arch_string "$1") + local nr name + + cat < + +EoHEADER + +# Create list of architectures and ignore those that do not appear +# in tools/perf/arch +archlist="" +for arch in $(find $toolsdir/arch -maxdepth 1 -mindepth 1 -type d -printf "%f\n" | grep -v x86 | sort); do + test -d arch/$arch && archlist="$archlist $arch" +done + +for arch in x86 $archlist generic; do + process_arch "$arch" +done +create_arch_errno_table_func "x86 $archlist" "generic" diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index a6dfd04beaee..d8f6b2ec7fc5 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -114,4 +114,6 @@ size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size); void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, size_t (*ret_scnprintf)(char *bf, size_t size, struct syscall_arg *arg)); +const char *arch_syscalls__strerrno(const char *arch, int err); + #endif /* _PERF_TRACE_BEAUTY_H */ -- cgit v1.2.3-70-g09d2 From 092bd3cd7169085b09e4a5307de95e461d0581d7 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:16 +0100 Subject: perf trace: Obtain errno strings by using arch_syscalls__strerrno() Replace the errno_to_name() from the audit-libs with the newly introduced arch_syscalls__strerrno() function. With this change: 1. With replacing errno_to_name() from audit-libs, perf trace does no longer require audit-lib interfaces. 2. In addition to 1, the audit-libs dependency can be removed for architectures that support syscall tables in perf. This is achieved in a follow-up commit. 3. With the architecture specific errno number/name mapping, perf trace reports can work across architectures. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-5-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-xjvoqzhwmu4wn4kl9ng11rvs@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 531d43bf57e1..7dece5e0cdbb 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -21,6 +21,7 @@ #include "builtin.h" #include "util/color.h" #include "util/debug.h" +#include "util/env.h" #include "util/event.h" #include "util/evlist.h" #include @@ -45,14 +46,12 @@ #include #include -#include /* FIXME: Still needed for audit_errno_to_name */ #include #include #include #include #include #include -#include #include #include #include @@ -545,6 +544,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, { .scnprintf = SCA_STRARRAY, \ .parm = &strarray__##array, } +#include "trace/beauty/arch_errno_names.c" #include "trace/beauty/eventfd.c" #include "trace/beauty/flock.c" #include "trace/beauty/futex_op.c" @@ -1660,6 +1660,14 @@ static int trace__fprintf_callchain(struct trace *trace, struct perf_sample *sam return sample__fprintf_callchain(sample, 38, print_opts, &callchain_cursor, trace->output); } +static const char *errno_to_name(struct perf_evsel *evsel, int err) +{ + struct perf_env *env = perf_evsel__env(evsel); + const char *arch_name = perf_env__arch(env); + + return arch_syscalls__strerrno(arch_name, err); +} + static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1730,7 +1738,7 @@ signed_print: errno_print: { char bf[STRERR_BUFSIZE]; const char *emsg = str_error_r(-ret, bf, sizeof(bf)), - *e = audit_errno_to_name(-ret); + *e = errno_to_name(evsel, -ret); fprintf(trace->output, ") = -1 %s %s", e, emsg); } -- cgit v1.2.3-70-g09d2 From b3fa38963a6a95bef888350ff3125182462c523c Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Fri, 19 Jan 2018 09:56:17 +0100 Subject: perf trace: Remove audit-libs dependency if syscall tables are present Change the Makefile and build process to no longer require audit-libs interfaces when the architecture provides system call tables. Committer notes: Its not enough to hook into the NO_LIBAUDIT makefile block, we need to define a CONFIG_TRACE that gets selected by both architectures generating the syscall tables from the kernel headers and from detecting the availability of libaudit. With that in place we will not link against libaudit even if the necessary files are available for that, in fact we will not even try to detect its availability, speeding up a bit the feature detection phase. Signed-off-by: Hendrik Brueckner Reviewed-by: Thomas Richter Tested-by: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Michael Petlan Cc: linux-s390@vger.kernel.org LPU-Reference: 1516352177-11106-6-git-send-email-brueckner@linux.vnet.ibm.com Link: https://lkml.kernel.org/n/tip-j68lub6ipm8apvy52vd3l4cm@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Build | 4 ++-- tools/perf/Makefile.config | 20 ++++++++++++-------- tools/perf/builtin-help.c | 2 +- tools/perf/perf.c | 4 ++-- tools/perf/util/Build | 2 +- tools/perf/util/generate-cmdlist.sh | 2 +- 6 files changed, 19 insertions(+), 15 deletions(-) (limited to 'tools') diff --git a/tools/perf/Build b/tools/perf/Build index b48ca40fccf9..e5232d567611 100644 --- a/tools/perf/Build +++ b/tools/perf/Build @@ -25,7 +25,7 @@ perf-y += builtin-data.o perf-y += builtin-version.o perf-y += builtin-c2c.o -perf-$(CONFIG_AUDIT) += builtin-trace.o +perf-$(CONFIG_TRACE) += builtin-trace.o perf-$(CONFIG_LIBELF) += builtin-probe.o perf-y += bench/ @@ -50,6 +50,6 @@ libperf-y += util/ libperf-y += arch/ libperf-y += ui/ libperf-y += scripts/ -libperf-$(CONFIG_AUDIT) += trace/beauty/ +libperf-$(CONFIG_TRACE) += trace/beauty/ gtk-y += ui/gtk/ diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 92265b32dddd..a042ccca4e93 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -531,14 +531,18 @@ ifndef NO_LIBUNWIND EXTLIBS += $(EXTLIBS_LIBUNWIND) endif -ifndef NO_LIBAUDIT - ifneq ($(feature-libaudit), 1) - msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); - NO_LIBAUDIT := 1 - else - CFLAGS += -DHAVE_LIBAUDIT_SUPPORT - EXTLIBS += -laudit - $(call detected,CONFIG_AUDIT) +ifeq ($(NO_SYSCALL_TABLE),0) + $(call detected,CONFIG_TRACE) +else + ifndef NO_LIBAUDIT + ifneq ($(feature-libaudit), 1) + msg := $(warning No libaudit.h found, disables 'trace' tool, please install audit-libs-devel or libaudit-dev); + NO_LIBAUDIT := 1 + else + CFLAGS += -DHAVE_LIBAUDIT_SUPPORT + EXTLIBS += -laudit + $(call detected,CONFIG_TRACE) + endif endif endif diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index a0f7ed2b869b..4aca13f23b9d 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -439,7 +439,7 @@ int cmd_help(int argc, const char **argv) #ifdef HAVE_LIBELF_SUPPORT "probe", #endif -#ifdef HAVE_LIBAUDIT_SUPPORT +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) "trace", #endif NULL }; diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 62b13518bc6e..1b3fc8ec0fa2 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -73,7 +73,7 @@ static struct cmd_struct commands[] = { { "lock", cmd_lock, 0 }, { "kvm", cmd_kvm, 0 }, { "test", cmd_test, 0 }, -#ifdef HAVE_LIBAUDIT_SUPPORT +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) { "trace", cmd_trace, 0 }, #endif { "inject", cmd_inject, 0 }, @@ -485,7 +485,7 @@ int main(int argc, const char **argv) argv[0] = cmd; } if (strstarts(cmd, "trace")) { -#ifdef HAVE_LIBAUDIT_SUPPORT +#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE) setup_path(); argv[0] = "trace"; return cmd_trace(argc, argv); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 7c6a8b461e24..4eef0c243306 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -44,7 +44,7 @@ libperf-y += machine.o libperf-y += map.o libperf-y += pstack.o libperf-y += session.o -libperf-$(CONFIG_AUDIT) += syscalltbl.o +libperf-$(CONFIG_TRACE) += syscalltbl.o libperf-y += ordered-events.o libperf-y += namespaces.o libperf-y += comm.o diff --git a/tools/perf/util/generate-cmdlist.sh b/tools/perf/util/generate-cmdlist.sh index 9bbcec4e3365..ff17920a5ebc 100755 --- a/tools/perf/util/generate-cmdlist.sh +++ b/tools/perf/util/generate-cmdlist.sh @@ -38,7 +38,7 @@ do done echo "#endif /* HAVE_LIBELF_SUPPORT */" -echo "#ifdef HAVE_LIBAUDIT_SUPPORT" +echo "#if defined(HAVE_LIBAUDIT_SUPPORT) || defined(HAVE_SYSCALL_TABLE)" sed -n -e 's/^perf-\([^ ]*\)[ ].* audit*/\1/p' command-list.txt | sort | while read cmd -- cgit v1.2.3-70-g09d2 From b3ab8adc8b5a72c2cc60ea936a870143a9b8c100 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:43:03 -0800 Subject: perf vendor events intel: Update Broadwell events to V22 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/broadwell/cache.json | 555 ++++++--- .../arch/x86/broadwell/floating-point.json | 108 +- .../pmu-events/arch/x86/broadwell/frontend.json | 138 +-- .../perf/pmu-events/arch/x86/broadwell/memory.json | 210 +++- .../perf/pmu-events/arch/x86/broadwell/other.json | 20 +- .../pmu-events/arch/x86/broadwell/pipeline.json | 1216 ++++++++++---------- .../arch/x86/broadwell/virtual-memory.json | 150 +-- 7 files changed, 1401 insertions(+), 996 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/broadwell/cache.json b/tools/perf/pmu-events/arch/x86/broadwell/cache.json index 73688a9dab2a..bba3152ec54a 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/cache.json @@ -10,13 +10,30 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "UMask": "0x22", + "EventName": "L2_RQSTS.RFO_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "BriefDescription": "RFO requests that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x27", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests that miss L2 cache.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -29,6 +46,43 @@ "BriefDescription": "L2 prefetch requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3f", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "All requests that miss L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x41", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x42", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x44", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of requests from the L2 hardware prefetchers that hit L2 cache. L3 prefetch new types.", "EventCode": "0x24", @@ -69,6 +123,15 @@ "BriefDescription": "L2 code requests", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xe7", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests to L2 cache.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.", "EventCode": "0x24", @@ -79,6 +142,15 @@ "BriefDescription": "Requests from L2 hardware prefetchers", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xff", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "All L2 requests.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of WB requests that hit L2 cache.", "EventCode": "0x27", @@ -130,6 +202,27 @@ "CounterMask": "1", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", "EventCode": "0x51", @@ -152,7 +245,30 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "BDM76", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "BDM76", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x2", @@ -174,26 +290,26 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", + "UMask": "0x4", "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", + "UMask": "0x8", "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -208,18 +324,6 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "This event counts the number of cycles when the L1D is locked. It is a superset of the 0x1 mask (BUS_LOCK_CLOCKS.BUS_LOCK_DURATION).", "EventCode": "0x63", @@ -261,7 +365,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable \"Demands\" and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "EventCode": "0xB0", "Counter": "0,1,2,3", "UMask": "0x8", @@ -280,153 +384,162 @@ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "CounterHTOff": "0,1,2,3" + }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x21", "Errata": "BDM35", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x41", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x42", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" }, { "PEBS": "1", - "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x81", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x82", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x2", "Errata": "BDM35", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops misses in L1 cache as data sources.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -438,84 +551,83 @@ "Errata": "BDM100, BDE70", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS", "SampleAfterValue": "100007", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "BDM100", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD3", "Counter": "0,1,2,3", "UMask": "0x1", "Errata": "BDE70, BDM100", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "SampleAfterValue": "100007", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -659,119 +771,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x44", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x27", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe7", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "All requests that miss L2 cache.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "BDM76", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010001 ", "Counter": "0,1,2,3", @@ -784,6 +784,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020001 ", "Counter": "0,1,2,3", @@ -796,6 +797,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020001 ", "Counter": "0,1,2,3", @@ -808,6 +810,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020001 ", "Counter": "0,1,2,3", @@ -820,6 +823,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020001 ", "Counter": "0,1,2,3", @@ -832,6 +836,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020001 ", "Counter": "0,1,2,3", @@ -844,6 +849,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020001 ", "Counter": "0,1,2,3", @@ -856,6 +862,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0001 ", "Counter": "0,1,2,3", @@ -868,6 +875,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0001 ", "Counter": "0,1,2,3", @@ -880,6 +888,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0001 ", "Counter": "0,1,2,3", @@ -892,6 +901,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0001 ", "Counter": "0,1,2,3", @@ -904,6 +914,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0001 ", "Counter": "0,1,2,3", @@ -916,6 +927,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0001 ", "Counter": "0,1,2,3", @@ -928,6 +940,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010002 ", "Counter": "0,1,2,3", @@ -940,6 +953,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0002 ", "Counter": "0,1,2,3", @@ -952,6 +966,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0002 ", "Counter": "0,1,2,3", @@ -964,6 +979,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0002 ", "Counter": "0,1,2,3", @@ -976,6 +992,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0002 ", "Counter": "0,1,2,3", @@ -988,6 +1005,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0002 ", "Counter": "0,1,2,3", @@ -1000,6 +1018,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0002 ", "Counter": "0,1,2,3", @@ -1012,6 +1031,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010004 ", "Counter": "0,1,2,3", @@ -1024,6 +1044,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020004 ", "Counter": "0,1,2,3", @@ -1036,6 +1057,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020004 ", "Counter": "0,1,2,3", @@ -1048,6 +1070,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020004 ", "Counter": "0,1,2,3", @@ -1060,6 +1083,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020004 ", "Counter": "0,1,2,3", @@ -1072,6 +1096,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020004 ", "Counter": "0,1,2,3", @@ -1084,6 +1109,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020004 ", "Counter": "0,1,2,3", @@ -1096,6 +1122,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0004 ", "Counter": "0,1,2,3", @@ -1108,6 +1135,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0004 ", "Counter": "0,1,2,3", @@ -1120,6 +1148,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0004 ", "Counter": "0,1,2,3", @@ -1132,6 +1161,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0004 ", "Counter": "0,1,2,3", @@ -1144,6 +1174,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0004 ", "Counter": "0,1,2,3", @@ -1156,6 +1187,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0004 ", "Counter": "0,1,2,3", @@ -1168,6 +1200,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010008 ", "Counter": "0,1,2,3", @@ -1180,6 +1213,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020008 ", "Counter": "0,1,2,3", @@ -1192,6 +1226,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020008 ", "Counter": "0,1,2,3", @@ -1204,6 +1239,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020008 ", "Counter": "0,1,2,3", @@ -1216,6 +1252,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020008 ", "Counter": "0,1,2,3", @@ -1228,6 +1265,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020008 ", "Counter": "0,1,2,3", @@ -1240,6 +1278,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020008 ", "Counter": "0,1,2,3", @@ -1252,6 +1291,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0008 ", "Counter": "0,1,2,3", @@ -1264,6 +1304,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0008 ", "Counter": "0,1,2,3", @@ -1276,6 +1317,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0008 ", "Counter": "0,1,2,3", @@ -1288,6 +1330,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0008 ", "Counter": "0,1,2,3", @@ -1300,6 +1343,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0008 ", "Counter": "0,1,2,3", @@ -1312,6 +1356,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0008 ", "Counter": "0,1,2,3", @@ -1324,6 +1369,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010010 ", "Counter": "0,1,2,3", @@ -1336,6 +1382,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020010 ", "Counter": "0,1,2,3", @@ -1348,6 +1395,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020010 ", "Counter": "0,1,2,3", @@ -1360,6 +1408,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020010 ", "Counter": "0,1,2,3", @@ -1372,6 +1421,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020010 ", "Counter": "0,1,2,3", @@ -1384,6 +1434,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020010 ", "Counter": "0,1,2,3", @@ -1396,6 +1447,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020010 ", "Counter": "0,1,2,3", @@ -1408,6 +1460,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0010 ", "Counter": "0,1,2,3", @@ -1420,6 +1473,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0010 ", "Counter": "0,1,2,3", @@ -1432,6 +1486,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0010 ", "Counter": "0,1,2,3", @@ -1444,6 +1499,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0010 ", "Counter": "0,1,2,3", @@ -1456,6 +1512,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0010 ", "Counter": "0,1,2,3", @@ -1468,6 +1525,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0010 ", "Counter": "0,1,2,3", @@ -1480,6 +1538,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010020 ", "Counter": "0,1,2,3", @@ -1492,6 +1551,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020020 ", "Counter": "0,1,2,3", @@ -1504,6 +1564,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020020 ", "Counter": "0,1,2,3", @@ -1516,6 +1577,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020020 ", "Counter": "0,1,2,3", @@ -1528,6 +1590,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020020 ", "Counter": "0,1,2,3", @@ -1540,6 +1603,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020020 ", "Counter": "0,1,2,3", @@ -1552,6 +1616,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020020 ", "Counter": "0,1,2,3", @@ -1564,6 +1629,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0020 ", "Counter": "0,1,2,3", @@ -1576,6 +1642,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0020 ", "Counter": "0,1,2,3", @@ -1588,6 +1655,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0020 ", "Counter": "0,1,2,3", @@ -1600,6 +1668,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0020 ", "Counter": "0,1,2,3", @@ -1612,6 +1681,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0020 ", "Counter": "0,1,2,3", @@ -1624,6 +1694,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0020 ", "Counter": "0,1,2,3", @@ -1636,6 +1707,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010040 ", "Counter": "0,1,2,3", @@ -1648,6 +1720,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020040 ", "Counter": "0,1,2,3", @@ -1660,6 +1733,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020040 ", "Counter": "0,1,2,3", @@ -1672,6 +1746,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020040 ", "Counter": "0,1,2,3", @@ -1684,6 +1759,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020040 ", "Counter": "0,1,2,3", @@ -1696,6 +1772,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020040 ", "Counter": "0,1,2,3", @@ -1708,6 +1785,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020040 ", "Counter": "0,1,2,3", @@ -1720,6 +1798,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0040 ", "Counter": "0,1,2,3", @@ -1732,6 +1811,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0040 ", "Counter": "0,1,2,3", @@ -1744,6 +1824,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0040 ", "Counter": "0,1,2,3", @@ -1756,6 +1837,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0040 ", "Counter": "0,1,2,3", @@ -1768,6 +1850,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0040 ", "Counter": "0,1,2,3", @@ -1780,6 +1863,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0040 ", "Counter": "0,1,2,3", @@ -1792,6 +1876,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010080 ", "Counter": "0,1,2,3", @@ -1804,6 +1889,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020080 ", "Counter": "0,1,2,3", @@ -1816,6 +1902,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020080 ", "Counter": "0,1,2,3", @@ -1828,6 +1915,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020080 ", "Counter": "0,1,2,3", @@ -1840,6 +1928,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020080 ", "Counter": "0,1,2,3", @@ -1852,6 +1941,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020080 ", "Counter": "0,1,2,3", @@ -1864,6 +1954,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020080 ", "Counter": "0,1,2,3", @@ -1876,6 +1967,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0080 ", "Counter": "0,1,2,3", @@ -1888,6 +1980,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0080 ", "Counter": "0,1,2,3", @@ -1900,6 +1993,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0080 ", "Counter": "0,1,2,3", @@ -1912,6 +2006,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0080 ", "Counter": "0,1,2,3", @@ -1924,6 +2019,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0080 ", "Counter": "0,1,2,3", @@ -1936,6 +2032,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0080 ", "Counter": "0,1,2,3", @@ -1948,6 +2045,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010100 ", "Counter": "0,1,2,3", @@ -1960,6 +2058,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020100 ", "Counter": "0,1,2,3", @@ -1972,6 +2071,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020100 ", "Counter": "0,1,2,3", @@ -1984,6 +2084,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020100 ", "Counter": "0,1,2,3", @@ -1996,6 +2097,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020100 ", "Counter": "0,1,2,3", @@ -2008,6 +2110,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020100 ", "Counter": "0,1,2,3", @@ -2020,6 +2123,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020100 ", "Counter": "0,1,2,3", @@ -2032,6 +2136,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0100 ", "Counter": "0,1,2,3", @@ -2044,6 +2149,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0100 ", "Counter": "0,1,2,3", @@ -2056,6 +2162,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0100 ", "Counter": "0,1,2,3", @@ -2068,6 +2175,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0100 ", "Counter": "0,1,2,3", @@ -2080,6 +2188,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0100 ", "Counter": "0,1,2,3", @@ -2092,6 +2201,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0100 ", "Counter": "0,1,2,3", @@ -2104,6 +2214,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010200 ", "Counter": "0,1,2,3", @@ -2116,6 +2227,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020200 ", "Counter": "0,1,2,3", @@ -2128,6 +2240,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020200 ", "Counter": "0,1,2,3", @@ -2140,6 +2253,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020200 ", "Counter": "0,1,2,3", @@ -2152,6 +2266,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020200 ", "Counter": "0,1,2,3", @@ -2164,6 +2279,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020200 ", "Counter": "0,1,2,3", @@ -2176,6 +2292,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020200 ", "Counter": "0,1,2,3", @@ -2188,6 +2305,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0200 ", "Counter": "0,1,2,3", @@ -2200,6 +2318,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0200 ", "Counter": "0,1,2,3", @@ -2212,6 +2331,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0200 ", "Counter": "0,1,2,3", @@ -2224,6 +2344,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0200 ", "Counter": "0,1,2,3", @@ -2236,6 +2357,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0200 ", "Counter": "0,1,2,3", @@ -2248,6 +2370,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0200 ", "Counter": "0,1,2,3", @@ -2260,6 +2383,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000018000 ", "Counter": "0,1,2,3", @@ -2272,6 +2396,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080028000 ", "Counter": "0,1,2,3", @@ -2284,6 +2409,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100028000 ", "Counter": "0,1,2,3", @@ -2296,6 +2422,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200028000 ", "Counter": "0,1,2,3", @@ -2308,6 +2435,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400028000 ", "Counter": "0,1,2,3", @@ -2320,6 +2448,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000028000 ", "Counter": "0,1,2,3", @@ -2332,6 +2461,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80028000 ", "Counter": "0,1,2,3", @@ -2344,6 +2474,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c8000 ", "Counter": "0,1,2,3", @@ -2356,6 +2487,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c8000 ", "Counter": "0,1,2,3", @@ -2368,6 +2500,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c8000 ", "Counter": "0,1,2,3", @@ -2380,6 +2513,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c8000 ", "Counter": "0,1,2,3", @@ -2392,6 +2526,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c8000 ", "Counter": "0,1,2,3", @@ -2404,6 +2539,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c8000 ", "Counter": "0,1,2,3", @@ -2416,6 +2552,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010090 ", "Counter": "0,1,2,3", @@ -2428,6 +2565,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020090 ", "Counter": "0,1,2,3", @@ -2440,6 +2578,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020090 ", "Counter": "0,1,2,3", @@ -2452,6 +2591,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020090 ", "Counter": "0,1,2,3", @@ -2464,6 +2604,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020090 ", "Counter": "0,1,2,3", @@ -2476,6 +2617,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020090 ", "Counter": "0,1,2,3", @@ -2488,6 +2630,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020090 ", "Counter": "0,1,2,3", @@ -2500,6 +2643,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0090 ", "Counter": "0,1,2,3", @@ -2512,6 +2656,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0090 ", "Counter": "0,1,2,3", @@ -2524,6 +2669,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0090 ", "Counter": "0,1,2,3", @@ -2536,6 +2682,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0090 ", "Counter": "0,1,2,3", @@ -2548,6 +2695,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0090 ", "Counter": "0,1,2,3", @@ -2560,6 +2708,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0090 ", "Counter": "0,1,2,3", @@ -2572,6 +2721,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010120 ", "Counter": "0,1,2,3", @@ -2584,6 +2734,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020120 ", "Counter": "0,1,2,3", @@ -2596,6 +2747,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020120 ", "Counter": "0,1,2,3", @@ -2608,6 +2760,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020120 ", "Counter": "0,1,2,3", @@ -2620,6 +2773,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020120 ", "Counter": "0,1,2,3", @@ -2632,6 +2786,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020120 ", "Counter": "0,1,2,3", @@ -2644,6 +2799,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020120 ", "Counter": "0,1,2,3", @@ -2656,6 +2812,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0120 ", "Counter": "0,1,2,3", @@ -2668,6 +2825,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0120 ", "Counter": "0,1,2,3", @@ -2680,6 +2838,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0120 ", "Counter": "0,1,2,3", @@ -2692,6 +2851,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0120 ", "Counter": "0,1,2,3", @@ -2704,6 +2864,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0120 ", "Counter": "0,1,2,3", @@ -2716,6 +2877,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0120 ", "Counter": "0,1,2,3", @@ -2728,6 +2890,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010240 ", "Counter": "0,1,2,3", @@ -2740,6 +2903,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020240 ", "Counter": "0,1,2,3", @@ -2752,6 +2916,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020240 ", "Counter": "0,1,2,3", @@ -2764,6 +2929,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020240 ", "Counter": "0,1,2,3", @@ -2776,6 +2942,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020240 ", "Counter": "0,1,2,3", @@ -2788,6 +2955,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020240 ", "Counter": "0,1,2,3", @@ -2800,6 +2968,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020240 ", "Counter": "0,1,2,3", @@ -2812,6 +2981,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0240 ", "Counter": "0,1,2,3", @@ -2824,6 +2994,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0240 ", "Counter": "0,1,2,3", @@ -2836,6 +3007,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0240 ", "Counter": "0,1,2,3", @@ -2848,6 +3020,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0240 ", "Counter": "0,1,2,3", @@ -2860,6 +3033,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0240 ", "Counter": "0,1,2,3", @@ -2872,6 +3046,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0240 ", "Counter": "0,1,2,3", @@ -2884,6 +3059,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010091 ", "Counter": "0,1,2,3", @@ -2896,6 +3072,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020091 ", "Counter": "0,1,2,3", @@ -2908,6 +3085,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020091 ", "Counter": "0,1,2,3", @@ -2920,6 +3098,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020091 ", "Counter": "0,1,2,3", @@ -2932,6 +3111,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020091 ", "Counter": "0,1,2,3", @@ -2944,6 +3124,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020091 ", "Counter": "0,1,2,3", @@ -2956,6 +3137,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020091 ", "Counter": "0,1,2,3", @@ -2968,6 +3150,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0091 ", "Counter": "0,1,2,3", @@ -2980,6 +3163,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0091 ", "Counter": "0,1,2,3", @@ -2992,6 +3176,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0091 ", "Counter": "0,1,2,3", @@ -3004,6 +3189,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0091 ", "Counter": "0,1,2,3", @@ -3016,6 +3202,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0091 ", "Counter": "0,1,2,3", @@ -3028,6 +3215,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0091 ", "Counter": "0,1,2,3", @@ -3040,6 +3228,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010122 ", "Counter": "0,1,2,3", @@ -3052,6 +3241,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020122 ", "Counter": "0,1,2,3", @@ -3064,6 +3254,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020122 ", "Counter": "0,1,2,3", @@ -3076,6 +3267,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020122 ", "Counter": "0,1,2,3", @@ -3088,6 +3280,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020122 ", "Counter": "0,1,2,3", @@ -3100,6 +3293,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020122 ", "Counter": "0,1,2,3", @@ -3112,6 +3306,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f80020122 ", "Counter": "0,1,2,3", @@ -3124,6 +3319,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00803c0122 ", "Counter": "0,1,2,3", @@ -3136,6 +3332,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01003c0122 ", "Counter": "0,1,2,3", @@ -3148,6 +3345,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x02003c0122 ", "Counter": "0,1,2,3", @@ -3160,6 +3358,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0122 ", "Counter": "0,1,2,3", @@ -3172,6 +3371,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0122 ", "Counter": "0,1,2,3", @@ -3184,6 +3384,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0122 ", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json index 102bfb808199..689d478dae93 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/floating-point.json @@ -1,6 +1,6 @@ [ { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x8", @@ -11,7 +11,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", "EventCode": "0xC1", "Counter": "0,1,2,3", "UMask": "0x10", @@ -22,7 +22,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x1", @@ -32,7 +31,6 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x2", @@ -42,7 +40,15 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "CounterHTOff": "0,1,2,3" + }, + { "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x4", @@ -52,7 +58,6 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x8", @@ -62,7 +67,6 @@ "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x10", @@ -72,7 +76,43 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x15", + "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", + "SampleAfterValue": "2000006", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xc7", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x2a", + "EventName": "FP_ARITH_INST_RETIRED.SINGLE", + "SampleAfterValue": "2000005", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "Counter": "0,1,2,3", + "UMask": "0x3c", + "EventName": "FP_ARITH_INST_RETIRED.PACKED", + "SampleAfterValue": "2000004", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x2", @@ -82,7 +122,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x4", @@ -92,7 +132,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", + "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x8", @@ -102,7 +142,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x10", @@ -121,51 +161,5 @@ "BriefDescription": "Cycles with any input/output SSE or FP assist", "CounterMask": "1", "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "EventCode": "0xc7", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x3c", - "EventName": "FP_ARITH_INST_RETIRED.PACKED", - "SampleAfterValue": "2000004", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x2a", - "EventName": "FP_ARITH_INST_RETIRED.SINGLE", - "SampleAfterValue": "2000005", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "Counter": "0,1,2,3", - "UMask": "0x15", - "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", - "SampleAfterValue": "2000006", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json index b0cdf1f097a0..7142c76d7f11 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/frontend.json @@ -10,7 +10,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x4", @@ -20,80 +20,49 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x10", @@ -104,7 +73,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x10", @@ -116,7 +85,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x18", @@ -127,7 +96,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x18", @@ -138,7 +107,17 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x24", @@ -149,7 +128,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x24", @@ -160,7 +139,39 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x3c", @@ -200,7 +211,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -263,7 +274,7 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", @@ -271,16 +282,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwell/memory.json b/tools/perf/pmu-events/arch/x86/broadwell/memory.json index ff5416d29d0d..c9154cebbdf0 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/memory.json @@ -90,7 +90,6 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", "EventCode": "0x5d", "Counter": "0,1,2,3", "UMask": "0x1", @@ -170,13 +169,13 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered (PEBS).", "EventCode": "0xc8", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "HLE_RETIRED.ABORTED", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times HLE abort was triggered", + "BriefDescription": "Number of times HLE abort was triggered (PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -251,13 +250,13 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times RTM abort was triggered .", + "PublicDescription": "Number of times RTM abort was triggered (PEBS).", "EventCode": "0xc9", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "RTM_RETIRED.ABORTED", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times RTM abort was triggered", + "BriefDescription": "Number of times RTM abort was triggered (PEBS)", "CounterHTOff": "0,1,2,3" }, { @@ -431,6 +430,7 @@ "CounterHTOff": "3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020001 ", "Counter": "0,1,2,3", @@ -443,6 +443,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0001 ", "Counter": "0,1,2,3", @@ -455,6 +456,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000001 ", "Counter": "0,1,2,3", @@ -467,6 +469,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000001 ", "Counter": "0,1,2,3", @@ -479,6 +482,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000001 ", "Counter": "0,1,2,3", @@ -491,6 +495,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000001 ", "Counter": "0,1,2,3", @@ -503,6 +508,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000001 ", "Counter": "0,1,2,3", @@ -515,6 +521,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000001 ", "Counter": "0,1,2,3", @@ -527,6 +534,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000001 ", "Counter": "0,1,2,3", @@ -539,6 +547,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000001 ", "Counter": "0,1,2,3", @@ -551,6 +560,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000001 ", "Counter": "0,1,2,3", @@ -563,6 +573,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000001 ", "Counter": "0,1,2,3", @@ -575,6 +586,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000001 ", "Counter": "0,1,2,3", @@ -587,6 +599,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0002 ", "Counter": "0,1,2,3", @@ -599,6 +612,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000002 ", "Counter": "0,1,2,3", @@ -611,6 +625,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000002 ", "Counter": "0,1,2,3", @@ -623,6 +638,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000002 ", "Counter": "0,1,2,3", @@ -635,6 +651,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000002 ", "Counter": "0,1,2,3", @@ -647,6 +664,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000002 ", "Counter": "0,1,2,3", @@ -659,6 +677,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020004 ", "Counter": "0,1,2,3", @@ -671,6 +690,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0004 ", "Counter": "0,1,2,3", @@ -683,6 +703,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000004 ", "Counter": "0,1,2,3", @@ -695,6 +716,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000004 ", "Counter": "0,1,2,3", @@ -707,6 +729,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000004 ", "Counter": "0,1,2,3", @@ -719,6 +742,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000004 ", "Counter": "0,1,2,3", @@ -731,6 +755,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000004 ", "Counter": "0,1,2,3", @@ -743,6 +768,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000004 ", "Counter": "0,1,2,3", @@ -755,6 +781,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000004 ", "Counter": "0,1,2,3", @@ -767,6 +794,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000004 ", "Counter": "0,1,2,3", @@ -779,6 +807,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000004 ", "Counter": "0,1,2,3", @@ -791,6 +820,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000004 ", "Counter": "0,1,2,3", @@ -803,6 +833,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000004 ", "Counter": "0,1,2,3", @@ -815,6 +846,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020008 ", "Counter": "0,1,2,3", @@ -827,6 +859,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0008 ", "Counter": "0,1,2,3", @@ -839,6 +872,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000008 ", "Counter": "0,1,2,3", @@ -851,6 +885,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000008 ", "Counter": "0,1,2,3", @@ -863,6 +898,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000008 ", "Counter": "0,1,2,3", @@ -875,6 +911,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000008 ", "Counter": "0,1,2,3", @@ -887,6 +924,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000008 ", "Counter": "0,1,2,3", @@ -899,6 +937,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000008 ", "Counter": "0,1,2,3", @@ -911,6 +950,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000008 ", "Counter": "0,1,2,3", @@ -923,6 +963,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000008 ", "Counter": "0,1,2,3", @@ -935,6 +976,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000008 ", "Counter": "0,1,2,3", @@ -947,6 +989,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts writebacks (modified to exclusive) that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000008 ", "Counter": "0,1,2,3", @@ -959,6 +1002,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000008 ", "Counter": "0,1,2,3", @@ -971,6 +1015,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020010 ", "Counter": "0,1,2,3", @@ -983,6 +1028,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0010 ", "Counter": "0,1,2,3", @@ -995,6 +1041,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000010 ", "Counter": "0,1,2,3", @@ -1007,6 +1054,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000010 ", "Counter": "0,1,2,3", @@ -1019,6 +1067,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000010 ", "Counter": "0,1,2,3", @@ -1031,6 +1080,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000010 ", "Counter": "0,1,2,3", @@ -1043,6 +1093,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000010 ", "Counter": "0,1,2,3", @@ -1055,6 +1106,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000010 ", "Counter": "0,1,2,3", @@ -1067,6 +1119,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000010 ", "Counter": "0,1,2,3", @@ -1079,6 +1132,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000010 ", "Counter": "0,1,2,3", @@ -1091,6 +1145,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000010 ", "Counter": "0,1,2,3", @@ -1103,6 +1158,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000010 ", "Counter": "0,1,2,3", @@ -1115,6 +1171,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000010 ", "Counter": "0,1,2,3", @@ -1127,6 +1184,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020020 ", "Counter": "0,1,2,3", @@ -1139,6 +1197,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0020 ", "Counter": "0,1,2,3", @@ -1151,6 +1210,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000020 ", "Counter": "0,1,2,3", @@ -1163,6 +1223,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000020 ", "Counter": "0,1,2,3", @@ -1175,6 +1236,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000020 ", "Counter": "0,1,2,3", @@ -1187,6 +1249,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000020 ", "Counter": "0,1,2,3", @@ -1199,6 +1262,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000020 ", "Counter": "0,1,2,3", @@ -1211,6 +1275,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000020 ", "Counter": "0,1,2,3", @@ -1223,6 +1288,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000020 ", "Counter": "0,1,2,3", @@ -1235,6 +1301,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000020 ", "Counter": "0,1,2,3", @@ -1247,6 +1314,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000020 ", "Counter": "0,1,2,3", @@ -1259,6 +1327,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000020 ", "Counter": "0,1,2,3", @@ -1271,6 +1340,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000020 ", "Counter": "0,1,2,3", @@ -1283,6 +1353,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020040 ", "Counter": "0,1,2,3", @@ -1295,6 +1366,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0040 ", "Counter": "0,1,2,3", @@ -1307,6 +1379,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000040 ", "Counter": "0,1,2,3", @@ -1319,6 +1392,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000040 ", "Counter": "0,1,2,3", @@ -1331,6 +1405,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000040 ", "Counter": "0,1,2,3", @@ -1343,6 +1418,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000040 ", "Counter": "0,1,2,3", @@ -1355,6 +1431,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000040 ", "Counter": "0,1,2,3", @@ -1367,6 +1444,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000040 ", "Counter": "0,1,2,3", @@ -1379,6 +1457,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000040 ", "Counter": "0,1,2,3", @@ -1391,6 +1470,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000040 ", "Counter": "0,1,2,3", @@ -1403,6 +1483,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000040 ", "Counter": "0,1,2,3", @@ -1415,6 +1496,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000040 ", "Counter": "0,1,2,3", @@ -1427,6 +1509,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000040 ", "Counter": "0,1,2,3", @@ -1439,6 +1522,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020080 ", "Counter": "0,1,2,3", @@ -1451,6 +1535,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0080 ", "Counter": "0,1,2,3", @@ -1463,6 +1548,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000080 ", "Counter": "0,1,2,3", @@ -1475,6 +1561,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000080 ", "Counter": "0,1,2,3", @@ -1487,6 +1574,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000080 ", "Counter": "0,1,2,3", @@ -1499,6 +1587,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000080 ", "Counter": "0,1,2,3", @@ -1511,6 +1600,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000080 ", "Counter": "0,1,2,3", @@ -1523,6 +1613,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000080 ", "Counter": "0,1,2,3", @@ -1535,6 +1626,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000080 ", "Counter": "0,1,2,3", @@ -1547,6 +1639,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000080 ", "Counter": "0,1,2,3", @@ -1559,6 +1652,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000080 ", "Counter": "0,1,2,3", @@ -1571,6 +1665,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000080 ", "Counter": "0,1,2,3", @@ -1583,6 +1678,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000080 ", "Counter": "0,1,2,3", @@ -1595,6 +1691,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020100 ", "Counter": "0,1,2,3", @@ -1607,6 +1704,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0100 ", "Counter": "0,1,2,3", @@ -1619,6 +1717,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000100 ", "Counter": "0,1,2,3", @@ -1631,6 +1730,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000100 ", "Counter": "0,1,2,3", @@ -1643,6 +1743,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000100 ", "Counter": "0,1,2,3", @@ -1655,6 +1756,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000100 ", "Counter": "0,1,2,3", @@ -1667,6 +1769,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000100 ", "Counter": "0,1,2,3", @@ -1679,6 +1782,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000100 ", "Counter": "0,1,2,3", @@ -1691,6 +1795,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000100 ", "Counter": "0,1,2,3", @@ -1703,6 +1808,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000100 ", "Counter": "0,1,2,3", @@ -1715,6 +1821,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000100 ", "Counter": "0,1,2,3", @@ -1727,6 +1834,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000100 ", "Counter": "0,1,2,3", @@ -1739,6 +1847,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000100 ", "Counter": "0,1,2,3", @@ -1751,6 +1860,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020200 ", "Counter": "0,1,2,3", @@ -1763,6 +1873,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0200 ", "Counter": "0,1,2,3", @@ -1775,6 +1886,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000200 ", "Counter": "0,1,2,3", @@ -1787,6 +1899,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000200 ", "Counter": "0,1,2,3", @@ -1799,6 +1912,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000200 ", "Counter": "0,1,2,3", @@ -1811,6 +1925,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000200 ", "Counter": "0,1,2,3", @@ -1823,6 +1938,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000200 ", "Counter": "0,1,2,3", @@ -1835,6 +1951,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000200 ", "Counter": "0,1,2,3", @@ -1847,6 +1964,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000200 ", "Counter": "0,1,2,3", @@ -1859,6 +1977,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000200 ", "Counter": "0,1,2,3", @@ -1871,6 +1990,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000200 ", "Counter": "0,1,2,3", @@ -1883,6 +2003,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000200 ", "Counter": "0,1,2,3", @@ -1895,6 +2016,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000200 ", "Counter": "0,1,2,3", @@ -1907,6 +2029,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000028000 ", "Counter": "0,1,2,3", @@ -1919,6 +2042,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c8000 ", "Counter": "0,1,2,3", @@ -1931,6 +2055,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084008000 ", "Counter": "0,1,2,3", @@ -1943,6 +2068,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104008000 ", "Counter": "0,1,2,3", @@ -1955,6 +2081,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204008000 ", "Counter": "0,1,2,3", @@ -1967,6 +2094,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404008000 ", "Counter": "0,1,2,3", @@ -1979,6 +2107,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004008000 ", "Counter": "0,1,2,3", @@ -1991,6 +2120,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004008000 ", "Counter": "0,1,2,3", @@ -2003,6 +2133,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84008000 ", "Counter": "0,1,2,3", @@ -2015,6 +2146,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc008000 ", "Counter": "0,1,2,3", @@ -2027,6 +2159,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c008000 ", "Counter": "0,1,2,3", @@ -2039,6 +2172,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts any other requests that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c008000 ", "Counter": "0,1,2,3", @@ -2051,6 +2185,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c008000 ", "Counter": "0,1,2,3", @@ -2063,6 +2198,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020090 ", "Counter": "0,1,2,3", @@ -2075,6 +2211,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0090 ", "Counter": "0,1,2,3", @@ -2087,6 +2224,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000090 ", "Counter": "0,1,2,3", @@ -2099,6 +2237,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000090 ", "Counter": "0,1,2,3", @@ -2111,6 +2250,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000090 ", "Counter": "0,1,2,3", @@ -2123,6 +2263,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000090 ", "Counter": "0,1,2,3", @@ -2135,6 +2276,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000090 ", "Counter": "0,1,2,3", @@ -2147,6 +2289,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000090 ", "Counter": "0,1,2,3", @@ -2159,6 +2302,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000090 ", "Counter": "0,1,2,3", @@ -2171,6 +2315,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000090 ", "Counter": "0,1,2,3", @@ -2183,6 +2328,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000090 ", "Counter": "0,1,2,3", @@ -2195,6 +2341,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000090 ", "Counter": "0,1,2,3", @@ -2207,6 +2354,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000090 ", "Counter": "0,1,2,3", @@ -2219,6 +2367,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020120 ", "Counter": "0,1,2,3", @@ -2231,6 +2380,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0120 ", "Counter": "0,1,2,3", @@ -2243,6 +2393,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000120 ", "Counter": "0,1,2,3", @@ -2255,6 +2406,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000120 ", "Counter": "0,1,2,3", @@ -2267,6 +2419,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000120 ", "Counter": "0,1,2,3", @@ -2279,6 +2432,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000120 ", "Counter": "0,1,2,3", @@ -2291,6 +2445,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000120 ", "Counter": "0,1,2,3", @@ -2303,6 +2458,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000120 ", "Counter": "0,1,2,3", @@ -2315,6 +2471,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000120 ", "Counter": "0,1,2,3", @@ -2327,6 +2484,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000120 ", "Counter": "0,1,2,3", @@ -2339,6 +2497,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000120 ", "Counter": "0,1,2,3", @@ -2351,6 +2510,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000120 ", "Counter": "0,1,2,3", @@ -2363,6 +2523,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000120 ", "Counter": "0,1,2,3", @@ -2375,6 +2536,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020240 ", "Counter": "0,1,2,3", @@ -2387,6 +2549,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0240 ", "Counter": "0,1,2,3", @@ -2399,6 +2562,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000240 ", "Counter": "0,1,2,3", @@ -2411,6 +2575,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000240 ", "Counter": "0,1,2,3", @@ -2423,6 +2588,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000240 ", "Counter": "0,1,2,3", @@ -2435,6 +2601,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000240 ", "Counter": "0,1,2,3", @@ -2447,6 +2614,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000240 ", "Counter": "0,1,2,3", @@ -2459,6 +2627,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000240 ", "Counter": "0,1,2,3", @@ -2471,6 +2640,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000240 ", "Counter": "0,1,2,3", @@ -2483,6 +2653,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000240 ", "Counter": "0,1,2,3", @@ -2495,6 +2666,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000240 ", "Counter": "0,1,2,3", @@ -2507,6 +2679,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch code reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000240 ", "Counter": "0,1,2,3", @@ -2519,6 +2692,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000240 ", "Counter": "0,1,2,3", @@ -2531,6 +2705,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020091 ", "Counter": "0,1,2,3", @@ -2543,6 +2718,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0091 ", "Counter": "0,1,2,3", @@ -2555,6 +2731,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000091 ", "Counter": "0,1,2,3", @@ -2567,6 +2744,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000091 ", "Counter": "0,1,2,3", @@ -2579,6 +2757,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000091 ", "Counter": "0,1,2,3", @@ -2591,6 +2770,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000091 ", "Counter": "0,1,2,3", @@ -2603,6 +2783,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000091 ", "Counter": "0,1,2,3", @@ -2615,6 +2796,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000091 ", "Counter": "0,1,2,3", @@ -2627,6 +2809,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000091 ", "Counter": "0,1,2,3", @@ -2639,6 +2822,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000091 ", "Counter": "0,1,2,3", @@ -2651,6 +2835,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000091 ", "Counter": "0,1,2,3", @@ -2663,6 +2848,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000091 ", "Counter": "0,1,2,3", @@ -2675,6 +2861,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000091 ", "Counter": "0,1,2,3", @@ -2687,6 +2874,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2000020122 ", "Counter": "0,1,2,3", @@ -2699,6 +2887,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the target was non-DRAM system address. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x20003c0122 ", "Counter": "0,1,2,3", @@ -2711,6 +2900,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000122 ", "Counter": "0,1,2,3", @@ -2723,6 +2913,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000122 ", "Counter": "0,1,2,3", @@ -2735,6 +2926,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000122 ", "Counter": "0,1,2,3", @@ -2747,6 +2939,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000122 ", "Counter": "0,1,2,3", @@ -2759,6 +2952,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000122 ", "Counter": "0,1,2,3", @@ -2771,6 +2965,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x2004000122 ", "Counter": "0,1,2,3", @@ -2783,6 +2978,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f84000122 ", "Counter": "0,1,2,3", @@ -2795,6 +2991,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with no details on snoop-related information. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x00bc000122 ", "Counter": "0,1,2,3", @@ -2807,6 +3004,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x013c000122 ", "Counter": "0,1,2,3", @@ -2819,6 +3017,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 with a snoop miss response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x023c000122 ", "Counter": "0,1,2,3", @@ -2831,6 +3030,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x043c000122 ", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/other.json b/tools/perf/pmu-events/arch/x86/broadwell/other.json index edf14f0d0eaf..4f829c5febbe 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/other.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "This event counts when there is a transition from ring 1,2 or 3 to ring0.", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": "2000003", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles in which the L1 and L2 are locked due to a UC lock or split lock. A lock is asserted in case of locked memory access, due to noncacheable memory, locked operation that spans two cache lines, or a page walk from the noncacheable page table. L1D and L2 locks have a very high performance penalty and it is highly recommended to avoid such access.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json index 78913ae87703..97c5d0784c6c 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/pipeline.json @@ -2,32 +2,42 @@ { "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap);\n - store forwarding is impossible due to u-arch limitations;\n - preceding lock RMW operations are not forwarded;\n - store has the no-forward bit set (uncacheable/page-split/masked stores);\n - all-blocking stores are used (mostly, fences and port I/O);\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.", @@ -59,26 +69,37 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", + "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "EventCode": "0x0D", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "INT_MISC.RAT_STALL_CYCLES", + "UMask": "0x3", + "EventName": "INT_MISC.RECOVERY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "EventCode": "0x0D", "Counter": "0,1,2,3", "UMask": "0x3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).", "EventCode": "0x0E", @@ -89,6 +110,18 @@ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "EventCode": "0x0E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Number of flags-merge uops being allocated. Such uops considered perf sensitive\n added by GSR u-arch.", "EventCode": "0x0E", @@ -117,18 +150,6 @@ "BriefDescription": "Number of Multiply packed/scalar single precision uops allocated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "EventCode": "0x0E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" - }, { "PublicDescription": "This event counts the number of the divide operations executed. Uses edge-detect and a cmask value of 1 on ARITH.FPU_DIV_ACTIVE to get the number of the divide operations executed.", "EventCode": "0x14", @@ -139,6 +160,26 @@ "BriefDescription": "Cycles when divider is busy executing divide operations", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Thread cycles when thread is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This is a fixed-frequency event programmed to general counters. It counts when the core is unhalted at 100 Mhz.", "EventCode": "0x3C", @@ -149,6 +190,36 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "Counter": "0,1,2,3", @@ -158,6 +229,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.", "EventCode": "0x4c", @@ -224,6 +304,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", "EventCode": "0x87", @@ -404,6 +496,15 @@ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "Counter": "0,1,2,3", + "UMask": "0xa0", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts both taken and not taken speculative and retired mispredicted macro conditional branch instructions.", "EventCode": "0x89", @@ -434,6 +535,16 @@ "BriefDescription": "Speculative and retired mispredicted macro conditional branches", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.", + "EventCode": "0xA0", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", + "SampleAfterValue": "2000003", + "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "EventCode": "0xA1", @@ -445,602 +556,472 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Cycles per core when uops are exectuted in port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED_PORT.PORT_0", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "BriefDescription": "Cycles per core when uops are exectuted in port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "UMask": "0x4", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "UMask": "0x4", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", + "UMask": "0x4", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RESOURCE_STALLS.RS", + "UMask": "0x8", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "RESOURCE_STALLS.ROB", + "UMask": "0x8", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", - "EventCode": "0xA3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "UMask": "0x10", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are executed in port 4", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", + "UMask": "0x10", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", + "BriefDescription": "Cycles per core when uops are exectuted in port 4.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", - "EventCode": "0xA3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "UMask": "0x10", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per thread when uops are executed in port 4", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", - "EventCode": "0xA3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "UMask": "0x20", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", - "EventCode": "0xA3", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", + "UMask": "0x20", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of Uops delivered by the LSD. ", - "EventCode": "0xA8", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.UOPS", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of uops to be executed per-thread each cycle.", - "EventCode": "0xB1", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.THREAD", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "BriefDescription": "Cycles per core when uops are exectuted in port 6.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of uops executed from any thread.", - "EventCode": "0xB1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE", + "UMask": "0x40", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", - "EventCode": "0xB1", - "Invert": "1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", - "EventCode": "0xC0", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "BDM61", - "EventName": "INST_RETIRED.ANY_P", + "UMask": "0x80", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", - "EventCode": "0xC0", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "INST_RETIRED.X87", + "UMask": "0x80", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", "SampleAfterValue": "2000003", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", - "EventCode": "0xC0", - "Counter": "1", + "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", + "Counter": "0,1,2,3", "UMask": "0x1", - "Errata": "BDM11, BDM55", - "EventName": "INST_RETIRED.PREC_DIST", + "EventName": "RESOURCE_STALLS.ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "BriefDescription": "Resource-related stall cycles", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", + "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "UMask": "0x4", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", - "EventCode": "0xC2", + "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_RETIRED.ALL", + "UMask": "0x8", + "EventName": "RESOURCE_STALLS.SB", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops.", - "CounterHTOff": "0,1,2,3,4,5,6,7", - "Data_LA": "1" + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", - "EventCode": "0xC2", + "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "UMask": "0x10", + "EventName": "RESOURCE_STALLS.ROB", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", - "EventCode": "0xC2", - "Invert": "1", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", + "EventCode": "0xA3", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", - "EventCode": "0xC2", - "Invert": "1", + "EventCode": "0xA3", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", - "EventCode": "0xC3", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MACHINE_CLEARS.CYCLES", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "EventCode": "0xC3", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "EventCode": "0xC3", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "MACHINE_CLEARS.MASKMOV", - "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "SampleAfterValue": "2000003", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x40", - "Errata": "BDW98", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "EventCode": "0xA3", "Counter": "0,1,2,3", "UMask": "0x4", - "Errata": "BDW98", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BR_MISP_RETIRED.RET", - "SampleAfterValue": "100007", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "2000003", + "BriefDescription": "Total execution stalls.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "CounterMask": "5", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", - "EventCode": "0xCC", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Count cases of saving new LBR", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "CounterMask": "5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "EventCode": "0x3C", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x89", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x10", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "UMask": "0x1", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", + "BriefDescription": "Number of Uops delivered by the LSD.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x20", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "UMask": "0x1", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x80", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.THREAD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "EventCode": "0xC5", + "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xB1", @@ -1083,335 +1064,364 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xe6", + "PublicDescription": "Number of uops executed from any thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of uops executed on the core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "EventCode": "0xb1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "UMask": "0x0", + "Errata": "BDM61", + "EventName": "INST_RETIRED.ANY_P", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PEBS": "2", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "EventCode": "0xC0", + "Counter": "1", + "UMask": "0x1", + "Errata": "BDM11, BDM55", + "EventName": "INST_RETIRED.PREC_DIST", + "SampleAfterValue": "2000003", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "CounterHTOff": "1" + }, + { + "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "UMask": "0x2", + "EventName": "INST_RETIRED.X87", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", + "EventCode": "0xC1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", + "UMask": "0x40", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", + "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Data_LA": "1" }, { - "EventCode": "0x5E", + "PublicDescription": "This event counts cycles without actually retired uops.", + "EventCode": "0xC2", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", - "EventCode": "0xA1", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_EXECUTED_PORT.PORT_0", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "CounterMask": "10", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", - "EventCode": "0xA1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", + "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Retirement slots used. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", - "EventCode": "0xA1", + "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "MACHINE_CLEARS.CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "EventCode": "0xA1", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "EventCode": "0xA1", + "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x4", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "100003", + "BriefDescription": "Self-modifying code (SMC) detected.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "EventCode": "0xA1", + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x20", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "EventName": "MACHINE_CLEARS.MASKMOV", + "SampleAfterValue": "100003", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "EventCode": "0xA1", + "PublicDescription": "This event counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "EventCode": "0xA1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x1", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.", - "EventCode": "0xA0", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", - "SampleAfterValue": "2000003", - "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", - "Counter": "Fixed counter 2", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "EventCode": "0xC4", + "Counter": "0,1,2,3", "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "Errata": "BDW98", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "UMask": "0x8", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "SampleAfterValue": "100007", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "PublicDescription": "This event counts not taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterMask": "1", + "UMask": "0x10", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", + "UMask": "0x20", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PublicDescription": "This event counts far branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", + "UMask": "0x40", + "Errata": "BDW98", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "SampleAfterValue": "100007", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", + "UMask": "0x0", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", + "UMask": "0x1", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "Invert": "1", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "UMask": "0x4", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "BR_MISP_RETIRED.RET", + "SampleAfterValue": "100007", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", + "EventCode": "0xCC", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "BriefDescription": "Count cases of saving new LBR", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "EventCode": "0xe6", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "UMask": "0x1f", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json index 4301e6fbc5eb..2a015e4c7e21 100644 --- a/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwell/virtual-memory.json @@ -43,6 +43,16 @@ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0xe", + "Errata": "BDM69", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.", "EventCode": "0x08", @@ -72,6 +82,15 @@ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", "EventCode": "0x49", @@ -116,6 +135,16 @@ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0xe", + "Errata": "BDM69", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.", "EventCode": "0x49", @@ -145,6 +174,15 @@ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles for an extended page table walk. The Extended Page directory cache differs from standard TLB caches by the operating system that use it. Virtual machine operating systems use the extended page directory cache, while guest operating systems use the standard TLB caches.", "EventCode": "0x4F", @@ -199,6 +237,16 @@ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0xe", + "Errata": "BDM69", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of cycles while PMH is busy with the page walk.", "EventCode": "0x85", @@ -228,6 +276,15 @@ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", "EventCode": "0xAE", @@ -251,61 +308,61 @@ { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x21", + "UMask": "0x12", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", + "BriefDescription": "Number of DTLB page walker hits in the L2.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x12", + "UMask": "0x14", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L2.", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x22", + "UMask": "0x18", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L2.", + "BriefDescription": "Number of DTLB page walker hits in Memory.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x14", + "UMask": "0x21", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x24", + "UMask": "0x22", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", + "BriefDescription": "Number of ITLB page walker hits in the L2.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x18", + "UMask": "0x24", "Errata": "BDM69, BDM98", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in Memory.", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", "CounterHTOff": "0,1,2,3" }, { @@ -327,62 +384,5 @@ "SampleAfterValue": "100007", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0xe", - "Errata": "BDM69", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0xe", - "Errata": "BDM69", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0xe", - "Errata": "BDM69", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 97d00f2d10ddd6b3ca89641d4a2fe6922a535c3f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:44:03 -0800 Subject: perf vendor events intel: Update BroadwellX events to V13 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/broadwellx/cache.json | 383 +++--- .../arch/x86/broadwellx/floating-point.json | 108 +- .../pmu-events/arch/x86/broadwellx/frontend.json | 138 +-- .../pmu-events/arch/x86/broadwellx/memory.json | 40 +- .../perf/pmu-events/arch/x86/broadwellx/other.json | 20 +- .../pmu-events/arch/x86/broadwellx/pipeline.json | 1214 ++++++++++---------- .../arch/x86/broadwellx/virtual-memory.json | 150 +-- 7 files changed, 1055 insertions(+), 998 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json index d1d043829b95..bf0c51272068 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/cache.json @@ -11,11 +11,28 @@ }, { "EventCode": "0x24", - "UMask": "0x41", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "UMask": "0x22", + "BriefDescription": "RFO requests that miss L2 cache.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x24", + "BriefDescription": "L2 cache misses when fetching instructions.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x27", + "BriefDescription": "Demand requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -29,6 +46,43 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0x3f", + "BriefDescription": "All requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x41", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x42", + "BriefDescription": "RFO requests that hit L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x44", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0x50", @@ -69,6 +123,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xe7", + "BriefDescription": "Demand requests to L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0xf8", @@ -79,6 +142,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xff", + "BriefDescription": "All L2 requests.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x27", "UMask": "0x50", @@ -130,6 +202,27 @@ "SampleAfterValue": "2000003", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "UMask": "0x2", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.FB_FULL", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x51", "UMask": "0x1", @@ -151,6 +244,29 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "Errata": "BDM76", + "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "Errata": "BDM76", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "UMask": "0x2", @@ -158,7 +274,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -175,24 +291,24 @@ }, { "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "UMask": "0x4", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "CounterMask": "1", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "UMask": "0x8", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "Errata": "BDM76", - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -208,18 +324,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x63", "UMask": "0x2", @@ -266,7 +370,7 @@ "BriefDescription": "Demand and prefetch data reads", "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable \"Demands\" and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -280,27 +384,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -308,37 +421,37 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -346,24 +459,24 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -371,69 +484,69 @@ { "EventCode": "0xD1", "UMask": "0x1", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x2", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x8", - "BriefDescription": "Retired load uops misses in L1 cache as data sources.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x20", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -445,84 +558,83 @@ { "EventCode": "0xD1", "UMask": "0x40", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x1", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x8", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x1", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "BDE70, BDM100", - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -534,7 +646,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -546,7 +658,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -694,119 +806,6 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x24", - "UMask": "0x42", - "BriefDescription": "RFO requests that hit L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x44", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when fetching instructions.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x27", - "BriefDescription": "Demand requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "Errata": "BDM76", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "Offcore": "1", "EventCode": "0xB7, 0xBB", @@ -816,6 +815,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -828,6 +828,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -840,6 +841,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -852,6 +854,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -864,6 +867,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -876,6 +880,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -888,6 +893,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -900,6 +906,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -912,6 +919,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -924,6 +932,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -936,6 +945,20 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3", + "MSRValue": "0x3f803c0002", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json index 4ae1ea24f22f..d7b9d9c9c518 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/floating-point.json @@ -6,7 +6,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -17,7 +17,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -25,7 +25,6 @@ "EventCode": "0xC7", "UMask": "0x1", "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "SampleAfterValue": "2000003", @@ -35,17 +34,24 @@ "EventCode": "0xC7", "UMask": "0x2", "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x3", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xC7", "UMask": "0x4", "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -55,7 +61,6 @@ "EventCode": "0xC7", "UMask": "0x8", "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -65,19 +70,54 @@ "EventCode": "0xC7", "UMask": "0x10", "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x15", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", + "SampleAfterValue": "2000006", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xc7", + "UMask": "0x20", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x2a", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SINGLE", + "SampleAfterValue": "2000005", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x3c", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.PACKED", + "SampleAfterValue": "2000004", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xCA", "UMask": "0x2", "BriefDescription": "Number of X87 assists due to output value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -87,7 +127,7 @@ "BriefDescription": "Number of X87 assists due to input value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -97,7 +137,7 @@ "BriefDescription": "Number of SIMD FP assists due to Output values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", + "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -107,7 +147,7 @@ "BriefDescription": "Number of SIMD FP assists due to input values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -121,51 +161,5 @@ "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xc7", - "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3c", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.PACKED", - "SampleAfterValue": "2000004", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x2a", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SINGLE", - "SampleAfterValue": "2000005", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x15", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", - "SampleAfterValue": "2000006", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json b/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json index 06bf0a40e568..72781e1e3362 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/frontend.json @@ -15,80 +15,49 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_UOPS", - "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x4", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.MITE_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "UMask": "0x8", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", + "EventName": "IDQ.DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "UMask": "0x10", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "EventName": "IDQ.MS_DSB_UOPS", + "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -99,7 +68,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -111,7 +80,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_OCCUR", "CounterMask": "1", - "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -122,7 +91,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -133,7 +102,17 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x20", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -144,7 +123,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -155,7 +134,39 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -165,7 +176,7 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_ALL_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -205,7 +216,7 @@ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -268,18 +279,7 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json index 1204ea8ff30d..d79a5cfea44b 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/memory.json @@ -95,7 +95,6 @@ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", "Counter": "0,1,2,3", "EventName": "TX_EXEC.MISC1", - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -171,11 +170,11 @@ { "EventCode": "0xc8", "UMask": "0x4", - "BriefDescription": "Number of times HLE abort was triggered", + "BriefDescription": "Number of times HLE abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -252,11 +251,11 @@ { "EventCode": "0xc9", "UMask": "0x4", - "BriefDescription": "Number of times RTM abort was triggered", + "BriefDescription": "Number of times RTM abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered .", + "PublicDescription": "Number of times RTM abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -439,6 +438,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -451,6 +451,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -463,6 +464,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -475,6 +477,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -487,6 +490,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -499,6 +503,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -511,6 +516,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -523,6 +529,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -535,6 +542,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -547,6 +555,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -559,6 +568,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -571,6 +581,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -583,6 +594,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -595,6 +607,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -607,6 +620,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -619,6 +633,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -631,6 +646,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -643,6 +659,20 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts all demand data writes (RFOs) that miss in the L3", + "MSRValue": "0x3fbfc00002", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/other.json b/tools/perf/pmu-events/arch/x86/broadwellx/other.json index 718fcb1db2ee..4475249ea9da 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/other.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/other.json @@ -9,16 +9,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x5C", - "UMask": "0x2", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "Counter": "0,1,2,3", - "EventName": "CPL_CYCLES.RING123", - "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EdgeDetect": "1", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5C", + "UMask": "0x2", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "Counter": "0,1,2,3", + "EventName": "CPL_CYCLES.RING123", + "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x63", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json index 02b4e1035f2d..920c89da9111 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/pipeline.json @@ -3,31 +3,41 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -60,22 +70,33 @@ }, { "EventCode": "0x0D", - "UMask": "0x8", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "UMask": "0x3", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RAT_STALL_CYCLES", - "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "CounterMask": "1", + "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x0D", "UMask": "0x3", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "AnyThread": "1", "CounterMask": "1", - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0D", + "UMask": "0x8", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -89,6 +110,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0x0E", "UMask": "0x10", @@ -117,18 +150,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, { "EventCode": "0x14", "UMask": "0x1", @@ -139,6 +160,26 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Thread cycles when thread is not in halt state", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "UMask": "0x1", @@ -149,6 +190,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "UMask": "0x2", @@ -158,6 +229,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4c", "UMask": "0x1", @@ -224,6 +304,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "Invert": "1", + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_END", + "CounterMask": "1", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "UMask": "0x1", @@ -404,6 +496,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "UMask": "0xa0", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "UMask": "0xc1", @@ -434,6 +535,16 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xA0", + "UMask": "0x3", + "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "Counter": "0,1,2,3", + "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", + "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA1", "UMask": "0x1", @@ -446,601 +557,471 @@ }, { "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "UMask": "0x1", + "BriefDescription": "Cycles per core when uops are exectuted in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x2", + "BriefDescription": "Cycles per core when uops are exectuted in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x4", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", + "EventCode": "0xA1", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x4", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.RS", - "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x10", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ROB", - "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "CounterMask": "1", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", - "CounterMask": "8", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per core when uops are exectuted in port 4.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", - "CounterMask": "2", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", - "CounterMask": "4", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", - "CounterMask": "5", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per core when uops are exectuted in port 5.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", - "CounterMask": "6", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "CounterMask": "12", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Number of Uops delivered by the LSD.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "LSD.UOPS", - "PublicDescription": "Number of Uops delivered by the LSD. ", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per core when uops are exectuted in port 6.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "PublicDescription": "Number of uops executed from any thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "BDM61", - "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x2", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.X87", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", + "EventCode": "0xA2", "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "BDM11, BDM55", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", - "CounterHTOff": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", - "UMask": "0x40", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "EventCode": "0xA2", + "UMask": "0x4", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", + "EventName": "RESOURCE_STALLS.RS", + "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x1", - "BriefDescription": "Actually retired uops.", - "Data_LA": "1", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x8", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Retirement slots used.", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x10", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", + "EventName": "RESOURCE_STALLS.ROB", + "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", "CounterMask": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "CounterMask": "10", - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "CounterMask": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", - "UMask": "0x1", - "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.CYCLES", - "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", + "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", + "CounterMask": "2", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "CounterMask": "2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", "UMask": "0x4", - "BriefDescription": "Self-modifying code (SMC) detected.", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.SMC", - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "CounterMask": "4", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC3", - "UMask": "0x20", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "EventCode": "0xA3", + "UMask": "0x4", + "BriefDescription": "Total execution stalls.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.MASKMOV", - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "SampleAfterValue": "100003", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "CounterMask": "4", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", - "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x0", - "BriefDescription": "All (macro) branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x8", - "BriefDescription": "Return instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x10", - "BriefDescription": "Not taken branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x40", - "BriefDescription": "Far branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "Errata": "BDW98", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "Errata": "BDW98", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "CounterMask": "5", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC5", - "UMask": "0x1", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x8", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", - "PEBS": "1", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.RET", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", - "SampleAfterValue": "100007", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "CounterMask": "5", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC5", - "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", + "CounterMask": "6", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xCC", - "UMask": "0x20", - "BriefDescription": "Count cases of saving new LBR", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "CounterMask": "6", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Thread cycles when thread is not in halt state", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "CounterMask": "8", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "UMask": "0xa0", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "CounterMask": "8", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "CounterMask": "12", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "CounterMask": "12", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Number of Uops delivered by the LSD.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", - "AnyThread": "1", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_ACTIVE", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "Invert": "1", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xB1", @@ -1083,335 +1064,364 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xe6", - "UMask": "0x1f", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "CounterMask": "8", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "CounterMask": "2", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "CounterMask": "4", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterMask": "3", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "CounterMask": "12", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Invert": "1", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "CounterMask": "5", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xC0", + "UMask": "0x0", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "CounterMask": "6", + "EventName": "INST_RETIRED.ANY_P", + "Errata": "BDM61", + "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "EventCode": "0xC3", + "EventCode": "0xC0", "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type.", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "PEBS": "2", + "Counter": "1", + "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "BDM11, BDM55", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "SampleAfterValue": "2000003", + "CounterHTOff": "1" + }, + { + "EventCode": "0xC0", + "UMask": "0x2", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", - "CounterMask": "1", - "SampleAfterValue": "100003", + "EventName": "INST_RETIRED.X87", + "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xC1", + "UMask": "0x40", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "Data_LA": "1", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", - "CounterMask": "1", - "SampleAfterValue": "200003", + "EventName": "UOPS_RETIRED.ALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "CounterMask": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles per thread when uops are executed in port 0", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterMask": "10", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Retirement slots used. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "MACHINE_CLEARS.CYCLES", + "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "EdgeDetect": "1", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.COUNT", + "CounterMask": "1", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "EventCode": "0xC3", + "UMask": "0x4", + "BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xC3", "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.MASKMOV", + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "EventCode": "0xC4", + "UMask": "0x0", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "EventCode": "0xC4", + "UMask": "0x1", + "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA0", - "UMask": "0x3", - "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "EventCode": "0xC4", + "UMask": "0x2", + "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", - "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", + "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0xC4", + "UMask": "0x4", + "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "Errata": "BDW98", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xC4", + "UMask": "0x8", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", - "UMask": "0x3", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "EventCode": "0xC4", + "UMask": "0x10", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x20", + "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x40", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "Errata": "BDW98", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x0", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x1", + "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x4", + "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC5", + "UMask": "0x8", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.RET", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "EventCode": "0xC5", + "UMask": "0x20", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xCC", + "UMask": "0x20", + "BriefDescription": "Count cases of saving new LBR", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "EventCode": "0xe6", + "UMask": "0x1f", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json index 5ce8b67ba076..7d79c707c6d1 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellx/virtual-memory.json @@ -43,6 +43,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0xe", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x10", @@ -72,6 +82,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0x60", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x1", @@ -116,6 +135,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0xe", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x10", @@ -145,6 +174,15 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0x60", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4F", "UMask": "0x10", @@ -199,6 +237,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0xe", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x85", "UMask": "0x10", @@ -228,6 +276,15 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0x60", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xAE", "UMask": "0x1", @@ -250,60 +307,60 @@ }, { "EventCode": "0xBC", - "UMask": "0x21", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", + "UMask": "0x12", + "BriefDescription": "Number of DTLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x12", - "BriefDescription": "Number of DTLB page walker hits in the L2.", + "UMask": "0x14", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x22", - "BriefDescription": "Number of ITLB page walker hits in the L2.", + "UMask": "0x18", + "BriefDescription": "Number of DTLB page walker hits in Memory.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x14", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", + "UMask": "0x21", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x24", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", + "UMask": "0x22", + "BriefDescription": "Number of ITLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x18", - "BriefDescription": "Number of DTLB page walker hits in Memory.", + "UMask": "0x24", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" @@ -327,62 +384,5 @@ "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x60", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x60", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0xe", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x60", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 03da89c5516ea7be3afce2bd86b0a886877db835 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:46:30 -0800 Subject: perf vendor events intel: Update Goldmont events to V12 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/goldmont/cache.json | 1244 +++++++++++++++++--- .../perf/pmu-events/arch/x86/goldmont/memory.json | 280 ++++- tools/perf/pmu-events/arch/x86/goldmont/other.json | 54 +- .../pmu-events/arch/x86/goldmont/pipeline.json | 506 ++++---- .../arch/x86/goldmont/virtual-memory.json | 60 +- 5 files changed, 1687 insertions(+), 457 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/goldmont/cache.json b/tools/perf/pmu-events/arch/x86/goldmont/cache.json index 4e02e1e5e70d..f8bbe087b0f8 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/cache.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/cache.json @@ -1,4 +1,24 @@ [ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.", + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x41", + "EventName": "LONGEST_LAT_CACHE.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache request misses" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.", + "EventCode": "0x2E", + "Counter": "0,1,2,3", + "UMask": "0x4f", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache requests" + }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of demand and prefetch transactions that the L2 XQ rejects due to a full or near full condition which likely indicates back pressure from the intra-die interconnect (IDI) fabric. The XQ may reject transactions from the L2Q (non-cacheable requests), L2 misses and L2 write-back victims.", @@ -11,120 +31,119 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to insure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.", + "PublicDescription": "Counts the number of demand and L1 prefetcher requests rejected by the L2Q due to a full or nearly full condition which likely indicates back pressure from L2Q. It also counts requests that would have gone directly to the XQ, but are rejected due to a full or nearly full condition, indicating back pressure from the IDI link. The L2Q may also reject transactions from a core to ensure fairness between cores, or to delay a core's dirty eviction when the address conflicts with incoming external snoops.", "EventCode": "0x31", "Counter": "0,1,2,3", "UMask": "0x0", "EventName": "CORE_REJECT_L2Q.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Requests rejected by the L2Q " + "BriefDescription": "Requests rejected by the L2Q" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts memory requests originating from the core that reference a cache line in the L2 cache.", - "EventCode": "0x2E", + "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. No count will occur if the evicted line is clean, and hence does not require a writeback.", + "EventCode": "0x51", "Counter": "0,1,2,3", - "UMask": "0x4f", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", + "UMask": "0x1", + "EventName": "DL1.DIRTY_EVICTION", "SampleAfterValue": "200003", - "BriefDescription": "L2 cache requests" + "BriefDescription": "L1 Cache evictions for dirty data" }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts memory requests originating from the core that miss in the L2 cache.", - "EventCode": "0x2E", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.", + "EventCode": "0x86", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "LONGEST_LAT_CACHE.MISS", + "UMask": "0x2", + "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "L2 cache request misses" + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss." }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts cycles that an ICache miss is outstanding, and instruction fetch is stalled. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes, while an Icache miss outstanding. Note this event is not the same as cycles to retrieve an instruction due to an Icache miss. Rather, it is the part of the Instruction Cache (ICache) miss time where no bytes are available for the decoder.", - "EventCode": "0x86", + "EventCode": "0xB7", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Cycles where code-fetch is stalled and an ICache miss is outstanding. This is not the same as an ICache Miss." + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100007", + "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts the number of load uops retired.", + "PublicDescription": "Counts locked memory uops retired. This includes regular locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. See the SDM for a complete description of which memory load accesses are locks.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", + "UMask": "0x21", + "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired (Precise event capable)" + "BriefDescription": "Locked load uops retired (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts the number of store uops retired.", + "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_UOPS_RETIRED.ALL_STORES", + "UMask": "0x41", + "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Store uops retired (Precise event capable)" + "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts the number of memory uops retired that is either a loads or a store or both.", + "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x83", - "EventName": "MEM_UOPS_RETIRED.ALL", + "UMask": "0x42", + "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired (Precise event capable)" + "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts locked memory uops retired. This includes \"regular\" locks and bus locks. (To specifically count bus locks only, see the Offcore response event.) A locked access is one with a lock prefix, or an exchange to memory. See the SDM for a complete description of which memory load accesses are locks.", + "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", + "UMask": "0x43", + "EventName": "MEM_UOPS_RETIRED.SPLIT", "SampleAfterValue": "200003", - "BriefDescription": "Locked load uops retired (Precise event capable)" + "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts load uops retired where the data requested spans a 64 byte cache line boundary.", + "PublicDescription": "Counts the number of load uops retired.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", + "UMask": "0x81", + "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Load uops retired (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts store uops retired where the data requested spans a 64 byte cache line boundary.", + "PublicDescription": "Counts the number of store uops retired.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", + "UMask": "0x82", + "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "200003", - "BriefDescription": "Stores uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Store uops retired (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts memory uops retired where the data requested spans a 64 byte cache line boundary.", + "PublicDescription": "Counts the number of memory uops retired that is either a loads or a store or both.", "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x43", - "EventName": "MEM_UOPS_RETIRED.SPLIT", + "UMask": "0x83", + "EventName": "MEM_UOPS_RETIRED.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Memory uops retired that split a cache-line (Precise event capable)" + "BriefDescription": "Memory uops retired (Precise event capable)" }, { "PEBS": "2", @@ -140,24 +159,24 @@ { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts load uops retired that miss the L1 data cache.", + "PublicDescription": "Counts load uops retired that hit in the L2 cache.", "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", + "UMask": "0x2", + "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" + "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts load uops retired that hit in the L2 cache.", + "PublicDescription": "Counts load uops retired that miss the L1 data cache.", "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", + "UMask": "0x8", + "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Load uops retired that hit L2 (Precise event capable)" + "BriefDescription": "Load uops retired that missed L1 data cache (Precise event capable)" }, { "PEBS": "2", @@ -205,24 +224,20 @@ }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts when a modified (dirty) cache line is evicted from the data L1 cache and needs to be written back to memory. No count will occur if the evicted line is clean, and hence does not require a writeback.", - "EventCode": "0x51", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "DL1.DIRTY_EVICTION", - "SampleAfterValue": "200003", - "BriefDescription": "L1 Cache evictions for dirty data" - }, - { - "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", + "MSRValue": "0x40000032b7 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", + "EventName": "OFFCORE_RESPONSE.ANY_READ.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)" + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x36000032b7 ", "Counter": "0,1,2,3", @@ -234,6 +249,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x10000032b7 ", "Counter": "0,1,2,3", @@ -245,6 +262,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x04000032b7 ", "Counter": "0,1,2,3", @@ -256,6 +275,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x02000032b7 ", "Counter": "0,1,2,3", @@ -267,6 +288,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x00000432b7 ", "Counter": "0,1,2,3", @@ -278,6 +301,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x00000132b7 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_READ.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000022 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000022 ", "Counter": "0,1,2,3", @@ -289,6 +340,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000022 ", "Counter": "0,1,2,3", @@ -300,6 +353,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000022 ", "Counter": "0,1,2,3", @@ -311,6 +366,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000022 ", "Counter": "0,1,2,3", @@ -322,6 +379,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040022 ", "Counter": "0,1,2,3", @@ -333,6 +392,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010022 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000003091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600003091", "Counter": "0,1,2,3", @@ -344,6 +431,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000003091", "Counter": "0,1,2,3", @@ -355,6 +444,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400003091", "Counter": "0,1,2,3", @@ -366,6 +457,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200003091", "Counter": "0,1,2,3", @@ -377,6 +470,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000043091", "Counter": "0,1,2,3", @@ -388,6 +483,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000013091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000003010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600003010 ", "Counter": "0,1,2,3", @@ -399,6 +522,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000003010 ", "Counter": "0,1,2,3", @@ -410,6 +535,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400003010 ", "Counter": "0,1,2,3", @@ -421,6 +548,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200003010 ", "Counter": "0,1,2,3", @@ -432,347 +561,957 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000043010 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000013010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000048000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000018000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000044800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000014800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000044000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000014000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000042000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000012000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000041000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000011000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x3600000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0400000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0200000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. ", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000040800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that hit the L2 cache.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000008000 ", + "MSRValue": "0x0000010800 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400008000 ", + "MSRValue": "0x4000000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts bus lock and split lock requests that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200008000 ", + "MSRValue": "0x3600000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000048000 ", + "MSRValue": "0x1000000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_HIT", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that hit the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000018000 ", + "MSRValue": "0x0400000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts requests to the uncore subsystem that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600004800 ", + "MSRValue": "0x0200000400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000044800 ", + "MSRValue": "0x0000040400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600004000 ", + "MSRValue": "0x0000010400 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache.", + "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000004000 ", + "MSRValue": "0x4000000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400004000 ", + "MSRValue": "0x3600000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200004000 ", + "MSRValue": "0x1000000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000044000 ", + "MSRValue": "0x0400000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that hit the L2 cache.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600002000 ", + "MSRValue": "0x0200000200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000002000 ", + "MSRValue": "0x0000040200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400002000 ", + "MSRValue": "0x0000010200 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200002000 ", + "MSRValue": "0x4000000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000042000 ", + "MSRValue": "0x3600000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_HIT", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that hit the L2 cache.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600001000 ", + "MSRValue": "0x1000000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000001000 ", + "MSRValue": "0x0400000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400001000 ", + "MSRValue": "0x0200000100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200001000 ", + "MSRValue": "0x0000040100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000041000 ", + "MSRValue": "0x0000010100 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_HIT", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts data cache lines requests by software prefetch instructions that hit the L2 cache.", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000800 ", + "MSRValue": "0x4000000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x1000000800 ", + "MSRValue": "0x3600000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HITM_OTHER_CORE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0400000800 ", + "MSRValue": "0x1000000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.HIT_OTHER_CORE_NO_FWD", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0200000800 ", + "MSRValue": "0x0400000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that true miss for the L2 cache with a snoop miss in the other processor module. ", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000040800 ", + "MSRValue": "0x0200000080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_HIT", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.SNOOP_MISS_OR_NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that hit the L2 cache.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that true miss for the L2 cache with a snoop miss in the other processor module. ", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x0000010400 ", + "MSRValue": "0x0000040080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.ANY_RESPONSE", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_HIT", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts bus lock and split lock requests that have any transaction responses from the uncore subsystem.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that hit the L2 cache.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000100 ", + "MSRValue": "0x0000010080 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.ANY", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100007", - "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache.", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that have any transaction responses from the uncore subsystem.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", - "MSRValue": "0x3600000080 ", + "MSRValue": "0x4000000020 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.ANY", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.OUTSTANDING", + "MSRIndex": "0x1a6", "SampleAfterValue": "100007", - "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache.", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000020 ", "Counter": "0,1,2,3", @@ -784,6 +1523,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000020 ", "Counter": "0,1,2,3", @@ -795,6 +1536,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000020 ", "Counter": "0,1,2,3", @@ -806,6 +1549,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000020 ", "Counter": "0,1,2,3", @@ -817,6 +1562,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040020 ", "Counter": "0,1,2,3", @@ -828,6 +1575,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010020 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000010 ", "Counter": "0,1,2,3", @@ -839,6 +1614,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000010 ", "Counter": "0,1,2,3", @@ -850,6 +1627,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000010 ", "Counter": "0,1,2,3", @@ -861,6 +1640,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000010 ", "Counter": "0,1,2,3", @@ -872,6 +1653,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040010 ", "Counter": "0,1,2,3", @@ -883,6 +1666,34 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x4000000008 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.COREWB.OUTSTANDING", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that are outstanding, per cycle, from the time of the L2 miss to when any response is received.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000008 ", "Counter": "0,1,2,3", @@ -894,6 +1705,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000008 ", "Counter": "0,1,2,3", @@ -905,6 +1718,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000008 ", "Counter": "0,1,2,3", @@ -916,6 +1731,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000008 ", "Counter": "0,1,2,3", @@ -927,6 +1744,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040008 ", "Counter": "0,1,2,3", @@ -938,6 +1757,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010008 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x4000000004 ", "Counter": "0,1,2,3", @@ -949,6 +1783,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000004 ", "Counter": "0,1,2,3", @@ -960,6 +1796,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x1000000004 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000004 ", "Counter": "0,1,2,3", @@ -971,6 +1822,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000004 ", "Counter": "0,1,2,3", @@ -982,6 +1835,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040004 ", "Counter": "0,1,2,3", @@ -993,6 +1848,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010004 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x4000000002 ", "Counter": "0,1,2,3", @@ -1004,6 +1874,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000002 ", "Counter": "0,1,2,3", @@ -1015,6 +1887,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000002 ", "Counter": "0,1,2,3", @@ -1026,6 +1900,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000002 ", "Counter": "0,1,2,3", @@ -1037,6 +1913,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000002 ", "Counter": "0,1,2,3", @@ -1048,6 +1926,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040002 ", "Counter": "0,1,2,3", @@ -1059,6 +1939,21 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010002 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that have any transaction responses from the uncore subsystem.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that are outstanding, per cycle, from the time of the L2 miss to when any response is received. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x4000000001 ", "Counter": "0,1,2,3", @@ -1070,6 +1965,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x3600000001 ", "Counter": "0,1,2,3", @@ -1081,6 +1978,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x1000000001 ", "Counter": "0,1,2,3", @@ -1092,6 +1991,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache with a snoop hit in the other processor module, no data forwarding is required. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0400000001 ", "Counter": "0,1,2,3", @@ -1103,6 +2004,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that true miss for the L2 cache with a snoop miss in the other processor module. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0200000001 ", "Counter": "0,1,2,3", @@ -1114,6 +2017,8 @@ "Offcore": "1" }, { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", "EventCode": "0xB7", "MSRValue": "0x0000040001 ", "Counter": "0,1,2,3", @@ -1123,5 +2028,18 @@ "SampleAfterValue": "100007", "BriefDescription": "Counts demand cacheable data reads of full cache lines that hit the L2 cache.", "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x0000010001 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines that have any transaction responses from the uncore subsystem.", + "Offcore": "1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/memory.json b/tools/perf/pmu-events/arch/x86/goldmont/memory.json index ac8b0d365a19..690cebd12a94 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/memory.json @@ -1,14 +1,4 @@ [ - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved - as another core is in the process of modifying the data.", - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "SampleAfterValue": "200003", - "BriefDescription": "Machine clears due to memory ordering issue" - }, { "PEBS": "2", "CollectPEBSRecord": "2", @@ -30,5 +20,275 @@ "EventName": "MISALIGN_MEM_REF.STORE_PAGE_SPLIT", "SampleAfterValue": "200003", "BriefDescription": "Store uops that split a page (Precise event capable)" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts machine clears due to memory ordering issues. This occurs when a snoop request happens and the machine is uncertain if memory ordering will be preserved as another core is in the process of modifying the data.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "SampleAfterValue": "200003", + "BriefDescription": "Machine clears due to memory ordering issue" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x20000032b7 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_READ.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data read, code read, and read for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000022 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_RFO.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000003091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads (demand & prefetch) that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000003010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_PF_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data reads generated by L1 or L2 prefetchers that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000008000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.ANY_REQUEST.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts requests to the uncore subsystem that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000004800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts any data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000004000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_STREAMING_STORES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts partial cache line data writes to uncacheable write combining (USWC) memory region that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000002000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L1_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache line reads generated by hardware L1 data cache prefetcher that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000001000 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.SW_PREFETCH.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cache lines requests by software prefetch instructions that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000800 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.FULL_STREAMING_STORES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts full cache line data writes to uncacheable write combining (USWC) memory region and full cache-line non-temporal writes that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000400 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.BUS_LOCKS.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts bus lock and split lock requests that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000200 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.UC_CODE_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts code reads in uncacheable (UC) memory region that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000100 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_WRITES.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of demand write requests (RFO) generated by a write to partial data cache line, including the writes to uncacheable (UC) and write through (WT), and write protected (WP) types of memory that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000080 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PARTIAL_READS.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand data partial reads, including data in uncacheable (UC) or uncacheable write combining (USWC) memory types that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000020 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts reads for ownership (RFO) requests generated by L2 prefetcher that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000010 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts data cacheline reads generated by hardware L2 cache prefetcher that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000008 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.COREWB.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of writeback transactions caused by L1 or L2 cache evictions that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000004 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand instruction cacheline and I-side prefetch requests that miss the instruction cache that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000002 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand reads for ownership (RFO) requests generated by a write to full data cache line that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address. Requires MSR_OFFCORE_RESP[0,1] to specify request type and response. (duplicated for both MSRs)", + "EventCode": "0xB7", + "MSRValue": "0x2000000001 ", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L2_MISS.NON_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100007", + "BriefDescription": "Counts demand cacheable data reads of full cache lines that miss the L2 cache and targets non-DRAM system address.", + "Offcore": "1" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/other.json b/tools/perf/pmu-events/arch/x86/goldmont/other.json index df25ca9542f1..959cadd7cb0e 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/other.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/other.json @@ -1,23 +1,23 @@ [ { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.", - "EventCode": "0xCA", + "PublicDescription": "Counts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events.", + "EventCode": "0x86", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL", + "UMask": "0x0", + "EventName": "FETCH_STALL.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend" + "BriefDescription": "Cycles code-fetch stalled due to any reason." }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.", - "EventCode": "0xCA", + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ITLB miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ITLB miss. Note: this event is not the same as page walk cycles to retrieve an instruction translation.", + "EventCode": "0x86", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY", + "UMask": "0x1", + "EventName": "FETCH_STALL.ITLB_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "Unfilled issue slots per cycle to recover" + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ITLB miss." }, { "CollectPEBSRecord": "1", @@ -29,6 +29,26 @@ "SampleAfterValue": "200003", "BriefDescription": "Unfilled issue slots per cycle" }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed because of a full resource in the backend. Including but not limited to resources such as the Re-order Buffer (ROB), reservation stations (RS), load/store buffers, physical registers, or any other needed machine resource that is currently unavailable. Note that uops must be available for consumption in order for this event to fire. If a uop is not available (Instruction Queue is empty), this event will not count.", + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RESOURCE_FULL", + "SampleAfterValue": "200003", + "BriefDescription": "Unfilled issue slots per cycle because of a full resource in the backend" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of issue slots per core cycle that were not consumed by the backend because allocation is stalled waiting for a mispredicted jump to retire or other branch-like conditions (e.g. the event is relevant during certain microcode flows). Counts all issue slots blocked while within this window including slots where uops were not available in the Instruction Queue.", + "EventCode": "0xCA", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "ISSUE_SLOTS_NOT_CONSUMED.RECOVERY", + "SampleAfterValue": "200003", + "BriefDescription": "Unfilled issue slots per cycle to recover" + }, { "CollectPEBSRecord": "2", "PublicDescription": "Counts hardware interrupts received by the processor.", @@ -36,8 +56,18 @@ "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "HW_INTERRUPTS.RECEIVED", + "SampleAfterValue": "203", + "BriefDescription": "Hardware interrupts received" + }, + { + "CollectPEBSRecord": "2", + "PublicDescription": "Counts the number of core cycles during which interrupts are masked (disabled). Increments by 1 each core cycle that EFLAGS.IF is 0, regardless of whether interrupts are pending or not.", + "EventCode": "0xCB", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "HW_INTERRUPTS.MASKED", "SampleAfterValue": "200003", - "BriefDescription": "Hardware interrupts received (Precise event capable)" + "BriefDescription": "Cycles hardware interrupts are masked" }, { "CollectPEBSRecord": "2", @@ -47,6 +77,6 @@ "UMask": "0x4", "EventName": "HW_INTERRUPTS.PENDING_AND_MASKED", "SampleAfterValue": "200003", - "BriefDescription": "Cycles pending interrupts are masked (Precise event capable)" + "BriefDescription": "Cycles pending interrupts are masked" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json index 07f00041f56f..254788af8ab6 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/pipeline.json @@ -1,168 +1,136 @@ [ { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Retired branch instructions (Precise event capable)" - }, - { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x7e", - "EventName": "BR_INST_RETIRED.JCC", - "SampleAfterValue": "200003", - "BriefDescription": "Retired conditional branch instructions (Precise event capable)" + "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 0", + "UMask": "0x1", + "EventName": "INST_RETIRED.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Instructions retired (Fixed event)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0xfe", - "EventName": "BR_INST_RETIRED.TAKEN_JCC", - "SampleAfterValue": "200003", - "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)" + "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when core is not halted (Fixed event)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts near CALL branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0xf9", - "EventName": "BR_INST_RETIRED.CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Retired near call instructions (Precise event capable)" + "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.", + "EventCode": "0x00", + "Counter": "Fixed counter 2", + "UMask": "0x3", + "EventName": "CPU_CLK_UNHALTED.REF_TSC", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when core is not halted (Fixed event)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near relative CALL branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time. The forward might occur subsequently when the data is available.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xfd", - "EventName": "BR_INST_RETIRED.REL_CALL", + "UMask": "0x1", + "EventName": "LD_BLOCKS.DATA_UNKNOWN", "SampleAfterValue": "200003", - "BriefDescription": "Retired near relative call instructions (Precise event capable)" + "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near indirect CALL branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xfb", - "EventName": "BR_INST_RETIRED.IND_CALL", + "UMask": "0x2", + "EventName": "LD_BLOCKS.STORE_FORWARD", "SampleAfterValue": "200003", - "BriefDescription": "Retired near indirect call instructions (Precise event capable)" + "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near return branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xf7", - "EventName": "BR_INST_RETIRED.RETURN", + "UMask": "0x4", + "EventName": "LD_BLOCKS.4K_ALIAS", "SampleAfterValue": "200003", - "BriefDescription": "Retired near return instructions (Precise event capable)" + "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xeb", - "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "UMask": "0x8", + "EventName": "LD_BLOCKS.UTLB_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)" + "BriefDescription": "Loads blocked because address in not in the UTLB (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.", - "EventCode": "0xC4", + "PublicDescription": "Counts anytime a load that retires is blocked for any reason.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0xbf", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "UMask": "0x10", + "EventName": "LD_BLOCKS.ALL_BLOCK", "SampleAfterValue": "200003", - "BriefDescription": "Retired far branch instructions (Precise event capable)" + "BriefDescription": "Loads blocked (Precise event capable)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.", + "EventCode": "0x0E", "Counter": "0,1,2,3", "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)" - }, - { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x7e", - "EventName": "BR_MISP_RETIRED.JCC", + "EventName": "UOPS_ISSUED.ANY", "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)" + "BriefDescription": "Uops issued to the back end per cycle" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Core cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xfe", - "EventName": "BR_MISP_RETIRED.TAKEN_JCC", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)" + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.CORE_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when core is not halted" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Reference cycles when core is not halted. This event uses a programmable general purpose performance counter.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0xfb", - "EventName": "BR_MISP_RETIRED.IND_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)" + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when core is not halted" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "This event used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end has is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end efficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. The low uop issue rate for a stream of INC instructions is considered to be a back end issue.", + "EventCode": "0x9C", "Counter": "0,1,2,3", - "UMask": "0xf7", - "EventName": "BR_MISP_RETIRED.RETURN", + "UMask": "0x0", + "EventName": "UOPS_NOT_DELIVERED.ANY", "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)" + "BriefDescription": "Uops requested but not-delivered to the back-end per cycle" }, { "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.", - "EventCode": "0xC5", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0xeb", - "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", - "SampleAfterValue": "200003", - "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call. (Precise event capable)" + "UMask": "0x0", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Instructions retired (Precise event capable)" }, { "PEBS": "2", @@ -186,9 +154,41 @@ "SampleAfterValue": "2000003", "BriefDescription": "MS uops retired (Precise event capable)" }, + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of floating point divide uops retired.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "UOPS_RETIRED.FPDIV", + "SampleAfterValue": "2000003", + "BriefDescription": "Floating point divide uops retired. (Precise Event Capable)" + }, + { + "PEBS": "2", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of integer divide uops retired.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "UOPS_RETIRED.IDIV", + "SampleAfterValue": "2000003", + "BriefDescription": "Integer divide uops retired. (Precise Event Capable)" + }, { "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel? architecture processors.", + "PublicDescription": "Counts machine clears for any reason.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "MACHINE_CLEARS.ALL", + "SampleAfterValue": "200003", + "BriefDescription": "All machine clears" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times that the processor detects that a program is writing to a code section and has to perform a machine clear because of that modification. Self-modifying code (SMC) causes a severe penalty in all Intel architecture processors.", "EventCode": "0xC3", "Counter": "0,1,2,3", "UMask": "0x1", @@ -217,217 +217,239 @@ "BriefDescription": "Machine clears due to memory disambiguation" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts machine clears for any reason.", - "EventCode": "0xC3", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts branch instructions retired for all branch types. This is an architectural performance event.", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x0", - "EventName": "MACHINE_CLEARS.ALL", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", "SampleAfterValue": "200003", - "BriefDescription": "All machine clears" + "BriefDescription": "Retired branch instructions (Precise event capable)" }, { "PEBS": "2", - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The event continues counting during hardware interrupts, traps, and inside interrupt handlers. This is an architectural performance event. This event uses a (_P)rogrammable general purpose performance counter. *This event is Precise Event capable: The EventingRIP field in the PEBS record is precise to the address of the instruction which caused the event. Note: Because PEBS records can be collected only on IA32_PMC0, only one event can use the PEBS facility at a time.", - "EventCode": "0xC0", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was taken and when it was not taken.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "INST_RETIRED.ANY_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Instructions retired (Precise event capable)" + "UMask": "0x7e", + "EventName": "BR_INST_RETIRED.JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired conditional branch instructions (Precise event capable)" }, { + "PEBS": "2", "CollectPEBSRecord": "1", - "PublicDescription": "This event used to measure front-end inefficiencies. I.e. when front-end of the machine is not delivering uops to the back-end and the back-end has is not stalled. This event can be used to identify if the machine is truly front-end bound. When this event occurs, it is an indication that the front-end of the machine is operating at less than its theoretical peak performance. Background: We can think of the processor pipeline as being divided into 2 broader parts: Front-end and Back-end. Front-end is responsible for fetching the instruction, decoding into uops in machine understandable format and putting them into a uop queue to be consumed by back end. The back-end then takes these uops, allocates the required resources. When all resources are ready, uops are executed. If the back-end is not ready to accept uops from the front-end, then we do not want to count these as front-end bottlenecks. However, whenever we have bottlenecks in the back-end, we will have allocation unit stalls and eventually forcing the front-end to wait until the back-end is ready to receive more uops. This event counts only when back-end is requesting more uops and front-end is not able to provide them. When 3 uops are requested and no uops are delivered, the event counts 3. When 3 are requested, and only 1 is delivered, the event counts 2. When only 2 are delivered, the event counts 1. Alternatively stated, the event will not count if 3 uops are delivered, or if the back end is stalled and not requesting any uops at all. Counts indicate missed opportunities for the front-end to deliver a uop to the back end. Some examples of conditions that cause front-end efficiencies are: ICache misses, ITLB misses, and decoder restrictions that limit the front-end bandwidth. Known Issues: Some uops require multiple allocation slots. These uops will not be charged as a front end 'not delivered' opportunity, and will be regarded as a back end problem. For example, the INC instruction has one uop that requires 2 issue slots. A stream of INC instructions will not count as UOPS_NOT_DELIVERED, even though only one instruction can be issued per clock. The low uop issue rate for a stream of INC instructions is considered to be a back end issue.", - "EventCode": "0x9C", + "PublicDescription": "Counts the number of taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "UOPS_NOT_DELIVERED.ANY", + "UMask": "0x80", + "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES", "SampleAfterValue": "200003", - "BriefDescription": "Uops requested but not-delivered to the back-end per cycle" + "BriefDescription": "Retired taken branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts uops issued by the front end and allocated into the back end of the machine. This event counts uops that retire as well as uops that were speculatively executed but didn't retire. The sort of speculative uops that might be counted includes, but is not limited to those uops issued in the shadow of a miss-predicted branch, those uops that are inserted during an assist (such as for a denormal floating point result), and (previously allocated) uops that might be canceled during a machine clear.", - "EventCode": "0x0E", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts far branch instructions retired. This includes far jump, far call and return, and Interrupt call and return.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "UOPS_ISSUED.ANY", + "UMask": "0xbf", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "200003", - "BriefDescription": "Uops issued to the back end per cycle" + "BriefDescription": "Retired far branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts core cycles if either divide unit is busy.", - "EventCode": "0xCD", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near indirect call or near indirect jmp branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CYCLES_DIV_BUSY.ALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a divider is busy" + "UMask": "0xeb", + "EventName": "BR_INST_RETIRED.NON_RETURN_IND", + "SampleAfterValue": "200003", + "BriefDescription": "Retired instructions of near indirect Jmp or call (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts core cycles the integer divide unit is busy.", - "EventCode": "0xCD", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near return branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLES_DIV_BUSY.IDIV", + "UMask": "0xf7", + "EventName": "BR_INST_RETIRED.RETURN", "SampleAfterValue": "200003", - "BriefDescription": "Cycles the integer divide unit is busy" + "BriefDescription": "Retired near return instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts core cycles the floating point divide unit is busy.", - "EventCode": "0xCD", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near CALL branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLES_DIV_BUSY.FPDIV", + "UMask": "0xf9", + "EventName": "BR_INST_RETIRED.CALL", "SampleAfterValue": "200003", - "BriefDescription": "Cycles the FP divide unit is busy" + "BriefDescription": "Retired near call instructions (Precise event capable)" }, { - "PublicDescription": "Counts the number of instructions that retire execution. For instructions that consist of multiple uops, this event counts the retirement of the last uop of the instruction. The counter continues counting during hardware interrupts, traps, and inside interrupt handlers. This event uses fixed counter 0. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", - "Counter": "Fixed counter 1", - "UMask": "0x1", - "EventName": "INST_RETIRED.ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Instructions retired (Fixed event)" + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near indirect CALL branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfb", + "EventName": "BR_INST_RETIRED.IND_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near indirect call instructions (Precise event capable)" }, { - "PublicDescription": "Counts the number of core cycles while the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time to time. For this reason this event may have a changing ratio with regards to time. This event uses fixed counter 1. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when core is not halted (Fixed event)" + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts near relative CALL branch instructions retired.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfd", + "EventName": "BR_INST_RETIRED.REL_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Retired near relative call instructions (Precise event capable)" }, { - "PublicDescription": "Counts the number of reference cycles that the core is not in a halt state. The core enters the halt state when it is running the HLT instruction. In mobile systems the core frequency may change from time. This event is not affected by core frequency changes but counts as if the core is running at the maximum frequency all the time. This event uses fixed counter 2. You cannot collect a PEBs record for this event.", - "EventCode": "0x00", - "Counter": "Fixed counter 3", - "UMask": "0x3", - "EventName": "CPU_CLK_UNHALTED.REF_TSC", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when core is not halted (Fixed event)" + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were taken and does not count when the Jcc branch instruction were not taken.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0xfe", + "EventName": "BR_INST_RETIRED.TAKEN_JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired conditional branch instructions that were taken (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Core cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.", - "EventCode": "0x3C", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted branch instructions retired including all branch types.", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.CORE_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when core is not halted" + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Reference cycles when core is not halted. This event uses a (_P)rogrammable general purpose performance counter.", - "EventCode": "0x3C", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired, including both when the branch was supposed to be taken and when it was not supposed to be taken (but the processor predicted the opposite condition).", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when core is not halted" + "UMask": "0x7e", + "EventName": "BR_MISP_RETIRED.JCC", + "SampleAfterValue": "200003", + "BriefDescription": "Retired mispredicted conditional branch instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.", - "EventCode": "0xE6", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted branch instructions retired that were near indirect call or near indirect jmp, where the target address taken was not what the processor predicted.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BACLEARS.ALL", + "UMask": "0xeb", + "EventName": "BR_MISP_RETIRED.NON_RETURN_IND", "SampleAfterValue": "200003", - "BriefDescription": "BACLEARs asserted for any branch type" + "BriefDescription": "Retired mispredicted instructions of near indirect Jmp or near indirect call. (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts BACLEARS on return instructions.", - "EventCode": "0xE6", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted near RET branch instructions retired, where the return address taken was not what the processor predicted.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "BACLEARS.RETURN", + "UMask": "0xf7", + "EventName": "BR_MISP_RETIRED.RETURN", "SampleAfterValue": "200003", - "BriefDescription": "BACLEARs asserted for return branch" + "BriefDescription": "Retired mispredicted near return instructions (Precise event capable)" }, { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.", - "EventCode": "0xE6", + "PEBS": "2", + "CollectPEBSRecord": "2", + "PublicDescription": "Counts mispredicted near indirect CALL branch instructions retired, where the target address taken was not what the processor predicted.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "BACLEARS.COND", + "UMask": "0xfb", + "EventName": "BR_MISP_RETIRED.IND_CALL", "SampleAfterValue": "200003", - "BriefDescription": "BACLEARs asserted for conditional branch" + "BriefDescription": "Retired mispredicted near indirect call instructions (Precise event capable)" }, { "PEBS": "2", "CollectPEBSRecord": "2", - "PublicDescription": "Counts anytime a load that retires is blocked for any reason.", - "EventCode": "0x03", + "PublicDescription": "Counts mispredicted retired Jcc (Jump on Conditional Code/Jump if Condition is Met) branch instructions retired that were supposed to be taken but the processor predicted that it would not be taken.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "LD_BLOCKS.ALL_BLOCK", + "UMask": "0xfe", + "EventName": "BR_MISP_RETIRED.TAKEN_JCC", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked (Precise event capable)" + "BriefDescription": "Retired mispredicted conditional branch instructions that were taken (Precise event capable)" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts loads blocked because they are unable to find their physical address in the micro TLB (UTLB).", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles if either divide unit is busy.", + "EventCode": "0xCD", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS.UTLB_MISS", + "UMask": "0x0", + "EventName": "CYCLES_DIV_BUSY.ALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a divider is busy" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles the integer divide unit is busy.", + "EventCode": "0xCD", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CYCLES_DIV_BUSY.IDIV", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked because address in not in the UTLB (Precise event capable)" + "BriefDescription": "Cycles the integer divide unit is busy" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts a load blocked from using a store forward because of an address/size mismatch, only one of the loads blocked from each store will be counted.", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts core cycles the floating point divide unit is busy.", + "EventCode": "0xCD", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "LD_BLOCKS.STORE_FORWARD", + "EventName": "CYCLES_DIV_BUSY.FPDIV", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked due to store forward restriction (Precise event capable)" + "BriefDescription": "Cycles the FP divide unit is busy" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts a load blocked from using a store forward, but did not occur because the store data was not available at the right time. The forward might occur subsequently when the data is available.", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts the number of times a BACLEAR is signaled for any reason, including, but not limited to indirect branch/call, Jcc (Jump on Conditional Code/Jump if Condition is Met) branch, unconditional branch/call, and returns.", + "EventCode": "0xE6", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LD_BLOCKS.DATA_UNKNOWN", + "EventName": "BACLEARS.ALL", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked due to store data not ready (Precise event capable)" + "BriefDescription": "BACLEARs asserted for any branch type" }, { - "PEBS": "2", - "CollectPEBSRecord": "2", - "PublicDescription": "Counts loads that block because their address modulo 4K matches a pending store.", - "EventCode": "0x03", + "CollectPEBSRecord": "1", + "PublicDescription": "Counts BACLEARS on return instructions.", + "EventCode": "0xE6", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "LD_BLOCKS.4K_ALIAS", + "UMask": "0x8", + "EventName": "BACLEARS.RETURN", "SampleAfterValue": "200003", - "BriefDescription": "Loads blocked because address has 4k partial address false dependence (Precise event capable)" + "BriefDescription": "BACLEARs asserted for return branch" }, { - "PEBS": "2", "CollectPEBSRecord": "1", - "PublicDescription": "Counts the number of taken branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts BACLEARS on Jcc (Jump on Conditional Code/Jump if Condition is Met) branches.", + "EventCode": "0xE6", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "BR_INST_RETIRED.ALL_TAKEN_BRANCHES", + "UMask": "0x10", + "EventName": "BACLEARS.COND", "SampleAfterValue": "200003", - "BriefDescription": "Retired taken branch instructions (Precise event capable)" + "BriefDescription": "BACLEARs asserted for conditional branch" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json index 3202c4478836..9805198d3f5f 100644 --- a/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/goldmont/virtual-memory.json @@ -1,4 +1,34 @@ [ + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts every core cycle when a Data-side (walks due to a data operation) page walk is in progress.", + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "PAGE_WALKS.D_SIDE_CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Duration of D-side page-walks in cycles" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts every core cycle when a Instruction-side (walks due to an instruction fetch) page walk is in progress.", + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "PAGE_WALKS.I_SIDE_CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Duration of I-side pagewalks in cycles" + }, + { + "CollectPEBSRecord": "1", + "PublicDescription": "Counts every core cycle a page-walk is in progress due to either a data memory operation or an instruction fetch.", + "EventCode": "0x05", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "PAGE_WALKS.CYCLES", + "SampleAfterValue": "200003", + "BriefDescription": "Duration of page-walks in cycles" + }, { "CollectPEBSRecord": "1", "PublicDescription": "Counts the number of times the machine was unable to find a translation in the Instruction Translation Lookaside Buffer (ITLB) for a linear address of an instruction fetch. It counts when new translation are filled into the ITLB. The event is speculative in nature, but will not count translations (page walks) that are begun and not finished, or translations that are finished but not filled into the ITLB.", @@ -41,35 +71,5 @@ "EventName": "MEM_UOPS_RETIRED.DTLB_MISS", "SampleAfterValue": "200003", "BriefDescription": "Memory uops retired that missed the DTLB (Precise event capable)" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts every core cycle when a Data-side (walks due to a data operation) page walk is in progress.", - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "PAGE_WALKS.D_SIDE_CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Duration of D-side page-walks in cycles" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts every core cycle when a Instruction-side (walks due to an instruction fetch) page walk is in progress.", - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "PAGE_WALKS.I_SIDE_CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Duration of I-side pagewalks in cycles" - }, - { - "CollectPEBSRecord": "1", - "PublicDescription": "Counts every core cycle a page-walk is in progress due to either a data memory operation or an instruction fetch.", - "EventCode": "0x05", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "PAGE_WALKS.CYCLES", - "SampleAfterValue": "200003", - "BriefDescription": "Duration of page-walks in cycles" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From ca3a2d055d86e6a731c783f2081deb9b03e36d2e Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:48:12 -0800 Subject: perf vendor events intel: Update Haswell events to V27 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/haswell/cache.json | 365 ++++--- .../arch/x86/haswell/floating-point.json | 20 +- .../perf/pmu-events/arch/x86/haswell/frontend.json | 132 +-- tools/perf/pmu-events/arch/x86/haswell/memory.json | 21 + tools/perf/pmu-events/arch/x86/haswell/other.json | 20 +- .../perf/pmu-events/arch/x86/haswell/pipeline.json | 1131 ++++++++++---------- .../arch/x86/haswell/virtual-memory.json | 212 ++-- 7 files changed, 977 insertions(+), 924 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/haswell/cache.json b/tools/perf/pmu-events/arch/x86/haswell/cache.json index bfb5ebf48c54..da4d6ddd4f92 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswell/cache.json @@ -11,14 +11,34 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x41", + "UMask": "0x22", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of instruction fetches that missed the L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Demand requests that miss L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x27", "Errata": "HSD78", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "BriefDescription": "Demand requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -31,6 +51,48 @@ "BriefDescription": "L2 prefetch requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "All requests that missed L2.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3f", + "Errata": "HSD78", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "All requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Demand data read requests that hit L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x41", + "Errata": "HSD78", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x42", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of instruction fetches that hit the L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x44", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", @@ -72,6 +134,17 @@ "BriefDescription": "L2 code requests", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Demand requests to L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xe7", + "Errata": "HSD78", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests.", "EventCode": "0x24", @@ -82,6 +155,17 @@ "BriefDescription": "Requests from L2 hardware prefetchers", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "All requests to L2 cache.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xff", + "Errata": "HSD78", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "All L2 requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Not rejected writebacks that hit L2 cache.", "EventCode": "0x27", @@ -122,6 +206,27 @@ "BriefDescription": "L1D miss oustandings duration in cycles", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "2" + }, { "EventCode": "0x48", "Counter": "0,1,2,3", @@ -133,13 +238,13 @@ }, { "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding.", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", "CounterMask": "1", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "This event counts when new data lines are brought into the L1 Data cache, which cause other lines to be evicted from the cache.", @@ -162,6 +267,28 @@ "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD78, HSD62, HSD61", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD78, HSD62, HSD61", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Offcore outstanding Demand code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", @@ -185,46 +312,35 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", + "UMask": "0x4", "Errata": "HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD78, HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x8", "Errata": "HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", + "UMask": "0x8", "Errata": "HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -288,6 +404,15 @@ "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "CounterHTOff": "0,1,2,3" + }, { "PEBS": "1", "EventCode": "0xD0", @@ -296,7 +421,7 @@ "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -308,7 +433,7 @@ "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" @@ -321,31 +446,33 @@ "Errata": "HSD76, HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x41", "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x42", "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" @@ -358,19 +485,20 @@ "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts all store uops retired. This is a precise event.", "EventCode": "0xD0", "Counter": "0,1,2,3", "UMask": "0x82", "Errata": "HSD29, HSM30", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (precise Event)", "CounterHTOff": "0,1,2,3", "Data_LA": "1", "L1_Hit_Indication": "1" @@ -401,20 +529,20 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L3 cache hits as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "Retired load uops missed L1 cache as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", @@ -427,20 +555,18 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops missed L2. Unknown data source excluded.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "Errata": "HSD29, HSM30", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", - "PublicDescription": "Retired load uops missed L3. Excludes unknown data source .", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x20", @@ -477,25 +603,27 @@ }, { "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, { "PEBS": "1", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "Errata": "HSD29, HSD25, HSM26, HSM30", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -513,14 +641,13 @@ }, { "PEBS": "1", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches.", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", "EventCode": "0xD3", "Counter": "0,1,2,3", "UMask": "0x1", "Errata": "HSD74, HSD29, HSD25, HSM30", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "SampleAfterValue": "100003", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "CounterHTOff": "0,1,2,3", "Data_LA": "1" }, @@ -665,6 +792,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "", "EventCode": "0xf4", "Counter": "0,1,2,3", "UMask": "0x10", @@ -674,131 +802,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of instruction fetches that hit the L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x44", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of instruction fetches that missed the L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x27", - "Errata": "HSD78", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests to L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe7", - "Errata": "HSD78", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All requests that missed L2.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3f", - "Errata": "HSD78", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "All requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All requests to L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xff", - "Errata": "HSD78", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD78, HSD62, HSD61", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { + "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c8fff", "Counter": "0,1,2,3", @@ -811,6 +815,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c07f7", "Counter": "0,1,2,3", @@ -823,6 +828,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c07f7", "Counter": "0,1,2,3", @@ -835,6 +841,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0244", "Counter": "0,1,2,3", @@ -847,6 +854,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0122", "Counter": "0,1,2,3", @@ -859,6 +867,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0122", "Counter": "0,1,2,3", @@ -871,6 +880,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0091", "Counter": "0,1,2,3", @@ -883,6 +893,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0091", "Counter": "0,1,2,3", @@ -895,6 +906,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0200", "Counter": "0,1,2,3", @@ -907,6 +919,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0100", "Counter": "0,1,2,3", @@ -919,6 +932,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0080", "Counter": "0,1,2,3", @@ -931,6 +945,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0040", "Counter": "0,1,2,3", @@ -943,6 +958,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0020", "Counter": "0,1,2,3", @@ -955,6 +971,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3f803c0010", "Counter": "0,1,2,3", @@ -967,6 +984,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0004", "Counter": "0,1,2,3", @@ -979,6 +997,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0004", "Counter": "0,1,2,3", @@ -991,6 +1010,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0002", "Counter": "0,1,2,3", @@ -1003,6 +1023,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0002", "Counter": "0,1,2,3", @@ -1015,6 +1036,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x10003c0001", "Counter": "0,1,2,3", @@ -1027,6 +1049,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x04003c0001", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json index 1732fa49c6d2..f9843e5a9b42 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/haswell/floating-point.json @@ -19,6 +19,16 @@ "BriefDescription": "Number of transitions from SSE to AVX-256 when penalty applicable.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", + "EventCode": "0xC6", + "Counter": "0,1,2,3", + "UMask": "0x7", + "EventName": "AVX_INSTS.ALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of X87 FP assists due to output values.", "EventCode": "0xCA", @@ -69,15 +79,5 @@ "BriefDescription": "Cycles with any input/output SSE or FP assist", "CounterMask": "1", "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", - "EventCode": "0xC6", - "Counter": "0,1,2,3", - "UMask": "0x7", - "EventName": "AVX_INSTS.ALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswell/frontend.json b/tools/perf/pmu-events/arch/x86/haswell/frontend.json index 57a1ce46971f..c0a5bedcc15c 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/frontend.json +++ b/tools/perf/pmu-events/arch/x86/haswell/frontend.json @@ -21,74 +21,43 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -134,6 +103,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.", "EventCode": "0x79", @@ -156,6 +135,38 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of uops delivered to IDQ from any path.", "EventCode": "0x79", @@ -194,6 +205,15 @@ "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x80", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "ICACHE.IFDATA_STALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event count the number of undelivered (unallocated) uops from the Front-end to the Resource Allocation Table (RAT) while the Back-end of the processor is not stalled. The Front-end can allocate up to 4 uops per cycle so this event can increment 0-4 times per cycle depending on the number of unallocated uops. This event is counted on a per-core basis.", "EventCode": "0x9C", @@ -270,25 +290,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x80", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE.IFDATA_STALL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswell/memory.json b/tools/perf/pmu-events/arch/x86/haswell/memory.json index aab981b42339..e5f9fa6655b3 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/memory.json +++ b/tools/perf/pmu-events/arch/x86/haswell/memory.json @@ -401,6 +401,7 @@ "CounterHTOff": "3" }, { + "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc08fff", "Counter": "0,1,2,3", @@ -413,6 +414,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x01004007f7", "Counter": "0,1,2,3", @@ -425,6 +427,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc007f7", "Counter": "0,1,2,3", @@ -437,6 +440,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400244", "Counter": "0,1,2,3", @@ -449,6 +453,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00244", "Counter": "0,1,2,3", @@ -461,6 +466,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400122", "Counter": "0,1,2,3", @@ -473,6 +479,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00122", "Counter": "0,1,2,3", @@ -485,6 +492,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400091", "Counter": "0,1,2,3", @@ -497,6 +505,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00091", "Counter": "0,1,2,3", @@ -509,6 +518,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00200", "Counter": "0,1,2,3", @@ -521,6 +531,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00100", "Counter": "0,1,2,3", @@ -533,6 +544,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00080", "Counter": "0,1,2,3", @@ -545,6 +557,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00040", "Counter": "0,1,2,3", @@ -557,6 +570,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00020", "Counter": "0,1,2,3", @@ -569,6 +583,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00010", "Counter": "0,1,2,3", @@ -581,6 +596,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400004", "Counter": "0,1,2,3", @@ -593,6 +609,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00004", "Counter": "0,1,2,3", @@ -605,6 +622,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400002", "Counter": "0,1,2,3", @@ -617,6 +635,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00002", "Counter": "0,1,2,3", @@ -629,6 +648,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100400001", "Counter": "0,1,2,3", @@ -641,6 +661,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fffc00001", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/haswell/other.json b/tools/perf/pmu-events/arch/x86/haswell/other.json index 85d6a14baf9d..8a4d898d76c1 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/other.json +++ b/tools/perf/pmu-events/arch/x86/haswell/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x5C", "Counter": "0,1,2,3", @@ -30,6 +20,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": "2000003", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json index 0099848607ad..a4dcfce4a512 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswell/pipeline.json @@ -2,33 +2,43 @@ { "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "Errata": "HSD140, HSD143", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.", "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "This event counts loads that followed a store to the same address, where the data could not be forwarded inside the pipeline from the store to the load. The most common reason why store forwarding would be blocked is when a load's address range overlaps with a preceding smaller uncompleted store. The penalty for blocked store forwarding is that the load must wait for the store to write its value to the cache before it can be issued.", @@ -67,7 +77,19 @@ "UMask": "0x3", "EventName": "INT_MISC.RECOVERY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -81,6 +103,29 @@ "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0x0E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Number of flags-merge uops allocated. Such uops add delay.", "EventCode": "0x0E", @@ -112,35 +157,32 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", - "Invert": "1", + "EventCode": "0x14", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "UMask": "0x2", + "EventName": "ARITH.DIVIDER_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0E", - "Invert": "1", + "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Thread cycles when thread is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x14", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ARITH.DIVIDER_UOPS", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -153,6 +195,38 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "Counter": "0,1,2,3", @@ -162,6 +236,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.", "EventCode": "0x4c", @@ -232,6 +315,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles where the decoder is stalled on an instruction with a length changing prefix (LCP).", "EventCode": "0x87", @@ -406,6 +501,15 @@ "BriefDescription": "Taken speculative and retired mispredicted indirect branches with return mnemonic.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "Counter": "0,1,2,3", + "UMask": "0xa0", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "Counter": "0,1,2,3", @@ -445,136 +549,282 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", + "PublicDescription": "Cycles per core when uops are exectuted in port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Cycles per core when uops are executed in port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles per thread when uops are executed in port 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", + "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", + "PublicDescription": "Cycles per core when uops are exectuted in port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "BriefDescription": "Cycles per core when uops are executed in port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "Cycles per thread when uops are executed in port 1.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "UMask": "0x4", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "UMask": "0x4", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles allocation is stalled due to resource related reason.", - "EventCode": "0xA2", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD135", - "EventName": "RESOURCE_STALLS.ANY", + "UMask": "0x4", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", + "BriefDescription": "Cycles per thread when uops are executed in port 2.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "RESOURCE_STALLS.RS", + "UMask": "0x8", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", - "EventCode": "0xA2", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 3.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "RESOURCE_STALLS.ROB", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.", - "EventCode": "0xA3", + "PublicDescription": "Cycles per core when uops are exectuted in port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD78", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "UMask": "0x10", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L2 cache miss loads.", - "CounterMask": "1", + "BriefDescription": "Cycles per core when uops are executed in port 4.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.", - "EventCode": "0xA3", - "Counter": "2", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 4.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles per core when uops are exectuted in port 5.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per core when uops are executed in port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 5.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles per core when uops are exectuted in port 6.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per core when uops are executed in port 6.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 6.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "AnyThread": "1", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "Counter": "0,1,2,3", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 7.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles allocation is stalled due to resource related reason.", + "EventCode": "0xA2", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD135", + "EventName": "RESOURCE_STALLS.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource-related stall cycles", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", + "EventCode": "0xA2", + "Counter": "0,1,2,3", "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "EventName": "RESOURCE_STALLS.SB", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "RESOURCE_STALLS.ROB", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles stalled due to re-order buffer full.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.", + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Errata": "HSD78", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L2 cache miss loads.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Cycles with pending memory loads. Set Cmask=2 to count cycle.", @@ -594,7 +844,7 @@ "UMask": "0x4", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "CounterMask": "4", "CounterHTOff": "0,1,2,3" }, @@ -620,6 +870,17 @@ "CounterMask": "6", "CounterHTOff": "0,1,2,3" }, + { + "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "CounterMask": "8", + "CounterHTOff": "2" + }, { "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.", "EventCode": "0xA3", @@ -642,14 +903,23 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", - "EventCode": "0xB1", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE", + "UMask": "0x1", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -665,71 +935,172 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of instructions at retirement.", - "EventCode": "0xC0", + "PublicDescription": "This events counts the cycles where at least one uop was executed. It is counted per thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "HSD11, HSD140", - "EventName": "INST_RETIRED.ANY_P", + "UMask": "0x1", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", - "EventCode": "0xC0", + "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "INST_RETIRED.X87", - "SampleAfterValue": "2000003", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", - "EventCode": "0xC0", - "Counter": "1", "UMask": "0x1", - "Errata": "HSD140", - "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", - "EventCode": "0xC1", + "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", - "PublicDescription": "Counts the number of micro-ops retired. Use Cmask=1 and invert to count active cycles or stalled cycles.", - "EventCode": "0xC2", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.ALL", + "Errata": "HSD144, HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops.", - "CounterHTOff": "0,1,2,3,4,5,6,7", - "Data_LA": "1" + "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" }, { - "PEBS": "1", - "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.", - "EventCode": "0xC2", + "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Number of uops executed on the core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "HSD30, HSM31", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of instructions at retirement.", + "EventCode": "0xC0", + "Counter": "0,1,2,3", + "UMask": "0x0", + "Errata": "HSD11, HSD140", + "EventName": "INST_RETIRED.ANY_P", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", + "EventCode": "0xC0", + "Counter": "1", + "UMask": "0x1", + "Errata": "HSD140", + "EventName": "INST_RETIRED.PREC_DIST", + "SampleAfterValue": "2000003", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "CounterHTOff": "1" + }, + { + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventCode": "0xC0", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "INST_RETIRED.X87", + "SampleAfterValue": "2000003", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", + "EventCode": "0xC1", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "SampleAfterValue": "100003", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_RETIRED.ALL", + "SampleAfterValue": "2000003", + "BriefDescription": "Actually retired uops.", + "CounterHTOff": "0,1,2,3,4,5,6,7", + "Data_LA": "1" + }, { "EventCode": "0xC2", "Invert": "1", @@ -764,6 +1135,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC3", "Counter": "0,1,2,3", @@ -773,6 +1154,17 @@ "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event is incremented when self-modifying code (SMC) is detected, which causes a machine clear. Machine clears can have a significant performance impact if they are happening frequently.", "EventCode": "0xC3", @@ -792,9 +1184,18 @@ "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Branch instructions at retirement.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PEBS": "1", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", @@ -814,18 +1215,27 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Branch instructions at retirement.", + "PEBS": "1", "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100003", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Counts the number of near return instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x8", @@ -846,7 +1256,6 @@ }, { "PEBS": "1", - "PublicDescription": "Number of near taken branches retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x20", @@ -866,14 +1275,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "EventCode": "0xC4", + "PublicDescription": "Mispredicted branch instructions at retirement.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "UMask": "0x0", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "All mispredicted macro branch instructions retired.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", @@ -885,16 +1294,6 @@ "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Mispredicted branch instructions at retirement.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PEBS": "2", "PublicDescription": "This event counts all mispredicted branch instructions retired. This is a precise event.", @@ -903,427 +1302,37 @@ "UMask": "0x4", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. ", + "BriefDescription": "Mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Count cases of saving new LBR records by hardware.", - "EventCode": "0xCC", + "PEBS": "1", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Count cases of saving new LBR", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "Counter": "0,1,2,3", - "UMask": "0xa0", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "PublicDescription": "Count cases of saving new LBR records by hardware.", + "EventCode": "0xCC", "Counter": "0,1,2,3", - "UMask": "0x8", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "BriefDescription": "Count cases of saving new LBR", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x80", - "AnyThread": "1", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Number of near branch instructions retired that were taken but mispredicted.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This events counts the cycles where at least one uop was executed. It is counted per thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Errata": "HSD144, HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Number of front end re-steers due to BPU misprediction.", - "EventCode": "0xe6", + "PublicDescription": "Number of front end re-steers due to BPU misprediction.", + "EventCode": "0xe6", "Counter": "0,1,2,3", "UMask": "0x1f", "EventName": "BACLEARS.ANY", "SampleAfterValue": "100003", "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "EventCode": "0x0D", - "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "HSD30, HSM31", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json b/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json index ce80a08d0f08..777b500a5c9f 100644 --- a/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/haswell/virtual-memory.json @@ -38,6 +38,16 @@ "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB load misses.", "EventCode": "0x08", @@ -68,6 +78,16 @@ "BriefDescription": "Load misses that miss the DTLB and hit the STLB (2M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Number of cache load STLB hits. No page walk.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "DTLB demand load misses with low part of linear-to-physical address translation missed.", "EventCode": "0x08", @@ -117,6 +137,16 @@ "BriefDescription": "Store misses in all DTLB levels that cause completed page walks. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by DTLB store misses.", "EventCode": "0x49", @@ -147,6 +177,16 @@ "BriefDescription": "Store misses that miss the DTLB and hit the STLB (2M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "DTLB store misses with low part of linear-to-physical address translation missed.", "EventCode": "0x49", @@ -205,6 +245,16 @@ "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Completed page walks in ITLB of any page size.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "This event counts cycles when the page miss handler (PMH) is servicing page walks caused by ITLB misses.", "EventCode": "0x85", @@ -235,6 +285,16 @@ "BriefDescription": "Code misses that miss the DTLB and hit the STLB (2M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "ITLB misses that hit STLB. No page walk.", + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x60", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts the number of ITLB flushes, includes 4k/2M/4M pages.", "EventCode": "0xae", @@ -256,41 +316,45 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", + "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "UMask": "0x12", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB", + "BriefDescription": "Number of DTLB page walker hits in the L2", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of DTLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", + "UMask": "0x14", + "Errata": "HSD25", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of DTLB page walker loads from memory.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", + "UMask": "0x18", + "Errata": "HSD25", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", + "BriefDescription": "Number of DTLB page walker hits in Memory", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", + "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x12", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "UMask": "0x21", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L2", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB", "CounterHTOff": "0,1,2,3" }, { @@ -304,43 +368,43 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", + "UMask": "0x24", + "Errata": "HSD25", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Number of ITLB page walker loads from memory.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", + "UMask": "0x28", + "Errata": "HSD25", + "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "BriefDescription": "Number of ITLB page walker hits in Memory", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of DTLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x14", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "UMask": "0x41", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x24", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "UMask": "0x42", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", "CounterHTOff": "0,1,2,3" }, { @@ -355,41 +419,37 @@ { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", + "UMask": "0x48", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of DTLB page walker loads from memory.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x18", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "UMask": "0x81", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", "SampleAfterValue": "2000003", - "BriefDescription": "Number of DTLB page walker hits in Memory", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of ITLB page walker loads from memory.", "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x28", - "Errata": "HSD25", - "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", + "UMask": "0x82", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", "SampleAfterValue": "2000003", - "BriefDescription": "Number of ITLB page walker hits in Memory", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", "Counter": "0,1,2,3", - "UMask": "0x48", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", + "UMask": "0x84", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "CounterHTOff": "0,1,2,3" }, { @@ -420,65 +480,5 @@ "SampleAfterValue": "100003", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of cache load STLB hits. No page walk.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "EventCode": "0x49", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Completed page walks in ITLB of any page size.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "ITLB misses that hit STLB. No page walk.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x60", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 032c16b296d512d151a3276ab3c53d4a4d65e2ed Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:49:26 -0800 Subject: perf vendor events intel: Update HaswellX events to V19 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/haswellx/cache.json | 377 ++++--- .../arch/x86/haswellx/floating-point.json | 20 +- .../pmu-events/arch/x86/haswellx/frontend.json | 132 +-- .../perf/pmu-events/arch/x86/haswellx/memory.json | 28 + tools/perf/pmu-events/arch/x86/haswellx/other.json | 20 +- .../pmu-events/arch/x86/haswellx/pipeline.json | 1133 ++++++++++---------- .../arch/x86/haswellx/virtual-memory.json | 212 ++-- 7 files changed, 991 insertions(+), 931 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/haswellx/cache.json b/tools/perf/pmu-events/arch/x86/haswellx/cache.json index f1bae0817a6f..b2fbd617306a 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/cache.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/cache.json @@ -12,12 +12,32 @@ }, { "EventCode": "0x24", - "UMask": "0x41", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "UMask": "0x22", + "BriefDescription": "RFO requests that miss L2 cache", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "EventName": "L2_RQSTS.RFO_MISS", + "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x24", + "BriefDescription": "L2 cache misses when fetching instructions", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "PublicDescription": "Number of instruction fetches that missed the L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x27", + "BriefDescription": "Demand requests that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "Errata": "HSD78", - "PublicDescription": "Demand data read requests that hit L2 cache.", + "PublicDescription": "Demand requests that miss L2 cache.", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -31,6 +51,48 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0x3f", + "BriefDescription": "All requests that miss L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.MISS", + "Errata": "HSD78", + "PublicDescription": "All requests that missed L2.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x41", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "Errata": "HSD78", + "PublicDescription": "Demand data read requests that hit L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x42", + "BriefDescription": "RFO requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_HIT", + "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x44", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "PublicDescription": "Number of instruction fetches that hit the L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0x50", @@ -72,6 +134,17 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xe7", + "BriefDescription": "Demand requests to L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "Errata": "HSD78", + "PublicDescription": "Demand requests to L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0xf8", @@ -82,6 +155,17 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xff", + "BriefDescription": "All L2 requests", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.REFERENCES", + "Errata": "HSD78", + "PublicDescription": "All requests to L2 cache.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x27", "UMask": "0x50", @@ -122,6 +206,27 @@ "SampleAfterValue": "2000003", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, { "EventCode": "0x48", "UMask": "0x2", @@ -133,13 +238,13 @@ }, { "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "UMask": "0x2", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.FB_FULL", "CounterMask": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x51", @@ -162,6 +267,28 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "Errata": "HSD78, HSD62, HSD61", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "Errata": "HSD78, HSD62, HSD61", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "UMask": "0x2", @@ -186,23 +313,23 @@ }, { "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "UMask": "0x4", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "CounterMask": "1", "Errata": "HSD62, HSD61", - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "UMask": "0x8", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", - "Errata": "HSD78, HSD62, HSD61", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "Errata": "HSD62, HSD61", + "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -217,17 +344,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "Errata": "HSD62, HSD61", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x63", "UMask": "0x2", @@ -288,10 +404,19 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -303,20 +428,20 @@ { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "Errata": "HSD29, HSM30", - "SampleAfterValue": "100003", "L1_Hit_Indication": "1", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -328,32 +453,34 @@ { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "Errata": "HSD29, HSM30", + "PublicDescription": "This event counts load uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "Errata": "HSD29, HSM30", - "SampleAfterValue": "100003", "L1_Hit_Indication": "1", + "PublicDescription": "This event counts store uops retired which had memory addresses spilt across 2 cache lines. A line split is across 64B cache-lines which may include a page split (4K). This is a precise event.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -365,14 +492,15 @@ { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "Errata": "HSD29, HSM30", - "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", + "PublicDescription": "This event counts all store uops retired. This is a precise event.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { @@ -402,13 +530,13 @@ { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "Retired load uops with L3 cache hits as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources were data hits in the L3 cache without snoops required. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -421,20 +549,19 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "Errata": "HSM30", - "PublicDescription": "Retired load uops missed L1 cache as data sources.", + "PublicDescription": "This event counts retired load uops in which data sources missed in the L1 cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "Errata": "HSD29, HSM30", - "PublicDescription": "Retired load uops missed L2. Unknown data source excluded.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -447,7 +574,6 @@ "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_MISS", "Errata": "HSD74, HSD29, HSD25, HSM26, HSM30", - "PublicDescription": "Retired load uops missed L3. Excludes unknown data source .", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -478,24 +604,26 @@ { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. ", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "HSD29, HSD25, HSM26, HSM30", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HIT in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. ", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "HSD29, HSD25, HSM26, HSM30", + "PublicDescription": "This event counts retired load uops that hit in the L3 cache, but required a cross-core snoop which resulted in a HITM (hit modified) in an on-pkg core cache. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, @@ -514,20 +642,19 @@ { "EventCode": "0xD3", "UMask": "0x1", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "HSD74, HSD29, HSD25, HSM30", - "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches.", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x4", - "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI)", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -539,7 +666,7 @@ { "EventCode": "0xD3", "UMask": "0x10", - "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -551,7 +678,7 @@ { "EventCode": "0xD3", "UMask": "0x20", - "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -706,134 +833,10 @@ "BriefDescription": "Split locks in SQ", "Counter": "0,1,2,3", "EventName": "SQ_MISC.SPLIT_LOCK", + "PublicDescription": "", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x24", - "UMask": "0x42", - "BriefDescription": "RFO requests that hit L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "PublicDescription": "Counts the number of store RFO requests that hit the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "PublicDescription": "Counts the number of store RFO requests that miss the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x44", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "PublicDescription": "Number of instruction fetches that hit the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when fetching instructions", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "PublicDescription": "Number of instruction fetches that missed the L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x27", - "BriefDescription": "Demand requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "Errata": "HSD78", - "PublicDescription": "Demand requests that miss L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "Errata": "HSD78", - "PublicDescription": "Demand requests to L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "Errata": "HSD78", - "PublicDescription": "All requests that missed L2.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "Errata": "HSD78", - "PublicDescription": "All requests to L2 cache.", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "Errata": "HSD78, HSD62, HSD61", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "Offcore": "1", "EventCode": "0xB7, 0xBB", @@ -843,6 +846,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -855,6 +859,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -867,6 +872,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -879,6 +885,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -891,6 +898,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -903,6 +911,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -915,6 +924,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -927,6 +937,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -939,6 +950,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -951,6 +963,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -963,6 +976,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -975,6 +989,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -987,6 +1002,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -999,6 +1015,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1011,6 +1028,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1023,6 +1041,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1035,6 +1054,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1047,6 +1067,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1059,6 +1080,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1071,6 +1093,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_HIT.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that hit in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json b/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json index 6282aed6e090..bc08cc1f2f7e 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/floating-point.json @@ -19,6 +19,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xC6", + "UMask": "0x7", + "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", + "Counter": "0,1,2,3", + "EventName": "AVX_INSTS.ALL", + "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xCA", "UMask": "0x2", @@ -69,15 +79,5 @@ "PublicDescription": "Cycles with any input/output SSE* or FP assists.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC6", - "UMask": "0x7", - "BriefDescription": "Approximate counts of AVX & AVX2 256-bit instructions, including non-arithmetic instructions, loads, and stores. May count non-AVX instructions that employ 256-bit operations, including (but not necessarily limited to) rep string instructions that use 256-bit loads and stores for optimized performance, XSAVE* and XRSTOR*, and operations that transition the x87 FPU data registers between x87 and MMX.", - "Counter": "0,1,2,3", - "EventName": "AVX_INSTS.ALL", - "PublicDescription": "Note that a whole rep string only counts AVX_INST.ALL once.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswellx/frontend.json b/tools/perf/pmu-events/arch/x86/haswellx/frontend.json index 2d0c7aac1e61..a4d9f1fcf940 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/frontend.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/frontend.json @@ -22,72 +22,41 @@ }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_UOPS", - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x4", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "EventName": "IDQ.MITE_CYCLES", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "UMask": "0x8", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", + "EventName": "IDQ.DSB_CYCLES", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "UMask": "0x10", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", - "CounterMask": "1", + "EventName": "IDQ.MS_DSB_UOPS", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -134,6 +103,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x79", + "UMask": "0x20", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x79", "UMask": "0x24", @@ -156,6 +135,38 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "This event counts uops delivered by the Front-end with the assistance of the microcode sequencer. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which the microcode sequencer assisted the Front-end in delivering uops. Microcode assists are used for complex instructions or scenarios that can't be handled by the standard decoder. Using other instructions, if possible, will usually improve performance.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x79", "UMask": "0x3c", @@ -194,6 +205,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x80", + "UMask": "0x4", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", + "Counter": "0,1,2,3", + "EventName": "ICACHE.IFDATA_STALL", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x9C", "UMask": "0x1", @@ -270,25 +290,5 @@ "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x80", - "UMask": "0x4", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction-cache miss.", - "Counter": "0,1,2,3", - "EventName": "ICACHE.IFDATA_STALL", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswellx/memory.json b/tools/perf/pmu-events/arch/x86/haswellx/memory.json index 0886cc000d22..56b0f24b8029 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/memory.json @@ -409,6 +409,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -421,6 +422,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -433,6 +435,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -445,6 +448,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -457,6 +461,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -469,6 +474,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -481,6 +487,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -493,6 +500,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -505,6 +513,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -517,6 +526,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -529,6 +539,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -541,6 +552,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -553,6 +565,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts prefetch (that bring data to LLC only) code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -565,6 +578,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -577,6 +591,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -589,6 +604,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -601,6 +617,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -613,6 +630,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -625,6 +643,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -637,6 +656,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -649,6 +669,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -661,6 +682,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all demand & prefetch code reads that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -673,6 +695,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -685,6 +708,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from local dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -697,6 +721,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_DRAM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the data is returned from remote dram Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -709,6 +734,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and the modified data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -721,6 +747,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the L3 and clean or shared data is transferred from remote cache Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -733,6 +760,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_REQUESTS.LLC_MISS.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts all requests that miss in the L3 Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/haswellx/other.json b/tools/perf/pmu-events/arch/x86/haswellx/other.json index 4e1b6ce96ca3..800e65df31bc 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/other.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/other.json @@ -9,16 +9,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x5C", - "UMask": "0x2", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "Counter": "0,1,2,3", - "EventName": "CPL_CYCLES.RING123", - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EdgeDetect": "1", "EventCode": "0x5C", @@ -30,6 +20,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5C", + "UMask": "0x2", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "Counter": "0,1,2,3", + "EventName": "CPL_CYCLES.RING123", + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x63", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json index c3a163d34bd7..8a18bfe9e3e4 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/pipeline.json @@ -3,32 +3,42 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "Errata": "HSD140, HSD143", "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. INST_RETIRED.ANY is counted by a designated fixed counter, leaving the programmable counters available for other events. Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state.", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "This event counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -63,7 +73,7 @@ { "EventCode": "0x0D", "UMask": "0x3", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "Counter": "0,1,2,3", "EventName": "INT_MISC.RECOVERY_CYCLES", "CounterMask": "1", @@ -71,6 +81,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0D", + "UMask": "0x3", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x0E", "UMask": "0x1", @@ -81,6 +103,29 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0x0E", "UMask": "0x10", @@ -112,34 +157,31 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread.", + "EventCode": "0x14", + "UMask": "0x2", + "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", + "EventName": "ARITH.DIVIDER_UOPS", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for all threads.", + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Thread cycles when thread is not in halt state", "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.CORE_STALL_CYCLES", - "AnyThread": "1", - "CounterMask": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x14", - "UMask": "0x2", - "BriefDescription": "Any uop executed by the Divider. (This includes all divide uops, sqrt, ...)", + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "Counter": "0,1,2,3", - "EventName": "ARITH.DIVIDER_UOPS", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -153,6 +195,38 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "UMask": "0x2", @@ -162,6 +236,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4c", "UMask": "0x1", @@ -232,6 +315,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "Invert": "1", + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_END", + "CounterMask": "1", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "UMask": "0x1", @@ -406,6 +501,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "UMask": "0xa0", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "UMask": "0xc1", @@ -446,135 +550,281 @@ }, { "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "UMask": "0x1", + "BriefDescription": "Cycles per core when uops are executed in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", - "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 0.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when uops are executed in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", - "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", - "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "PublicDescription": "Cycles which a uop is dispatched on port 1 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x2", + "BriefDescription": "Cycles per core when uops are executed in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "PublicDescription": "Cycles which a uop is dispatched on port 2 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x4", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", + "EventCode": "0xA1", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "Errata": "HSD135", - "PublicDescription": "Cycles allocation is stalled due to resource related reason.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x4", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.RS", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "PublicDescription": "Cycles which a uop is dispatched on port 3 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", "UMask": "0x10", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ROB", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "PublicDescription": "Cycles which a uop is dispatched on port 4 in this thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles with pending L2 cache miss loads.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per core when uops are executed in port 4.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "CounterMask": "1", - "Errata": "HSD78", - "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "PublicDescription": "Cycles which a uop is dispatched on port 5 in this thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per core when uops are executed in port 5.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 5.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "PublicDescription": "Cycles which a uop is dispatched on port 6 in this thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per core when uops are executed in port 6.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "AnyThread": "1", + "PublicDescription": "Cycles per core when uops are exectuted in port 6.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "PublicDescription": "Cycles which a uop is dispatched on port 7 in this thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7.", + "Counter": "0,1,2,3", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x1", + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ANY", + "Errata": "HSD135", + "PublicDescription": "Cycles allocation is stalled due to resource related reason.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x4", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.RS", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", "UMask": "0x8", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", - "CounterMask": "8", - "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "This event counts cycles during which no instructions were allocated because no Store Buffers (SB) were available.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA2", + "UMask": "0x10", + "BriefDescription": "Cycles stalled due to re-order buffer full.", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ROB", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "UMask": "0x1", + "BriefDescription": "Cycles with pending L2 cache miss loads.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", + "CounterMask": "1", + "Errata": "HSD78", + "PublicDescription": "Cycles with pending L2 miss loads. Set Cmask=2 to count cycle.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA3", @@ -590,7 +840,7 @@ { "EventCode": "0xA3", "UMask": "0x4", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "Counter": "0,1,2,3", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "CounterMask": "4", @@ -620,6 +870,17 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "CounterMask": "8", + "PublicDescription": "Cycles with pending L1 data cache miss loads. Set Cmask=8 to count cycle.", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, { "EventCode": "0xA3", "UMask": "0xc", @@ -642,13 +903,22 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "Errata": "HSD30, HSM31", - "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", + "EventName": "LSD.CYCLES_ACTIVE", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "Counter": "0,1,2,3", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -665,68 +935,169 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "HSD11, HSD140", - "PublicDescription": "Number of instructions at retirement.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "CounterMask": "1", + "Errata": "HSD144, HSD30, HSM31", + "PublicDescription": "This events counts the cycles where at least one uop was executed. It is counted per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC0", - "UMask": "0x2", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.X87", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "CounterMask": "2", + "Errata": "HSD144, HSD30, HSM31", + "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC0", + "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "HSD140", - "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", - "SampleAfterValue": "2000003", - "CounterHTOff": "1" - }, - { - "EventCode": "0xC1", - "UMask": "0x40", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "CounterMask": "3", + "Errata": "HSD144, HSD30, HSM31", + "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC2", + "EventCode": "0xB1", "UMask": "0x1", - "BriefDescription": "Actually retired uops.", - "Data_LA": "1", - "PEBS": "1", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "Counts the number of micro-ops retired. Use Cmask=1 and invert to count active cycles or stalled cycles.", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "CounterMask": "4", + "Errata": "HSD144, HSD30, HSM31", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC2", + "EventCode": "0xB1", "UMask": "0x2", - "BriefDescription": "Retirement slots used.", - "PEBS": "1", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This event counts the number of retirement slots used each cycle. There are potentially 4 slots that can be used each cycle - meaning, 4 uops or 4 instructions could retire each cycle.", + "EventName": "UOPS_EXECUTED.CORE", + "Errata": "HSD30, HSM31", + "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", + "CounterMask": "1", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "CounterMask": "2", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterMask": "3", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "Invert": "1", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "Errata": "HSD30, HSM31", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC0", + "UMask": "0x0", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "Counter": "0,1,2,3", + "EventName": "INST_RETIRED.ANY_P", + "Errata": "HSD11, HSD140", + "PublicDescription": "Number of instructions at retirement.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC0", + "UMask": "0x1", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "PEBS": "2", + "Counter": "1", + "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "HSD140", + "PublicDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution.", + "SampleAfterValue": "2000003", + "CounterHTOff": "1" + }, + { + "EventCode": "0xC0", + "UMask": "0x2", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions: Counts also flows that have several X87 or flows that use X87 uops in the exception handling.", + "Counter": "0,1,2,3", + "EventName": "INST_RETIRED.X87", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC1", + "UMask": "0x40", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "Counter": "0,1,2,3", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "PublicDescription": "Number of microcode assists invoked by HW upon uop writeback.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC2", + "UMask": "0x1", + "BriefDescription": "Actually retired uops.", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -764,6 +1135,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC2", + "UMask": "0x2", + "BriefDescription": "Retirement slots used.", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC3", "UMask": "0x1", @@ -773,6 +1154,17 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "Counter": "0,1,2,3", + "EventName": "MACHINE_CLEARS.COUNT", + "CounterMask": "1", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC3", "UMask": "0x4", @@ -792,6 +1184,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xC4", + "UMask": "0x0", + "BriefDescription": "All (macro) branch instructions retired.", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "Branch instructions at retirement.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xC4", "UMask": "0x1", @@ -799,7 +1201,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -815,13 +1216,23 @@ }, { "EventCode": "0xC4", - "UMask": "0x0", + "UMask": "0x2", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC4", + "UMask": "0x4", "BriefDescription": "All (macro) branch instructions retired.", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "Branch instructions at retirement.", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xC4", @@ -830,7 +1241,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "Counts the number of near return instructions retired.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -851,7 +1261,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near taken branches retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -866,14 +1275,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", - "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired.", - "PEBS": "2", + "EventCode": "0xC5", + "UMask": "0x0", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "Mispredicted branch instructions at retirement.", "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC5", @@ -885,20 +1294,10 @@ "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "Mispredicted branch instructions at retirement.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0xC5", "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. ", + "BriefDescription": "Mispredicted macro branch instructions retired.", "PEBS": "2", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", @@ -907,423 +1306,33 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xCC", + "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "Count cases of saving new LBR", - "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "PublicDescription": "Count cases of saving new LBR records by hardware.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Thread cycles when thread is not in halt state", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PublicDescription": "Counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "UMask": "0xa0", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "EventCode": "0xCC", + "UMask": "0x20", + "BriefDescription": "Count cases of saving new LBR", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", - "AnyThread": "1", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "Count cases of saving new LBR records by hardware.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were taken but mispredicted.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "CounterMask": "1", - "Errata": "HSD144, HSD30, HSM31", - "PublicDescription": "This events counts the cycles where at least one uop was executed. It is counted per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "CounterMask": "2", - "Errata": "HSD144, HSD30, HSM31", - "PublicDescription": "This events counts the cycles where at least two uop were executed. It is counted per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "CounterMask": "3", - "Errata": "HSD144, HSD30, HSM31", - "PublicDescription": "This events counts the cycles where at least three uop were executed. It is counted per thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "CounterMask": "4", - "Errata": "HSD144, HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xe6", - "UMask": "0x1f", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "EventCode": "0xe6", + "UMask": "0x1f", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", "EventName": "BACLEARS.ANY", "PublicDescription": "Number of front end re-steers due to BPU misprediction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0xC3", - "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", - "CounterMask": "1", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", - "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", - "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", - "CounterMask": "1", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per thread when uops are executed in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7.", - "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x00", - "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" - }, - { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "UMask": "0x3", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke)", - "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "PublicDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "Invert": "1", - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "Errata": "HSD30, HSM31", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate)", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "PublicDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json b/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json index 9c00f8ef6a07..168df552b1a8 100644 --- a/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/haswellx/virtual-memory.json @@ -38,6 +38,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0xe", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x10", @@ -68,6 +78,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0x60", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "PublicDescription": "Number of cache load STLB hits. No page walk.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x80", @@ -117,6 +137,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0xe", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x10", @@ -147,6 +177,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0x60", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x80", @@ -205,6 +245,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0xe", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "PublicDescription": "Completed page walks in ITLB of any page size.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x85", "UMask": "0x10", @@ -235,6 +285,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0x60", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.STLB_HIT", + "PublicDescription": "ITLB misses that hit STLB. No page walk.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xae", "UMask": "0x1", @@ -257,39 +317,43 @@ }, { "EventCode": "0xBC", - "UMask": "0x21", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB", + "UMask": "0x12", + "BriefDescription": "Number of DTLB page walker hits in the L2", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", - "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x41", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", + "UMask": "0x14", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "Errata": "HSD25", + "PublicDescription": "Number of DTLB page walker loads that hit in the L3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x81", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", + "UMask": "0x18", + "BriefDescription": "Number of DTLB page walker hits in Memory", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "Errata": "HSD25", + "PublicDescription": "Number of DTLB page walker loads from memory.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x12", - "BriefDescription": "Number of DTLB page walker hits in the L2", + "UMask": "0x21", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", - "PublicDescription": "Number of DTLB page walker loads that hit in the L2.", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "PublicDescription": "Number of ITLB page walker loads that hit in the L1+FB.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -305,41 +369,41 @@ }, { "EventCode": "0xBC", - "UMask": "0x42", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", + "UMask": "0x24", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "Errata": "HSD25", + "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x82", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "UMask": "0x28", + "BriefDescription": "Number of ITLB page walker hits in Memory", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", + "Errata": "HSD25", + "PublicDescription": "Number of ITLB page walker loads from memory.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x14", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP", + "UMask": "0x41", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L1 and FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", - "Errata": "HSD25", - "PublicDescription": "Number of DTLB page walker loads that hit in the L3.", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x24", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP", + "UMask": "0x42", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", - "Errata": "HSD25", - "PublicDescription": "Number of ITLB page walker loads that hit in the L3.", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_L2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -354,41 +418,37 @@ }, { "EventCode": "0xBC", - "UMask": "0x84", - "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", + "UMask": "0x48", + "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x18", - "BriefDescription": "Number of DTLB page walker hits in Memory", + "UMask": "0x81", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L1 and FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", - "Errata": "HSD25", - "PublicDescription": "Number of DTLB page walker loads from memory.", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x28", - "BriefDescription": "Number of ITLB page walker hits in Memory", + "UMask": "0x82", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_MEMORY", - "Errata": "HSD25", - "PublicDescription": "Number of ITLB page walker loads from memory.", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L2", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x48", - "BriefDescription": "Counts the number of Extended Page Table walks from the DTLB that hit in memory.", + "UMask": "0x84", + "BriefDescription": "Counts the number of Extended Page Table walks from the ITLB that hit in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.EPT_DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.EPT_ITLB_L3", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -420,65 +480,5 @@ "PublicDescription": "Count number of STLB flush attempts.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "PublicDescription": "Completed page walks in any TLB of any page size due to demand load misses.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x60", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "PublicDescription": "Number of cache load STLB hits. No page walk.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "PublicDescription": "Completed page walks due to store miss in any TLB levels of any page size (4K/2M/4M/1G).", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x60", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "PublicDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0xe", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "PublicDescription": "Completed page walks in ITLB of any page size.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x60", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", - "PublicDescription": "ITLB misses that hit STLB. No page walk.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From c955cd2b04200cae6a00284d6c6cda09c6eeaced Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:50:11 -0800 Subject: perf vendor events intel: Update IvyBridge events to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/ivybridge/cache.json | 567 +++----------- .../pmu-events/arch/x86/ivybridge/frontend.json | 122 +-- .../perf/pmu-events/arch/x86/ivybridge/memory.json | 92 +-- .../perf/pmu-events/arch/x86/ivybridge/other.json | 20 +- .../pmu-events/arch/x86/ivybridge/pipeline.json | 822 +++++++++++---------- .../arch/x86/ivybridge/virtual-memory.json | 60 +- 6 files changed, 634 insertions(+), 1049 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json index f1ee6d4853c5..3c0a28e27d73 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json @@ -9,6 +9,16 @@ "BriefDescription": "Demand Data Read requests that hit L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "RFO requests that hit L2 cache.", "EventCode": "0x24", @@ -29,6 +39,16 @@ "BriefDescription": "RFO requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 store RFO requests.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xc", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of instruction fetches that hit the L2 cache.", "EventCode": "0x24", @@ -49,6 +69,16 @@ "BriefDescription": "L2 cache misses when fetching instructions", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 code requests.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", @@ -69,36 +99,6 @@ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 store RFO requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 code requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Counts all L2 HW prefetcher requests.", "EventCode": "0x24", @@ -218,6 +218,29 @@ "CounterMask": "1", "CounterHTOff": "2" }, + { + "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "EventCode": "0x48", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts the number of lines brought into the L1 data cache.", "EventCode": "0x51", @@ -239,76 +262,87 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", + "CounterMask": "6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -379,7 +413,7 @@ "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -389,7 +423,7 @@ "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -399,7 +433,7 @@ "UMask": "0x21", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -409,7 +443,7 @@ "UMask": "0x41", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -419,7 +453,7 @@ "UMask": "0x42", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -429,7 +463,7 @@ "UMask": "0x81", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -439,67 +473,61 @@ "UMask": "0x82", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L1 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources. ", + "BriefDescription": "Retired load uops with L1 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L2 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources. ", + "BriefDescription": "Retired load uops with L2 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required. ", + "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source followed an L1 miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources following L1 data-cache miss", + "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops that missed L2, excluding unknown sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source is LLC miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x20", @@ -510,61 +538,56 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. ", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache LLC hit and cross-core snoop missed.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package LLC hit and cross-core snoop hits.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache with HitM responses.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC. ", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required. ", + "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI)", + "PublicDescription": "Retired load uops whose data source was local memory (cross-socket snoop not needed or missed).", "EventCode": "0xD3", "Counter": "0,1,2,3", "UMask": "0x1", @@ -751,373 +774,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Retired load uops whose data source was local memory (cross-socket snoop not needed or missed).", - "EventCode": "0xD3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops which data sources missed LLC but serviced from local dram.", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0122", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0122", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10008", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all writebacks from the core to the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x18000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10400", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10800", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts non-temporal stores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00010001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00010002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand rfo's ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00010004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x000105B3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00010122", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch prefetch RFOs ", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x000107F7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo references (demand & prefetch) ", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json b/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json index de72b84b3536..efaa949ead31 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/frontend.json @@ -20,76 +20,45 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -137,6 +106,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.", "EventCode": "0x79", @@ -159,6 +138,39 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of uops delivered to IDQ from any path.", "EventCode": "0x79", @@ -206,7 +218,7 @@ "UMask": "0x1", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled ", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "CounterHTOff": "0,1,2,3" }, { @@ -289,17 +301,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json index e1c6a1d4a4d5..6005b364c580 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json @@ -37,18 +37,6 @@ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PEBS": "2", - "EventCode": "0xCD", - "Counter": "3", - "UMask": "0x2", - "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.", - "PRECISE_STORE": "1", - "TakenAlone": "1", - "CounterHTOff": "3" - }, { "PEBS": "2", "PublicDescription": "Loads with latency value being above 4.", @@ -162,75 +150,15 @@ "CounterHTOff": "3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3004003f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x300400001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x6004001b3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts LLC replacements", - "CounterHTOff": "0,1,2,3" + "PEBS": "2", + "EventCode": "0xCD", + "Counter": "3", + "UMask": "0x2", + "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.", + "PRECISE_STORE": "1", + "TakenAlone": "1", + "CounterHTOff": "3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/other.json b/tools/perf/pmu-events/arch/x86/ivybridge/other.json index 9c2dd0511a32..4eb83ee40412 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/other.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": "2000003", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json index 2145c28193f7..0afbfd95ea30 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/pipeline.json @@ -1,30 +1,41 @@ [ { "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.", @@ -77,6 +88,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any= 1to count stalled cycles of this core.", "EventCode": "0x0E", @@ -174,6 +196,17 @@ "BriefDescription": "Thread cycles when thread is not in halt state", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.", "EventCode": "0x3C", @@ -184,6 +217,36 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "Counter": "0,1,2,3", @@ -193,6 +256,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.", "EventCode": "0x4C", @@ -216,37 +288,37 @@ { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", + "UMask": "0x1", + "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", + "UMask": "0x2", + "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", + "UMask": "0x4", + "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", + "UMask": "0x8", + "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -259,6 +331,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "Counter": "0,1,2,3", @@ -498,118 +582,118 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 1.", + "PublicDescription": "Cycles per core when uops are dispatched to port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 1", + "BriefDescription": "Cycles per core when uops are dispatched to port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 5.", + "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 0.", + "PublicDescription": "Cycles which a Uop is dispatched on port 2.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", + "UMask": "0xc", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 0", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", + "UMask": "0xc", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 1", + "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", + "UMask": "0x30", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 5.", + "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", + "UMask": "0x30", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 2.", + "PublicDescription": "Cycles which a Uop is dispatched on port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", + "BriefDescription": "Cycles per thread when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 3.", + "PublicDescription": "Cycles per core when uops are dispatched to port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles which a Uop is dispatched on port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", + "BriefDescription": "Cycles per thread when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "PublicDescription": "Cycles per core when uops are dispatched to port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", + "UMask": "0x80", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -662,15 +746,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles while L2 cache miss load* is outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Cycles with pending memory loads. Set AnyThread to count per core.", @@ -683,6 +766,16 @@ "CounterMask": "2", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Total execution stalls.", "EventCode": "0xA3", @@ -690,7 +783,17 @@ "UMask": "0x4", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "2000003", + "BriefDescription": "Total execution stalls.", "CounterMask": "4", "CounterHTOff": "0,1,2,3" }, @@ -705,6 +808,16 @@ "CounterMask": "5", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", + "CounterMask": "5", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA3", "Counter": "0,1,2,3", @@ -716,16 +829,57 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "SampleAfterValue": "2000003", + "Counter": "0,1,2,3", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "SampleAfterValue": "2000003", "BriefDescription": "Execution stalls due to L1 data cache misses", "CounterMask": "12", "CounterHTOff": "2" }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" + }, { "EventCode": "0xA8", "Counter": "0,1,2,3", @@ -746,6 +900,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xA8", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. Set Cmask = 1, INV =1 to count stall cycles.", "EventCode": "0xB1", @@ -756,6 +921,61 @@ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", "EventCode": "0xB1", @@ -767,15 +987,59 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "EventCode": "0xB1", - "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Number of instructions at retirement.", @@ -809,24 +1073,12 @@ }, { "PEBS": "1", - "PublicDescription": "Counts the number of micro-ops retired, Use cmask=1 and invert to count active cycles or stalled cycles.", "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Counts the number of retirement slots used each cycle.", - "EventCode": "0xC2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used. ", + "BriefDescription": "Retired uops.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -863,6 +1115,27 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of self-modifying-code machine clears detected.", "EventCode": "0xC3", @@ -880,50 +1153,67 @@ "UMask": "0x20", "EventName": "MACHINE_CLEARS.MASKMOV", "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0. ", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Branch instructions at retirement.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "BR_INST_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired. ", + "BriefDescription": "Conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Direct and indirect near call instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "BR_INST_RETIRED.NEAR_CALL", "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired. ", + "BriefDescription": "Direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Branch instructions at retirement.", + "PEBS": "1", "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Counts the number of near return instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired. ", + "BriefDescription": "Return instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -933,18 +1223,17 @@ "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired. ", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Number of near taken branches retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x20", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired. ", + "BriefDescription": "Taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -954,28 +1243,7 @@ "UMask": "0x40", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "Mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired. ", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -990,13 +1258,12 @@ }, { "PEBS": "1", - "PublicDescription": "Mispredicted taken branch instructions retired.", "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "UMask": "0x1", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. ", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1009,6 +1276,16 @@ "BriefDescription": "Mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Count cases of saving new LBR records by hardware.", "EventCode": "0xCC", @@ -1028,280 +1305,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss load* is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "Fixed counter 2" - }, - { - "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "EventCode": "0xB1", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json index f036f5398906..f243551b4d12 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/virtual-memory.json @@ -1,4 +1,34 @@ [ + { + "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x81", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x82", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x84", + "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", + "SampleAfterValue": "2000003", + "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -146,35 +176,5 @@ "SampleAfterValue": "100007", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", - "SampleAfterValue": "2000003", - "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 194b6fa41a06273166c5016980a67121bd1460c6 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:51:14 -0800 Subject: perf vendor events intel: Update IvyTown events to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/ivytown/cache.json | 692 +++-------------- .../perf/pmu-events/arch/x86/ivytown/frontend.json | 122 +-- tools/perf/pmu-events/arch/x86/ivytown/memory.json | 368 +-------- tools/perf/pmu-events/arch/x86/ivytown/other.json | 20 +- .../perf/pmu-events/arch/x86/ivytown/pipeline.json | 822 +++++++++++---------- .../arch/x86/ivytown/virtual-memory.json | 60 +- 6 files changed, 635 insertions(+), 1449 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/ivytown/cache.json b/tools/perf/pmu-events/arch/x86/ivytown/cache.json index ff27a620edd8..d8cc93b3a04c 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/cache.json @@ -9,6 +9,16 @@ "BriefDescription": "Demand Data Read requests that hit L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x3", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "RFO requests that hit L2 cache.", "EventCode": "0x24", @@ -29,6 +39,16 @@ "BriefDescription": "RFO requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 store RFO requests.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0xc", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of instruction fetches that hit the L2 cache.", "EventCode": "0x24", @@ -49,6 +69,16 @@ "BriefDescription": "L2 cache misses when fetching instructions", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Counts all L2 code requests.", + "EventCode": "0x24", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts all L2 HW prefetcher requests that hit L2.", "EventCode": "0x24", @@ -69,36 +99,6 @@ "BriefDescription": "Requests from the L2 hardware prefetchers that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Counts any demand and L1 HW prefetch data load requests to L2.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 store RFO requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts all L2 code requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Counts all L2 HW prefetcher requests.", "EventCode": "0x24", @@ -218,6 +218,29 @@ "CounterMask": "1", "CounterHTOff": "2" }, + { + "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "EventCode": "0x48", + "Counter": "2", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", + "CounterMask": "1", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "EventCode": "0x48", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts the number of lines brought into the L1 data cache.", "EventCode": "0x51", @@ -239,76 +262,87 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "UMask": "0x1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", + "CounterMask": "6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", + "PublicDescription": "Offcore outstanding Demand Code Read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "PublicDescription": "Offcore outstanding RFO store transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", + "BriefDescription": "Offcore outstanding RFO store transactions in SuperQueue (SQ), queue to uncore", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding code reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle.", + "PublicDescription": "Offcore outstanding cacheable data read transactions in SQ to uncore. Set Cmask=1 to count cycles.", "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -379,7 +413,7 @@ "UMask": "0x11", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -389,7 +423,7 @@ "UMask": "0x12", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -399,7 +433,7 @@ "UMask": "0x21", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -409,7 +443,7 @@ "UMask": "0x41", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -419,7 +453,7 @@ "UMask": "0x42", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -429,7 +463,7 @@ "UMask": "0x81", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", "SampleAfterValue": "2000003", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { @@ -439,67 +473,61 @@ "UMask": "0x82", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", "SampleAfterValue": "2000003", - "BriefDescription": "All retired store uops.", + "BriefDescription": "All retired store uops. (Precise Event)", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L1 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", "SampleAfterValue": "2000003", - "BriefDescription": "Retired load uops with L1 cache hits as data sources. ", + "BriefDescription": "Retired load uops with L1 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops with L2 cache hits as data sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops with L2 cache hits as data sources. ", + "BriefDescription": "Retired load uops with L2 cache hits as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_RETIRED.LLC_HIT", "SampleAfterValue": "50021", - "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required. ", + "BriefDescription": "Retired load uops which data sources were data hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source followed an L1 miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources following L1 data-cache miss", + "BriefDescription": "Retired load uops which data sources following L1 data-cache miss.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops that missed L2, excluding unknown sources.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", "SampleAfterValue": "50021", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source is LLC miss.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x20", @@ -510,67 +538,61 @@ }, { "PEBS": "1", - "PublicDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x40", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. ", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache LLC hit and cross-core snoop missed.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_MISS", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were LLC hit and cross-core snoop missed in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package LLC hit and cross-core snoop hits.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HIT", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache. ", + "BriefDescription": "Retired load uops which data sources were LLC and cross-core snoop hits in on-pkg core cache.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was an on-package core cache with HitM responses.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_HITM", "SampleAfterValue": "20011", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC. ", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared LLC.", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Retired load uops whose data source was LLC hit with no snoop required.", "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "MEM_LOAD_UOPS_LLC_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required. ", + "BriefDescription": "Retired load uops which data sources were hits in LLC without snoops required.", "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI)", "EventCode": "0xD3", "Counter": "0,1,2,3", - "UMask": "0x1", + "UMask": "0x3", "EventName": "MEM_LOAD_UOPS_LLC_MISS_RETIRED.LOCAL_DRAM", "SampleAfterValue": "100007", - "BriefDescription": "Retired load uops which data sources missed LLC but serviced from local dram.", + "BriefDescription": "Retired load uops whose data source was local DRAM (Snoop not needed, Snoop Miss, or Snoop Hit data not forwarded).", "CounterHTOff": "0,1,2,3" }, { @@ -778,495 +800,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "EventCode": "0x48", - "Counter": "2", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "2" - }, - { - "PublicDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch data reads that hit the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0090", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c03f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10008", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all writebacks from the core to the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x803c8000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x23ffc08000", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0040", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0200", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3f803c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x4003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2003c0080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10400", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10800", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts non-temporal stores", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/frontend.json b/tools/perf/pmu-events/arch/x86/ivytown/frontend.json index de72b84b3536..efaa949ead31 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/frontend.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/frontend.json @@ -20,76 +20,45 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "IDQ.MS_MITE_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "PublicDescription": "Increment each cycle. # of uops delivered to IDQ from DSB path. Set Cmask = 1 to count cycles.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path.", + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path.", + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by DSB. Set Cmask = 1 to count cycles. Add Edge=1 to count # of delivery.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", - "CounterMask": "1", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -137,6 +106,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ when MS_busy by MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "IDQ.MS_MITE_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts cycles MITE is delivered four uops. Set Cmask = 4.", "EventCode": "0x79", @@ -159,6 +138,39 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Increment each cycle # of uops delivered to IDQ from MS by either DSB or MITE. Set Cmask = 1 to count cycles.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of uops delivered to IDQ from any path.", "EventCode": "0x79", @@ -206,7 +218,7 @@ "UMask": "0x1", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled ", + "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "CounterHTOff": "0,1,2,3" }, { @@ -289,17 +301,5 @@ "SampleAfterValue": "2000003", "BriefDescription": "Cycles when Decode Stream Buffer (DSB) fill encounter more than 3 Decode Stream Buffer (DSB) lines", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/memory.json b/tools/perf/pmu-events/arch/x86/ivytown/memory.json index 437d98f3e344..4ec94df8d70b 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/memory.json @@ -28,18 +28,6 @@ "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PEBS": "2", - "EventCode": "0xCD", - "Counter": "3", - "UMask": "0x2", - "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", - "SampleAfterValue": "2000003", - "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.", - "PRECISE_STORE": "1", - "TakenAlone": "1", - "CounterHTOff": "3" - }, { "PEBS": "2", "PublicDescription": "Loads with latency value being above 4.", @@ -153,351 +141,15 @@ "CounterHTOff": "3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc00244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67f800244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f800244", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20091", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand & prefetch data reads that hits the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc203f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x6004003f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f8203f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc003f7", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x600400004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67f800004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f820004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc00004", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67fc00001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote & local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x600400001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67f800001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f820001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc00001", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc20002", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that miss the LLC and the data is found in M state in remote cache and forwarded from there.", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20040", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC and the data returned from remote & local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67fc00010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote & local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x600400010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from local dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x67f800010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x87f820010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data forwarded from remote cache", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x107fc00010", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20200", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fffc20080", - "Counter": "0,1,2,3", - "UMask": "0x1", - "Offcore": "1", - "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that miss in the LLC", - "CounterHTOff": "0,1,2,3" + "PEBS": "2", + "EventCode": "0xCD", + "Counter": "3", + "UMask": "0x2", + "EventName": "MEM_TRANS_RETIRED.PRECISE_STORE", + "SampleAfterValue": "2000003", + "BriefDescription": "Sample stores and collect precise store operation via PEBS record. PMC3 only.", + "PRECISE_STORE": "1", + "TakenAlone": "1", + "CounterHTOff": "3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/other.json b/tools/perf/pmu-events/arch/x86/ivytown/other.json index 9c2dd0511a32..4eb83ee40412 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/other.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/other.json @@ -9,16 +9,6 @@ "BriefDescription": "Unhalted core cycles when the thread is in ring 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", - "EventCode": "0x5C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPL_CYCLES.RING123", - "SampleAfterValue": "2000003", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PublicDescription": "Number of intervals between processor halts while thread is in ring 0.", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Unhalted core cycles when the thread is not in ring 0.", + "EventCode": "0x5C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPL_CYCLES.RING123", + "SampleAfterValue": "2000003", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Cycles in which the L1D and L2 are locked, due to a UC lock or split lock.", "EventCode": "0x63", diff --git a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json index 2145c28193f7..0afbfd95ea30 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/pipeline.json @@ -1,30 +1,41 @@ [ { "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state.", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "PublicDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded.", @@ -77,6 +88,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x0D", + "Counter": "0,1,2,3", + "UMask": "0x3", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments each cycle the # of Uops issued by the RAT to RS. Set Cmask = 1, Inv = 1, Any= 1to count stalled cycles of this core.", "EventCode": "0x0E", @@ -174,6 +196,17 @@ "BriefDescription": "Thread cycles when thread is not in halt state", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Increments at the frequency of XCLK (100 MHz) when not halted.", "EventCode": "0x3C", @@ -184,6 +217,36 @@ "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "Counter": "0,1,2,3", @@ -193,6 +256,15 @@ "BriefDescription": "Count XClk pulses when this thread is unhalted and the other is halted.", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Non-SW-prefetch load dispatches that hit fill buffer allocated for S/W prefetch.", "EventCode": "0x4C", @@ -216,37 +288,37 @@ { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", + "UMask": "0x1", + "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", + "UMask": "0x2", + "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MOVE_ELIMINATION.INT_ELIMINATED", + "UMask": "0x4", + "EventName": "MOVE_ELIMINATION.INT_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of integer Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of integer Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x58", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MOVE_ELIMINATION.SIMD_ELIMINATED", + "UMask": "0x8", + "EventName": "MOVE_ELIMINATION.SIMD_NOT_ELIMINATED", "SampleAfterValue": "1000003", - "BriefDescription": "Number of SIMD Move Elimination candidate uops that were eliminated.", + "BriefDescription": "Number of SIMD Move Elimination candidate uops that were not eliminated.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -259,6 +331,18 @@ "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5E", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", + "SampleAfterValue": "200003", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "Counter": "0,1,2,3", @@ -498,118 +582,118 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 1.", + "PublicDescription": "Cycles per core when uops are dispatched to port 0.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 1", + "BriefDescription": "Cycles per core when uops are dispatched to port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x2", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 5.", + "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when uops are dispatched to port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 0.", + "PublicDescription": "Cycles which a Uop is dispatched on port 2.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0_CORE", + "UMask": "0xc", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 0", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 1.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x2", + "UMask": "0xc", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 1", + "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 4.", + "PublicDescription": "Cycles which a Uop is dispatched on port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x40", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", + "UMask": "0x30", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 4", + "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when uops are dispatched to port 5.", + "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x80", + "UMask": "0x30", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when uops are dispatched to port 5", + "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 2.", + "PublicDescription": "Cycles which a Uop is dispatched on port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x40", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 2", + "BriefDescription": "Cycles per thread when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles which a Uop is dispatched on port 3.", + "PublicDescription": "Cycles per core when uops are dispatched to port 4.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x40", + "AnyThread": "1", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles which a Uop is dispatched on port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0xc", - "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2_CORE", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", "SampleAfterValue": "2000003", - "BriefDescription": "Uops dispatched to port 2, loads and stores per core (speculative and retired).", + "BriefDescription": "Cycles per thread when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles per core when load or STA uops are dispatched to port 3.", + "PublicDescription": "Cycles per core when uops are dispatched to port 5.", "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x30", + "UMask": "0x80", "AnyThread": "1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3_CORE", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5_CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per core when load or STA uops are dispatched to port 3", + "BriefDescription": "Cycles per core when uops are dispatched to port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -662,15 +746,14 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with pending L1 cache miss loads.", - "CounterMask": "8", - "CounterHTOff": "2" + "BriefDescription": "Cycles while L2 cache miss load* is outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Cycles with pending memory loads. Set AnyThread to count per core.", @@ -683,6 +766,16 @@ "CounterMask": "2", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3" + }, { "PublicDescription": "Total execution stalls.", "EventCode": "0xA3", @@ -690,7 +783,17 @@ "UMask": "0x4", "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "SampleAfterValue": "2000003", + "BriefDescription": "Total execution stalls.", "CounterMask": "4", "CounterHTOff": "0,1,2,3" }, @@ -705,6 +808,16 @@ "CounterMask": "5", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", + "CounterMask": "5", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA3", "Counter": "0,1,2,3", @@ -716,16 +829,57 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.", "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "SampleAfterValue": "2000003", + "Counter": "0,1,2,3", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles with pending L1 cache miss loads. Set AnyThread to count per core.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with pending L1 cache miss loads.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", + "CounterHTOff": "2" + }, + { + "PublicDescription": "Execution stalls due to L1 data cache miss loads. Set Cmask=0CH.", + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "SampleAfterValue": "2000003", "BriefDescription": "Execution stalls due to L1 data cache misses", "CounterMask": "12", "CounterHTOff": "2" }, + { + "EventCode": "0xA3", + "Counter": "2", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", + "CounterHTOff": "2" + }, { "EventCode": "0xA8", "Counter": "0,1,2,3", @@ -746,6 +900,17 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xA8", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-thread each cycle. Set Cmask = 1, INV =1 to count stall cycles.", "EventCode": "0xB1", @@ -756,6 +921,61 @@ "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3" + }, + { + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Counts total number of uops to be executed per-core each cycle.", "EventCode": "0xB1", @@ -767,15 +987,59 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "EventCode": "0xB1", - "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", "CounterMask": "1", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", + "CounterMask": "3", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xB1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core", + "CounterMask": "4", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xB1", + "Invert": "1", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PublicDescription": "Number of instructions at retirement.", @@ -809,24 +1073,12 @@ }, { "PEBS": "1", - "PublicDescription": "Counts the number of micro-ops retired, Use cmask=1 and invert to count active cycles or stalled cycles.", "EventCode": "0xC2", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "UOPS_RETIRED.ALL", "SampleAfterValue": "2000003", - "BriefDescription": "Actually retired uops. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "Counts the number of retirement slots used each cycle.", - "EventCode": "0xC2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used. ", + "BriefDescription": "Retired uops.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -863,6 +1115,27 @@ "CounterMask": "1", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Retirement slots used.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", + "SampleAfterValue": "100003", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Number of self-modifying-code machine clears detected.", "EventCode": "0xC3", @@ -880,50 +1153,67 @@ "UMask": "0x20", "EventName": "MACHINE_CLEARS.MASKMOV", "SampleAfterValue": "100003", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0. ", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Branch instructions at retirement.", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x0", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Counts the number of conditional branch instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "BR_INST_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired. ", + "BriefDescription": "Conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Direct and indirect near call instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "BR_INST_RETIRED.NEAR_CALL", "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired. ", + "BriefDescription": "Direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Branch instructions at retirement.", + "PEBS": "1", "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "UMask": "0x2", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3).", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PEBS": "2", + "EventCode": "0xC4", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", "SampleAfterValue": "400009", "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", - "PublicDescription": "Counts the number of near return instructions retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired. ", + "BriefDescription": "Return instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -933,18 +1223,17 @@ "UMask": "0x10", "EventName": "BR_INST_RETIRED.NOT_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired. ", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "PEBS": "1", - "PublicDescription": "Number of near taken branches retired.", "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x20", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired. ", + "BriefDescription": "Taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -954,28 +1243,7 @@ "UMask": "0x40", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "Mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired. ", + "BriefDescription": "Far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -990,13 +1258,12 @@ }, { "PEBS": "1", - "PublicDescription": "Mispredicted taken branch instructions retired.", "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "UMask": "0x1", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", "SampleAfterValue": "400009", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. ", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -1009,6 +1276,16 @@ "BriefDescription": "Mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3" }, + { + "PEBS": "1", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "PublicDescription": "Count cases of saving new LBR records by hardware.", "EventCode": "0xCC", @@ -1028,280 +1305,5 @@ "SampleAfterValue": "100003", "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x5E", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", - "SampleAfterValue": "200003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "EventCode": "0xA8", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss load* is outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "2", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss load* is outstanding.", - "CounterMask": "5", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "Fixed counter 2" - }, - { - "PublicDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x0D", - "Counter": "0,1,2,3", - "UMask": "0x3", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core", - "CounterMask": "3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "EventCode": "0xB1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "EventCode": "0xB1", - "Invert": "1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Reference cycles when the thread is unhalted. (counts at 100 MHz rate)", - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted. (counts at 100 MHz rate)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json b/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json index c8de548b78fa..4645e9d3f460 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/virtual-memory.json @@ -1,4 +1,14 @@ [ + { + "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x81", + "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -8,6 +18,16 @@ "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x82", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -17,6 +37,16 @@ "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x84", + "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", + "SampleAfterValue": "2000003", + "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "Counter": "0,1,2,3", @@ -164,35 +194,5 @@ "SampleAfterValue": "100007", "BriefDescription": "STLB flush attempts", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that cause a page walk of any page size from demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "DTLB_LOAD_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes an page walk of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Misses in all TLB levels that caused page walk completed of any size by demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Cycle PMH is busy with a walk due to demand loads.", - "EventCode": "0x08", - "Counter": "0,1,2,3", - "UMask": "0x84", - "EventName": "DTLB_LOAD_MISSES.WALK_DURATION", - "SampleAfterValue": "2000003", - "BriefDescription": "Demand load cycles page miss handler (PMH) is busy with this walk.", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From ffaa6f274201cb40721f85a143f6bbd6fc8ef601 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:52:48 -0800 Subject: perf vendor events intel: Update Silvermont events to V14 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/silvermont/cache.json | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/silvermont/cache.json b/tools/perf/pmu-events/arch/x86/silvermont/cache.json index 0bd1bc5302de..82be7d1b8b81 100644 --- a/tools/perf/pmu-events/arch/x86/silvermont/cache.json +++ b/tools/perf/pmu-events/arch/x86/silvermont/cache.json @@ -36,12 +36,13 @@ "BriefDescription": "L2 cache request misses" }, { + "PublicDescription": "Counts cycles that fetch is stalled due to an outstanding ICache miss. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes due to an ICache miss. Note: this event is not the same as the total number of cycles spent retrieving instruction cache lines from the memory hierarchy.\r\nCounts cycles that fetch is stalled due to any reason. That is, the decoder queue is able to accept bytes, but the fetch unit is unable to provide bytes. This will include cycles due to an ITLB miss, ICache miss and other events. \r\n", "EventCode": "0x86", "Counter": "0,1", "UMask": "0x4", "EventName": "FETCH_STALL.ICACHE_FILL_PENDING_CYCLES", "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of cycles the NIP stalls because of an icache miss. This is a cumulative count of cycles the NIP stalled for all icache misses." + "BriefDescription": "Cycles code-fetch stalled due to an outstanding ICache miss." }, { "PEBS": "1", -- cgit v1.2.3-70-g09d2 From c93240a72418ccd440b0c472f26ad00c8746d518 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:53:50 -0800 Subject: perf vendor events intel: Update Skylake events to V36 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/skylake/cache.json | 4390 +++----------------- .../arch/x86/skylake/floating-point.json | 5 +- .../perf/pmu-events/arch/x86/skylake/frontend.json | 232 +- tools/perf/pmu-events/arch/x86/skylake/memory.json | 2118 +--------- tools/perf/pmu-events/arch/x86/skylake/other.json | 40 +- .../perf/pmu-events/arch/x86/skylake/pipeline.json | 973 ++--- .../arch/x86/skylake/virtual-memory.json | 262 +- 7 files changed, 1517 insertions(+), 6503 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/skylake/cache.json b/tools/perf/pmu-events/arch/x86/skylake/cache.json index 0551a9ba865d..54bfe9e4045c 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylake/cache.json @@ -1,4206 +1,852 @@ [ { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x11", - "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that miss the STLB.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x12", - "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that miss the STLB.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", + "PublicDescription": "Counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", + "EventCode": "0x24", "Counter": "0,1,2,3", "UMask": "0x21", - "EventName": "MEM_INST_RETIRED.LOCK_LOADS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions with locked access.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "MEM_INST_RETIRED.SPLIT_STORES", - "SampleAfterValue": "100003", - "BriefDescription": "Retired store instructions that split across a cacheline boundary.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x81", - "EventName": "MEM_INST_RETIRED.ALL_LOADS", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired load instructions.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD0", - "Counter": "0,1,2,3", - "UMask": "0x82", - "EventName": "MEM_INST_RETIRED.ALL_STORES", - "SampleAfterValue": "2000003", - "BriefDescription": "All retired store instructions.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1", - "L1_Hit_Indication": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L1 cache hits as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "SampleAfterValue": "2000003", - "BriefDescription": "Retired load instructions with L1 cache hits as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_RETIRED.L2_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions with L2 cache hits as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions with L3 cache hits as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L1 cache as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_RETIRED.L1_MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions missed L1 cache as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L2 cache as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "MEM_LOAD_RETIRED.L2_MISS", - "SampleAfterValue": "50021", - "BriefDescription": "Retired load instructions missed L2 cache as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions missed L3 cache as data sources.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions missed L3 cache as data sources", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", - "EventCode": "0xD1", - "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "SampleAfterValue": "100007", - "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "EventCode": "0xD2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" - }, - { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", - "EventCode": "0xD2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read miss L2, no rejects", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", - "EventCode": "0xD2", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that miss L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", - "SampleAfterValue": "20011", - "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "UMask": "0x22", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", - "EventCode": "0xD2", + "PublicDescription": "Counts L2 cache misses when fetching instructions.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", - "SampleAfterValue": "100003", - "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "UMask": "0x24", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache misses when fetching instructions", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xD4", + "PublicDescription": "Demand requests that miss L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MEM_LOAD_MISC_RETIRED.UC", - "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", - "CounterHTOff": "0,1,2,3", - "Data_LA": "1" + "UMask": "0x27", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests that miss L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", - "EventCode": "0x51", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D.REPLACEMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D data line replacements", + "UMask": "0x38", + "EventName": "L2_RQSTS.PF_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand\n from the demand Hit FB, if it is allocated by hardware or software prefetch.\nNote: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", - "EventCode": "0x48", + "PublicDescription": "All requests that miss L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "L1D miss outstandings duration in cycles", + "UMask": "0x3f", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "BriefDescription": "All requests that miss L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x48", + "PublicDescription": "Counts the number of demand Data Read requests that hit L2 cache. Only non rejected loads are counted.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L1D_PEND_MISS.FB_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", + "UMask": "0x41", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests that hit L2 cache", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts duration of L1D miss outstanding in cycles.", - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests sent to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts both cacheable and noncachaeble code read requests.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Cacheable and noncachaeble code read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", - "SampleAfterValue": "100003", - "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand and prefetch data reads", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, and so on.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", - "SampleAfterValue": "100003", - "BriefDescription": "Any memory transaction that reached the SQ.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.\nNote: A prefetch promoted to Demand is counted from the promotion point.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.\nNote: Writeback pending FIFO has six entries.", - "EventCode": "0xB2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts L2 writebacks that access L2 cache.", - "EventCode": "0xF0", - "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "L2_TRANS.L2_WB", - "SampleAfterValue": "200003", - "BriefDescription": "L2 writebacks that access L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts core-originated cacheable demand requests that miss the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.", - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x41", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.MISS", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests missed L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts core-originated cacheable demand requests that refer to the last level cache (LLC). Demand requests include loads, RFOs, and hardware prefetches from L1D, and instruction fetches from IFU.", - "EventCode": "0x2E", - "Counter": "0,1,2,3", - "UMask": "0x4f", - "Errata": "SKL057", - "EventName": "LONGEST_LAT_CACHE.REFERENCE", - "SampleAfterValue": "100003", - "BriefDescription": "Core-originated cacheable demand requests that refer to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of cache line split locks sent to the uncore.", - "EventCode": "0xF4", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "SQ_MISC.SPLIT_LOCK", - "SampleAfterValue": "100003", - "BriefDescription": "Number of cache line split locks sent to uncore.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "EventCode": "0xB7, 0xBB", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "This event counts the number of demand Data Read requests that miss L2 cache. Only not rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x21", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read miss L2, no rejects", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x41", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe1", - "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", - "SampleAfterValue": "200003", - "BriefDescription": "Demand Data Read requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe2", - "EventName": "L2_RQSTS.ALL_RFO", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of L2 code requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe4", - "EventName": "L2_RQSTS.ALL_CODE_RD", - "SampleAfterValue": "200003", - "BriefDescription": "L2 code requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of requests from the L2 hardware prefetchers.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xf8", - "EventName": "L2_RQSTS.ALL_PF", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x38", - "EventName": "L2_RQSTS.PF_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xd8", - "EventName": "L2_RQSTS.PF_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "RFO requests that hit L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x42", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that hit L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "RFO requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x22", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "RFO requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x44", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "L2 cache misses when fetching instructions.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "L2 cache misses when fetching instructions", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x27", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Demand requests to L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xe7", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "Demand requests to L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All requests that miss L2 cache.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "BriefDescription": "All requests that miss L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "All L2 requests.", - "EventCode": "0x24", - "Counter": "0,1,2,3", - "UMask": "0xff", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "BriefDescription": "All L2 requests", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "L2_LINES_OUT.SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "L2_LINES_OUT.NON_SILENT", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_PREF", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", - "EventCode": "0xF1", - "Counter": "0,1,2,3", - "UMask": "0x1f", - "EventName": "L2_LINES_IN.ALL", - "SampleAfterValue": "100003", - "BriefDescription": "L2 cache lines filling L2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xF2", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "L2_LINES_OUT.USELESS_HWPF", - "SampleAfterValue": "200003", - "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000018000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts any other requests that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts streaming stores that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand code reads that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0002 ", + "PublicDescription": "Counts the RFO (Read-for-Ownership) requests that hit L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x42", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0002 ", + "PublicDescription": "Counts L2 cache hits when fetching instructions, code reads.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x44", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0002 ", + "PublicDescription": "Counts requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xd8", + "EventName": "L2_RQSTS.PF_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches that hit L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0002 ", + "PublicDescription": "Counts the number of demand Data Read requests (including requests from L1D hardware prefetchers). These loads may hit or miss L2 cache. Only non rejected loads are counted.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe1", + "EventName": "L2_RQSTS.ALL_DEMAND_DATA_RD", + "SampleAfterValue": "200003", + "BriefDescription": "Demand Data Read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0002 ", + "PublicDescription": "Counts the total number of RFO (read for ownership) requests to L2 cache. L2 RFO requests include both L1D demand RFO misses as well as L1D RFO prefetches.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe2", + "EventName": "L2_RQSTS.ALL_RFO", + "SampleAfterValue": "200003", + "BriefDescription": "RFO requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0002 ", + "PublicDescription": "Counts the total number of L2 code requests.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe4", + "EventName": "L2_RQSTS.ALL_CODE_RD", + "SampleAfterValue": "200003", + "BriefDescription": "L2 code requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100002 ", + "PublicDescription": "Demand requests to L2 cache.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xe7", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "Demand requests to L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100002 ", + "PublicDescription": "Counts the total number of requests from the L2 hardware prefetchers.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xf8", + "EventName": "L2_RQSTS.ALL_PF", + "SampleAfterValue": "200003", + "BriefDescription": "Requests from the L1/L2/L3 hardware prefetchers or Load software prefetches", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100002 ", + "PublicDescription": "All L2 requests.", + "EventCode": "0x24", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0xff", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "BriefDescription": "All L2 requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100002 ", + "PublicDescription": "Counts core-originated cacheable requests that miss the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all misses to the L3.", + "EventCode": "0x2E", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x41", + "Errata": "SKL057", + "EventName": "LONGEST_LAT_CACHE.MISS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Core-originated cacheable demand requests missed L3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100002 ", + "PublicDescription": "Counts core-originated cacheable requests to the L3 cache (Longest Latency cache). Requests include data and code reads, Reads-for-Ownership (RFOs), speculative accesses and hardware prefetches from L1 and L2. It does not include all accesses to the L3.", + "EventCode": "0x2E", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x4f", + "Errata": "SKL057", + "EventName": "LONGEST_LAT_CACHE.REFERENCE", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Core-originated cacheable demand requests that refer to L3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100002 ", + "PublicDescription": "Counts duration of L1D miss outstanding, that is each cycle number of Fill Buffers (FB) outstanding required by Demand Reads. FB either is held by demand loads, or it is held by non-demand loads and gets hit at least once by demand. The valid outstanding interval is defined until the FB deallocation by one of the following ways: from FB allocation, if FB is allocated by demand from the demand Hit FB, if it is allocated by hardware or software prefetch.Note: In the L1D, a Demand Read contains cacheable or noncacheable demand loads, including ones causing cache-line splits and reads due to page walks resulted from any request type.", + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L1D_PEND_MISS.PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D miss outstandings duration in cycles", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100002 ", + "PublicDescription": "Counts duration of L1D miss outstanding in cycles.", + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L1D_PEND_MISS.PENDING_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080002 ", + "EventCode": "0x48", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "AnyThread": "1", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080002 ", + "PublicDescription": "Number of times a request needed a FB (Fill Buffer) entry but there was no entry available for it. A request includes cacheable/uncacheable demands that are load, store or SW prefetch instructions.", + "EventCode": "0x48", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "L1D_PEND_MISS.FB_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a request needed a FB entry but there was no entry available for it. That is the FB unavailability was dominant reason for blocking the request. A request includes cacheable/uncacheable demands that is load, store or SW prefetch.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080002 ", + "PublicDescription": "Counts L1D data line replacements including opportunistic replacements, and replacements that require stall-for-replace or block-for-replace.", + "EventCode": "0x51", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L1D.REPLACEMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "L1D data line replacements", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080002 ", + "PublicDescription": "Counts the number of offcore outstanding Demand Data Read transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor. See the corresponding Umask under OFFCORE_REQUESTS.Note: A prefetch promoted to Demand is counted from the promotion point.", + "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080002 ", + "PublicDescription": "Counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080002 ", + "EventCode": "0x60", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080002 ", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040002 ", + "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_CODE_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040002 ", + "PublicDescription": "Counts the number of offcore outstanding RFO (store) transactions in the super queue (SQ) every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_RFO", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040002 ", + "PublicDescription": "Counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040002 ", + "PublicDescription": "Counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040002 ", + "PublicDescription": "Counts cycles when offcore outstanding cacheable Core Data Read transactions are present in the super queue. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "EventCode": "0x60", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when offcore outstanding cacheable Core Data Read transactions are present in SuperQueue (SQ), queue to uncore.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040002 ", + "PublicDescription": "Counts the Demand Data Read requests sent to uncore. Use it in conjunction with OFFCORE_REQUESTS_OUTSTANDING to determine average latency in the uncore.", + "EventCode": "0xB0", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_REQUESTS.DEMAND_DATA_RD", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Demand Data Read requests sent to uncore", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040002 ", + "PublicDescription": "Counts both cacheable and non-cacheable code read requests.", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x2", + "EventName": "OFFCORE_REQUESTS.DEMAND_CODE_RD", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Cacheable and noncachaeble code read requests", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0020002 ", + "PublicDescription": "Counts the demand RFO (read for ownership) requests including regular RFOs, locks, ItoM.", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x4", + "EventName": "OFFCORE_REQUESTS.DEMAND_RFO", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Demand RFO requests including regular RFOs, locks, ItoM", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000020002 ", + "PublicDescription": "Counts the demand and prefetch data reads. All Core Data Reads include cacheable 'Demands' and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x8", + "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Demand and prefetch data reads", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400020002 ", + "PublicDescription": "Counts memory transactions reached the super queue including requests initiated by the core, all L3 prefetches, page walks, etc..", + "EventCode": "0xB0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x80", + "EventName": "OFFCORE_REQUESTS.ALL_REQUESTS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Any memory transaction that reached the SQ.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200020002 ", + "PublicDescription": "Counts the number of cases when the offcore requests buffer cannot take more entries for the core. This can happen when the superqueue does not contain eligible entries, or when L1D writeback pending FIFO requests is full.Note: Writeback pending FIFO has six entries.", + "EventCode": "0xB2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "OFFCORE_REQUESTS_BUFFER.SQ_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Offcore requests buffer cannot take more entries for this thread core.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100020002 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", + "EventName": "OFFCORE_RESPONSE", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_NOT_NEEDED", - "Offcore": "1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080020002 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions that miss the STLB.", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x11", + "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020002 ", + "PEBS": "1", + "PublicDescription": "Retired store instructions that miss the STLB.", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x12", + "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "L1_Hit_Indication": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0000010002 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts all demand data writes (RFOs) that have any response type.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x21", + "EventName": "MEM_INST_RETIRED.LOCK_LOADS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load instructions with locked access. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0400001 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x41", + "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000400001 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x42", + "EventName": "MEM_INST_RETIRED.SPLIT_STORES", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "L1_Hit_Indication": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400400001 ", + "PEBS": "1", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x81", + "EventName": "MEM_INST_RETIRED.ALL_LOADS", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired load instructions. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200400001 ", + "PEBS": "1", + "PublicDescription": "All retired store instructions.", + "EventCode": "0xD0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x82", + "EventName": "MEM_INST_RETIRED.ALL_STORES", + "SampleAfterValue": "2000003", + "BriefDescription": "All retired store instructions. (Precise Event)", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1", + "L1_Hit_Indication": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100400001 ", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", + "EventCode": "0xD1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "MEM_LOAD_RETIRED.L1_HIT", + "SampleAfterValue": "2000003", + "BriefDescription": "Retired load instructions with L1 cache hits as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080400001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L2 cache hits as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x2", + "EventName": "MEM_LOAD_RETIRED.L2_HIT", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions with L2 cache hits as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040400001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "MEM_LOAD_RETIRED.L3_HIT", + "SampleAfterValue": "50021", + "BriefDescription": "Retired load instructions with L3 cache hits as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc01c0001 ", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L1 cache.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x8", + "EventName": "MEM_LOAD_RETIRED.L1_MISS", + "SampleAfterValue": "100003", + "BriefDescription": "Retired load instructions missed L1 cache as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x10001c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L2 cache as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x10", + "EventName": "MEM_LOAD_RETIRED.L2_MISS", + "SampleAfterValue": "50021", + "BriefDescription": "Retired load instructions missed L2 cache as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x04001c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions missed L3 cache as data sources.", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x20", + "EventName": "MEM_LOAD_RETIRED.L3_MISS", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load instructions missed L3 cache as data sources", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x02001c0001 ", + "PEBS": "1", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n", + "EventCode": "0xD1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x40", + "EventName": "MEM_LOAD_RETIRED.FB_HIT", + "SampleAfterValue": "100007", + "BriefDescription": "Retired load instructions which data sources were load missed L1 but hit FB due to preceding miss to the same cache line with data not ready", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x01001c0001 ", + "PEBS": "1", + "EventCode": "0xD2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_MISS", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load instructions which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00801c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "EventCode": "0xD2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HIT", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load instructions which data sources were L3 and cross-core snoop hits in on-pkg core cache", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00401c0001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were HitM responses from shared L3.", + "EventCode": "0xD2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_HITM", + "SampleAfterValue": "20011", + "BriefDescription": "Retired load instructions which data sources were HitM responses from shared L3", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0100001 ", + "PEBS": "1", + "PublicDescription": "Retired load instructions which data sources were hits in L3 without snoops required.", + "EventCode": "0xD2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x8", + "EventName": "MEM_LOAD_L3_HIT_RETIRED.XSNP_NONE", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Retired load instructions which data sources were hits in L3 without snoops required", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000100001 ", + "PEBS": "1", + "EventCode": "0xD4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "MEM_LOAD_MISC_RETIRED.UC", + "SampleAfterValue": "100007", + "BriefDescription": "Retired instructions with at least 1 uncacheable load or lock.", + "CounterHTOff": "0,1,2,3", + "Data_LA": "1" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400100001 ", + "PublicDescription": "Counts L2 writebacks that access L2 cache.", + "EventCode": "0xF0", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x40", + "EventName": "L2_TRANS.L2_WB", + "SampleAfterValue": "200003", + "BriefDescription": "L2 writebacks that access L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200100001 ", + "PublicDescription": "Counts the number of L2 cache lines filling the L2. Counting does not cover rejects.", + "EventCode": "0xF1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x1f", + "EventName": "L2_LINES_IN.ALL", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "L2 cache lines filling L2", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100100001 ", + "EventCode": "0xF2", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "EventName": "L2_LINES_OUT.SILENT", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that are silently dropped by L2 cache when triggered by an L2 cache fill. These lines are typically in Shared or Exclusive state. A non-threaded event.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080100001 ", + "EventCode": "0xF2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x2", + "EventName": "L2_LINES_OUT.NON_SILENT", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines are in Modified state. Modified lines are written back to L3", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040100001 ", + "PublicDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache.", + "EventCode": "0xF2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "L2_LINES_OUT.USELESS_PREF", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0080001 ", + "EventCode": "0xF2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "UMask": "0x4", + "EventName": "L2_LINES_OUT.USELESS_HWPF", + "SampleAfterValue": "200003", + "BriefDescription": "Counts the number of lines that have been hardware prefetched but not used and now evicted by L2 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000080001 ", + "PublicDescription": "Counts the number of cache line split locks sent to the uncore.", + "EventCode": "0xF4", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", + "UMask": "0x10", + "EventName": "SQ_MISC.SPLIT_LOCK", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" + "BriefDescription": "Number of cache line split locks sent to uncore.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400080001 ", + "MSRValue": "0x3fc0400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_HIT_NO_FWD", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_HIT_NO_FWD", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & ANY_SNOOP", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200080001 ", + "MSRValue": "0x1000400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_MISS", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_MISS", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HITM", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100080001 ", + "MSRValue": "0x0400400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NOT_NEEDED", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_HIT_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_NOT_NEEDED", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_HIT_NO_FWD", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080080001 ", + "MSRValue": "0x0200400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NONE", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_MISS", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_NONE", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_MISS", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040080001 ", + "MSRValue": "0x0100400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SPL_HIT", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NOT_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SPL_HIT", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NOT_NEEDED", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc0040001 ", + "MSRValue": "0x0080400001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.ANY_SNOOP", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NONE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & ANY_SNOOP", + "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NONE", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1000040001 ", + "MSRValue": "0x3fc01c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HITM", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_HITM", + "BriefDescription": "DEMAND_DATA_RD & L3_HIT & ANY_SNOOP", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0400040001 ", + "MSRValue": "0x10001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_HIT_NO_FWD", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HITM", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_HIT_NO_FWD", + "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_HITM", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0200040001 ", + "MSRValue": "0x04001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_MISS", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_NO_FWD", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_MISS", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops to sibling cores hit in either E/S state and the line is not forwarded.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0100040001 ", + "MSRValue": "0x02001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NOT_NEEDED", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_MISS", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_NOT_NEEDED", + "BriefDescription": "Counts demand data reads that hit in the L3 and the snoops sent to sibling cores return clean response.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0080040001 ", + "MSRValue": "0x01001c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NONE", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NOT_NEEDED", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_NONE", + "BriefDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040040001 ", + "MSRValue": "0x00801c0001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SPL_HIT", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NONE", "MSRIndex": "0x1a6,0x1a7", "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SPL_HIT", + "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NONE", "Offcore": "1", "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x3fc0020001 ", "Counter": "0,1,2,3", @@ -4213,6 +859,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1000020001 ", "Counter": "0,1,2,3", @@ -4225,6 +872,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0400020001 ", "Counter": "0,1,2,3", @@ -4237,6 +885,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0200020001 ", "Counter": "0,1,2,3", @@ -4249,6 +898,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0100020001 ", "Counter": "0,1,2,3", @@ -4261,6 +911,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0080020001 ", "Counter": "0,1,2,3", @@ -4273,18 +924,7 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0040020001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { + "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0000010001 ", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json index 3c6b59af5d54..213dd6230cf2 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylake/floating-point.json @@ -27,13 +27,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "EventCode": "0xC7", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -55,7 +54,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", + "PublicDescription": "Counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", "EventCode": "0xCA", "Counter": "0,1,2,3", "UMask": "0x1e", diff --git a/tools/perf/pmu-events/arch/x86/skylake/frontend.json b/tools/perf/pmu-events/arch/x86/skylake/frontend.json index e697dbd63e6e..578dff5bd823 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylake/frontend.json @@ -1,62 +1,81 @@ [ { - "EventCode": "0x80", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x4", - "EventName": "ICACHE_16B.IFDATA_STALL", + "EventName": "IDQ.MITE_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x83", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ICACHE_64B.IFTAG_HIT", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "UMask": "0x4", + "EventName": "IDQ.MITE_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x83", + "PublicDescription": "Counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ICACHE_64B.IFTAG_MISS", - "SampleAfterValue": "200003", - "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "UMask": "0x8", + "EventName": "IDQ.DSB_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x83", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ICACHE_64B.IFTAG_STALL", - "SampleAfterValue": "200003", - "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", + "UMask": "0x8", + "EventName": "IDQ.DSB_CYCLES", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "Counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_UOPS", + "UMask": "0x10", + "EventName": "IDQ.MS_DSB_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_UOPS", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Count includes uops that may 'bypass' the IDQ.", + "EventCode": "0x79", + "Counter": "0,1,2,3", + "UMask": "0x18", + "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", "EventCode": "0x79", "Counter": "0,1,2,3", "UMask": "0x20", @@ -66,95 +85,99 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "PublicDescription": "Counts the number of cycles 4 uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_CYCLES", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterMask": "1", + "BriefDescription": "Cycles MITE is delivering 4 Uops", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the number of cycles uops were delivered to the Instruction Decode Queue (IDQ) from the MITE (legacy decode pipeline) path. Counting includes uops that may 'bypass' the IDQ. During these cycles uops are not being delivered from the Decode Stream Buffer (DSB).", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "IDQ.MITE_CYCLES", + "UMask": "0x24", + "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "BriefDescription": "Cycles MITE is delivering any Uop", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "IDQ.DSB_CYCLES", + "UMask": "0x30", + "EventName": "IDQ.MS_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "IDQ.MS_DSB_CYCLES", + "UMask": "0x30", + "EdgeDetect": "1", + "EventName": "IDQ.MS_SWITCHES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", + "PublicDescription": "Counts the total number of uops delivered by the Microcode Sequencer (MS). Any instruction over 4 uops will be delivered by the MS. Some instructions such as transcendentals may additionally generate uops from the MS.", "EventCode": "0x79", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", + "UMask": "0x30", + "EventName": "IDQ.MS_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering 4 Uops", - "CounterMask": "4", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may 'bypass' the IDQ.", - "EventCode": "0x79", + "PublicDescription": "Cycles where a code line fetch is stalled due to an L1 instruction cache miss. The legacy decode pipeline works at a 16 Byte granularity.", + "EventCode": "0x80", "Counter": "0,1,2,3", - "UMask": "0x18", - "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", + "UMask": "0x4", + "EventName": "ICACHE_16B.IFDATA_STALL", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Decode Stream Buffer (DSB) is delivering any Uop", - "CounterMask": "1", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache miss.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "EventCode": "0x83", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering 4 Uops", - "CounterMask": "4", + "UMask": "0x1", + "EventName": "ICACHE_64B.IFTAG_HIT", + "SampleAfterValue": "200003", + "BriefDescription": "Instruction fetch tag lookups that hit in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may 'bypass' the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", - "EventCode": "0x79", + "EventCode": "0x83", "Counter": "0,1,2,3", - "UMask": "0x24", - "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles MITE is delivering any Uop", - "CounterMask": "1", + "UMask": "0x2", + "EventName": "ICACHE_64B.IFTAG_MISS", + "SampleAfterValue": "200003", + "BriefDescription": "Instruction fetch tag lookups that miss in the instruction cache (L1I). Counts at 64-byte cache-line granularity.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x83", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "ICACHE_64B.IFTAG_STALL", + "SampleAfterValue": "200003", + "BriefDescription": "Cycles where a code fetch is stalled due to L1 instruction cache tag miss.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread\n\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions)\n \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -164,7 +187,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", + "PublicDescription": "Counts, on the per-thread basis, cycles when no uops are delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core =4.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -175,7 +198,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >=3.", + "PublicDescription": "Counts, on the per-thread basis, cycles when less than 1 uop is delivered to Resource Allocation Table (RAT). IDQ_Uops_Not_Delivered.core >= 3.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -186,6 +209,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles with less than 2 uops delivered by the front-end.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -196,6 +220,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles with less than 3 uops delivered by the front-end.", "EventCode": "0x9C", "Counter": "0,1,2,3", "UMask": "0x1", @@ -217,7 +242,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "EventCode": "0xAB", "Counter": "0,1,2,3", "UMask": "0x2", @@ -228,6 +253,7 @@ }, { "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", "EventCode": "0xC6", "MSRValue": "0x11", "Counter": "0,1,2,3", @@ -235,7 +261,7 @@ "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -248,7 +274,7 @@ "EventName": "FRONTEND_RETIRED.L1I_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -261,12 +287,13 @@ "EventName": "FRONTEND_RETIRED.L2_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced iTLB (Instruction TLB) true miss.", "EventCode": "0xC6", "MSRValue": "0x14", "Counter": "0,1,2,3", @@ -274,12 +301,13 @@ "EventName": "FRONTEND_RETIRED.ITLB_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced iTLB true miss.", + "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "EventCode": "0xC6", "MSRValue": "0x15", "Counter": "0,1,2,3", @@ -287,7 +315,7 @@ "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -300,7 +328,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -313,7 +341,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_2", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -326,34 +354,13 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_4", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, - { - "PublicDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EdgeDetect": "1", - "EventName": "IDQ.MS_SWITCHES", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may 'bypass' the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", - "EventCode": "0x79", - "Counter": "0,1,2,3", - "UMask": "0x30", - "EventName": "IDQ.MS_UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", "EventCode": "0xC6", "MSRValue": "0x400806", "Counter": "0,1,2,3", @@ -367,6 +374,7 @@ }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n", "EventCode": "0xC6", "MSRValue": "0x401006", "Counter": "0,1,2,3", @@ -374,12 +382,13 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.\r\n", "EventCode": "0xC6", "MSRValue": "0x402006", "Counter": "0,1,2,3", @@ -387,7 +396,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -400,7 +409,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_64", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -413,7 +422,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_128", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -426,7 +435,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_256", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -439,12 +448,13 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_512", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, { "PEBS": "1", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", "EventCode": "0xC6", "MSRValue": "0x100206", "Counter": "0,1,2,3", @@ -452,7 +462,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" }, @@ -465,7 +475,7 @@ "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_3", "MSRIndex": "0x3F7", "SampleAfterValue": "100007", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "TakenAlone": "1", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/memory.json b/tools/perf/pmu-events/arch/x86/skylake/memory.json index d7fd5b06825b..3bd8b712c889 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/memory.json @@ -1,6 +1,74 @@ [ { - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", + "PublicDescription": "Number of times a TSX line had a cache conflict.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "TX_MEM.ABORT_CONFLICT", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "TX_MEM.ABORT_CAPACITY", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times we could not allocate Lock Buffer.", + "EventCode": "0x54", + "Counter": "0,1,2,3", + "UMask": "0x40", + "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { "EventCode": "0x5d", "Counter": "0,1,2,3", "UMask": "0x1", @@ -10,7 +78,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", + "PublicDescription": "Unfriendly TSX abort triggered by a vzeroupper instruction.", "EventCode": "0x5d", "Counter": "0,1,2,3", "UMask": "0x2", @@ -50,7 +118,77 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of times we entered an HLE region\n does not count nested transactions.", + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", + "CounterMask": "1", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", + "CounterMask": "2", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xA3", + "Counter": "0,1,2,3", + "UMask": "0x6", + "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", + "SampleAfterValue": "2000003", + "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", + "CounterMask": "6", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Demand Data Read requests who miss L3 cache.", + "EventCode": "0xB0", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", + "SampleAfterValue": "100003", + "BriefDescription": "Demand Data Read requests who miss L3 cache", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:a. memory disambiguation,b. external snoop, orc. cross SMT-HW-thread snoop (stores) hitting load buffer.", + "EventCode": "0xC3", + "Counter": "0,1,2,3", + "UMask": "0x2", + "Errata": "SKL089", + "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Number of times we entered an HLE region. Does not count nested transactions.", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x1", @@ -71,7 +209,7 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x4", @@ -99,13 +237,12 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "EventCode": "0xC8", "Counter": "0,1,2,3", "UMask": "0x20", "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.). ", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -128,7 +265,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of times we entered an RTM region\n does not count nested transactions.", + "PublicDescription": "Number of times we entered an RTM region. Does not count nested transactions.", "EventCode": "0xC9", "Counter": "0,1,2,3", "UMask": "0x1", @@ -149,7 +286,7 @@ }, { "PEBS": "1", - "PublicDescription": "Number of times RTM abort was triggered.", + "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", "EventCode": "0xC9", "Counter": "0,1,2,3", "UMask": "0x4", @@ -207,17 +344,6 @@ "BriefDescription": "Number of times an RTM execution aborted due to none of the previous 4 categories (e.g. interrupt)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "PublicDescription": "This event counts the number of memory ordering Machine Clears detected. Memory Ordering Machine Clears can result from one of the following:\n1. memory disambiguation,\n2. external snoop, or\n3. cross SMT-HW-thread snoop (stores) hitting load buffer.", - "EventCode": "0xC3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "Errata": "SKL089", - "EventName": "MACHINE_CLEARS.MEMORY_ORDERING", - "SampleAfterValue": "100003", - "BriefDescription": "Counts the number of machine clears due to memory order conflicts.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "PEBS": "2", "PublicDescription": "Counts loads when the latency from first dispatch to completion is greater than 4 cycles. Reported latency may be longer than just the memory latency.", @@ -331,1816 +457,87 @@ "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX line had a cache conflict.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3ffc000001 ", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "TX_MEM.ABORT_CONFLICT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data conflict on a transactionally accessed address", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x54", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "TX_MEM.ABORT_CAPACITY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a transactional abort was signaled due to a data capacity limitation for transactional reads or writes.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to a non-release/commit store to lock.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x103c000001 ", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "TX_MEM.ABORT_HLE_STORE_TO_ELIDED_LOCK", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times a HLE transactional region aborted due to a non XRELEASE prefixed instruction writing to an elided lock in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to commit but Lock Buffer not empty.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x043c000001 ", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_NOT_EMPTY", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to NoAllocatedElisionBuffer being non-zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to release/commit but data and address mismatch.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x023c000001 ", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_MISMATCH", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to XRELEASE lock not satisfying the address and value requirements in the elision buffer", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times a TSX Abort was triggered due to attempting an unsupported alignment from Lock Buffer.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x013c000001 ", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TX_MEM.ABORT_HLE_ELISION_BUFFER_UNSUPPORTED_ALIGNMENT", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times an HLE transactional execution aborted due to an unsupported read alignment from the elision buffer.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Number of times we could not allocate Lock Buffer.", - "EventCode": "0x54", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00bc000001 ", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "TX_MEM.HLE_ELISION_BUFFER_FULL", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of times HLE lock could not be elided due to ElisionBufferAvailable being zero.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", + "Offcore": "1", + "CounterHTOff": "0,1,2,3" }, { - "PublicDescription": "Demand Data Read requests who miss L3 cache.", - "EventCode": "0xB0", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS.L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "100003", - "BriefDescription": "Demand Data Read requests who miss L3 cache", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of Offcore outstanding Demand Data Read requests that miss L3 cache in the superQ every cycle.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L3_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L3 cache miss demand load is outstanding.", - "CounterMask": "2", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "Counter": "0,1,2,3", - "UMask": "0x6", - "EventName": "CYCLE_ACTIVITY.STALLS_L3_MISS", - "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L3 cache miss demand load is outstanding.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_L3_MISS_DEMAND_DATA_RD", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 1 Demand Data Read requests who miss L3 cache in the superQ.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x60", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.L3_MISS_DEMAND_DATA_RD_GE_6", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with at least 6 Demand Data Read requests that miss L3 cache in the superQ.", - "CounterMask": "6", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044008000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000408000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c8000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000108000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000088000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000048000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000028000 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.OTHER.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "OTHER & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020800 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "STREAMING_STORES & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020100 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020080 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "PF_L3_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020004 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_CODE_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x1004000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0404000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0204000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0104000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0084000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020002 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_RFO & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3ffc000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & ANY_SNOOP", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x203c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x103c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HITM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HITM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x043c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_HIT_NO_FWD", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_HIT_NO_FWD", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x023c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_MISS", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x013c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NOT_NEEDED", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NOT_NEEDED", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x00bc000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_NONE", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SNOOP_NONE", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x007c000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x3fc4000001 ", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fc4000001 ", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.ANY_SNOOP", @@ -2151,18 +548,7 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2004000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x1004000001 ", "Counter": "0,1,2,3", @@ -2175,6 +561,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0404000001 ", "Counter": "0,1,2,3", @@ -2187,6 +574,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0204000001 ", "Counter": "0,1,2,3", @@ -2199,6 +587,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0104000001 ", "Counter": "0,1,2,3", @@ -2211,6 +600,7 @@ "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "EventCode": "0xB7, 0xBB", "MSRValue": "0x0084000001 ", "Counter": "0,1,2,3", @@ -2221,89 +611,5 @@ "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SNOOP_NONE", "Offcore": "1", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x0044000001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SPL_HIT", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_MISS_LOCAL_DRAM & SPL_HIT", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000400001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L4_HIT_LOCAL_L4.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L4_HIT_LOCAL_L4 & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x20001c0001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000100001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_S.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_S & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000080001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_E.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_E & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000040001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT_M.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & L3_HIT_M & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xB7, 0xBB", - "MSRValue": "0x2000020001 ", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.SUPPLIER_NONE.SNOOP_NON_DRAM", - "MSRIndex": "0x1a6,0x1a7", - "SampleAfterValue": "100003", - "BriefDescription": "DEMAND_DATA_RD & SUPPLIER_NONE & SNOOP_NON_DRAM", - "Offcore": "1", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/other.json b/tools/perf/pmu-events/arch/x86/skylake/other.json index cfdc323acc82..84a316d380ac 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/other.json +++ b/tools/perf/pmu-events/arch/x86/skylake/other.json @@ -1,11 +1,47 @@ [ { - "PublicDescription": "This event counts the number of hardware interruptions received by the processor.", + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "SW_PREFETCH_ACCESS.T0", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x4", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "Counter": "0,1,2,3", + "UMask": "0x8", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "SampleAfterValue": "2000003", + "BriefDescription": "Number of PREFETCHW instructions executed.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of hardware interruptions received by the processor.", "EventCode": "0xCB", "Counter": "0,1,2,3", "UMask": "0x1", "EventName": "HW_INTERRUPTS.RECEIVED", - "SampleAfterValue": "100003", + "SampleAfterValue": "203", "BriefDescription": "Number of hardware interrupts received by the processor.", "CounterHTOff": "0,1,2,3,4,5,6,7" } diff --git a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json index 0f7adb809be3..bc6d2afbcd8a 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylake/pipeline.json @@ -1,80 +1,92 @@ [ { - "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", + "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "EventCode": "0x00", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "UMask": "0x1", "EventName": "INST_RETIRED.ANY", "SampleAfterValue": "2000003", "BriefDescription": "Instructions retired from execution.", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { - "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", + "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "EventCode": "0x00", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "UMask": "0x2", "EventName": "CPU_CLK_UNHALTED.THREAD", "SampleAfterValue": "2000003", "BriefDescription": "Core cycles when the thread is not in halt state", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "Counter": "Fixed counter 1", + "UMask": "0x2", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "CounterHTOff": "Fixed counter 1" }, { - "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", + "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "EventCode": "0x00", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "UMask": "0x3", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "SampleAfterValue": "2000003", "BriefDescription": "Reference cycles when the core is not in halt state.", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", - "EventCode": "0x3C", + "PublicDescription": "Counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:a. preceding store conflicts with the load (incomplete overlap),b. store forwarding is impossible due to u-arch limitations,c. preceding lock RMW operations are not forwarded,d. store has the no-forward bit set (uncacheable/page-split/masked stores),e. all-blocking stores are used (mostly, fences and port I/O), and others.The most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events. See the table of not supported store forwards in the Optimization Guide.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Thread cycles when thread is not in halt state", + "UMask": "0x2", + "EventName": "LD_BLOCKS.STORE_FORWARD", + "SampleAfterValue": "100003", + "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xE6", + "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", + "EventCode": "0x03", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BACLEARS.ANY", + "UMask": "0x8", + "EventName": "LD_BLOCKS.NO_SR", "SampleAfterValue": "100003", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PublicDescription": "Counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", + "EventCode": "0x07", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "LSD.UOPS", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of Uops delivered by the LSD.", + "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", + "SampleAfterValue": "100003", + "BriefDescription": "False dependencies in MOB due to partial compare on address.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stalls occured due to changing prefix length (66, 67 or REX.W when they change the length of the decoded instruction). Occurrences counting is proportional to the number of prefixes in a 16B-line. This may result in the following penalties: three-cycle penalty for each LCP in a 16-byte chunk.", - "EventCode": "0x87", + "PublicDescription": "Core cycles the Resource allocator was stalled due to recovery from an earlier branch misprediction or machine clear event.", + "EventCode": "0x0D", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "ILD_STALL.LCP", + "EventName": "INT_MISC.RECOVERY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Stalls caused by changing prefix length of the instruction.", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "EventCode": "0x0D", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "AnyThread": "1", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -87,33 +99,35 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "Counts the number of uops that the Resource Allocation Table (RAT) issues to the Reservation Station (RS).", + "EventCode": "0x0E", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "RESOURCE_STALLS.ANY", + "EventName": "UOPS_ISSUED.ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Resource-related stall cycles", + "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", - "EventCode": "0xA2", + "PublicDescription": "Counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "EventCode": "0x0E", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "RESOURCE_STALLS.SB", + "UMask": "0x1", + "EventName": "UOPS_ISSUED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS).", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", "EventCode": "0x0E", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_ISSUED.ANY", + "UMask": "0x2", + "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", "SampleAfterValue": "2000003", - "BriefDescription": "Uops that Resource Allocation Table (RAT) issues to Reservation Station (RS)", + "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { @@ -126,361 +140,318 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "EventCode": "0x0E", - "Invert": "1", + "EventCode": "0x14", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_ISSUED.STALL_CYCLES", + "EventName": "ARITH.DIVIDER_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which the reservation station (RS) is empty for the thread.\nNote: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", - "EventCode": "0x5E", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "RS_EVENTS.EMPTY_CYCLES", + "UMask": "0x0", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", + "BriefDescription": "Thread cycles when thread is not in halt state", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x5E", - "Invert": "1", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "RS_EVENTS.EMPTY_END", + "UMask": "0x0", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", - "CounterMask": "1", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", - "EventCode": "0xCC", + "PublicDescription": "Counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "SampleAfterValue": "2000003", - "BriefDescription": "Increments whenever there is an update to the LBR array.", + "UMask": "0x0", + "EdgeDetect": "1", + "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", + "SampleAfterValue": "100007", + "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of machine clears (nukes) of any type.", - "EventCode": "0xC3", + "EventCode": "0x3C", "Counter": "0,1,2,3", "UMask": "0x1", - "EdgeDetect": "1", - "EventName": "MACHINE_CLEARS.COUNT", - "SampleAfterValue": "100003", - "BriefDescription": "Number of machine clears (nukes) of any type. ", - "CounterMask": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "EventCode": "0xC3", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "MACHINE_CLEARS.SMC", - "SampleAfterValue": "100003", - "BriefDescription": "Self-modifying code (SMC) detected.", + "UMask": "0x1", + "AnyThread": "1", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", - "EventCode": "0xC0", + "EventCode": "0x3C", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.ANY_P", - "SampleAfterValue": "2000003", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "UMask": "0x1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", - "EventCode": "0xC0", - "Counter": "1", + "EventCode": "0x3C", + "Counter": "0,1,2,3", "UMask": "0x1", - "Errata": "SKL091, SKL044", - "EventName": "INST_RETIRED.PREC_DIST", + "AnyThread": "1", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "CounterHTOff": "1" + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", - "EventCode": "0xC2", + "EventCode": "0x3C", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2503", + "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by ASM (Assembly File) inspection of the nearby instructions.", + "EventCode": "0x4C", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "LOAD_HIT_PRE.SW_PF", + "SampleAfterValue": "100003", + "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts cycles during which the reservation station (RS) is empty for the thread.; Note: In ST-mode, not active thread should drive 0. This is usually caused by severely costly branch mispredictions, or allocator/FE issues.", + "EventCode": "0x5E", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "RS_EVENTS.EMPTY_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Retirement slots used.", + "BriefDescription": "Cycles when Reservation Station (RS) is empty for the thread", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", - "EventCode": "0xC2", + "PublicDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate front-end Latency Bound issues.", + "EventCode": "0x5E", "Invert": "1", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EdgeDetect": "1", + "EventName": "RS_EVENTS.EMPTY_END", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", - "EventCode": "0xC2", - "Invert": "1", + "PublicDescription": "Counts cycles that the Instruction Length decoder (ILD) stalls occurred due to dynamically changing prefix length of the decoded instruction (by operand size prefix instruction 0x66, address size prefix instruction 0x67 or REX.W for Intel64). Count is proportional to the number of prefixes in a 16B-line. This may result in a three-cycle penalty for each LCP (Length changing prefix) in a 16-byte chunk.", + "EventCode": "0x87", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "EventName": "ILD_STALL.LCP", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with less than 10 actually retired uops.", - "CounterMask": "10", + "BriefDescription": "Stalls caused by changing prefix length of the instruction.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 0.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x1", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Conditional branch instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 1.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x2", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "SampleAfterValue": "100007", - "BriefDescription": "Direct and indirect near call instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 2.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x0", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired.", + "UMask": "0x4", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 3.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x8", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "SampleAfterValue": "100007", - "BriefDescription": "Return instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 4.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x10", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Not taken branch instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 5.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x20", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Taken branch instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "EventCode": "0xC4", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 6.", + "EventCode": "0xA1", "Counter": "0,1,2,3", "UMask": "0x40", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "SampleAfterValue": "100007", - "BriefDescription": "Far branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "EventCode": "0xC4", - "Counter": "0,1,2,3", - "UMask": "0x4", - "Errata": "SKL091", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "All (macro) branch instructions retired. ", - "CounterHTOff": "0,1,2,3" - }, - { - "PEBS": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PEBS": "1", - "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", + "PublicDescription": "Counts, on the per-thread basis, cycles during which at least one uop is dispatched from the Reservation Station (RS) to port 7.", + "EventCode": "0xA1", "Counter": "0,1,2,3", - "UMask": "0x0", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "SampleAfterValue": "400009", - "BriefDescription": "All mispredicted macro branch instructions retired.", + "UMask": "0x80", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "1", - "EventCode": "0xC5", + "PublicDescription": "Counts resource-related stall cycles. Reasons for stalls can be as follows:a. *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots).b. *any* u-arch structure got empty (like INT/SIMD FreeLists).c. FPU control word (FPCW), MXCSR.and others. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "SampleAfterValue": "400009", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "UMask": "0x1", + "EventName": "RESOURCE_STALLS.ANY", + "SampleAfterValue": "2000003", + "BriefDescription": "Resource-related stall cycles", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PEBS": "2", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "EventCode": "0xC5", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "SampleAfterValue": "400009", - "BriefDescription": "Mispredicted macro branch instructions retired. ", - "CounterHTOff": "0,1,2,3" - }, - { - "PublicDescription": "Number of uops to be executed per-thread each cycle.", - "EventCode": "0xB1", + "PublicDescription": "Counts allocation stall cycles caused by the store buffer (SB) being full. This counts cycles that the pipeline back-end blocked uop delivery from the front-end.", + "EventCode": "0xA2", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.THREAD", + "UMask": "0x8", + "EventName": "RESOURCE_STALLS.SB", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Number of uops executed from any thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE", + "UMask": "0x1", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Number of uops executed on the core.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_EXECUTED.X87", + "UMask": "0x4", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", "SampleAfterValue": "2000003", - "BriefDescription": "Counts the number of x87 uops dispatched.", + "BriefDescription": "Total execution stalls.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", - "EventCode": "0xB1", - "Invert": "1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "UMask": "0x5", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", - "CounterMask": "1", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "CounterMask": "5", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", + "UMask": "0x8", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 1 uop was executed per-thread", - "CounterMask": "1", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "CounterMask": "8", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", + "UMask": "0xc", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 2 uops were executed per-thread", - "CounterMask": "2", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "CounterMask": "12", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", + "UMask": "0x10", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 3 uops were executed per-thread", - "CounterMask": "3", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "CounterMask": "16", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", - "EventCode": "0xB1", + "EventCode": "0xA3", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", + "UMask": "0x14", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles where at least 4 uops were executed per-thread", - "CounterMask": "4", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "CounterMask": "20", + "CounterHTOff": "0,1,2,3" }, { + "PublicDescription": "Counts cycles during which no uops were executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x1", @@ -490,6 +461,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts cycles during which a total of 1 uop was executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x2", @@ -499,6 +471,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts cycles during which a total of 2 uops were executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x4", @@ -508,6 +481,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles total of 3 uops are executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x8", @@ -517,6 +491,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles total of 4 uops are executed on all ports and Reservation Station (RS) was not empty.", "EventCode": "0xA6", "Counter": "0,1,2,3", "UMask": "0x10", @@ -535,212 +510,196 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", - "EventCode": "0xA1", + "PublicDescription": "Number of uops delivered to the back-end by the LSD(Loop Stream Detector).", + "EventCode": "0xA8", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "UOPS_DISPATCHED_PORT.PORT_0", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 0", + "BriefDescription": "Number of Uops delivered by the LSD.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", - "EventCode": "0xA1", + "PublicDescription": "Counts the cycles when at least one uop is delivered by the LSD (Loop-stream detector).", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "UMask": "0x1", + "EventName": "LSD.CYCLES_ACTIVE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", - "EventCode": "0xA1", + "PublicDescription": "Counts the cycles when 4 uops are delivered by the LSD (Loop-stream detector).", + "EventCode": "0xA8", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "UMask": "0x1", + "EventName": "LSD.CYCLES_4_UOPS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "EventCode": "0xA1", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.THREAD", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "EventCode": "0xA1", + "PublicDescription": "Counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "EventCode": "0xA1", + "PublicDescription": "Cycles where at least 1 uop was executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_1_UOP_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "Cycles where at least 1 uop was executed per-thread", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "EventCode": "0xA1", + "PublicDescription": "Cycles where at least 2 uops were executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x40", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_2_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "BriefDescription": "Cycles where at least 2 uops were executed per-thread", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "EventCode": "0xA1", + "PublicDescription": "Cycles where at least 3 uops were executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x80", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_3_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "BriefDescription": "Cycles where at least 3 uops were executed per-thread", + "CounterMask": "3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Cycles where at least 4 uops were executed per-thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "UMask": "0x1", + "EventName": "UOPS_EXECUTED.CYCLES_GE_4_UOPS_EXEC", "SampleAfterValue": "2000003", - "BriefDescription": "Total execution stalls.", + "BriefDescription": "Cycles where at least 4 uops were executed per-thread", "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Number of uops executed from any thread.", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "CounterMask": "8", + "BriefDescription": "Number of uops executed on the core.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0xc", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "CounterMask": "12", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts all not software-prefetch load dispatches that hit the fill buffer (FB) allocated for the software prefetch. It can also be incremented by some lock instructions. So it should only be used with profiling so that the locks can be excluded by asm inspection of the nearby instructions.", - "EventCode": "0x4C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LOAD_HIT_PRE.SW_PF", - "SampleAfterValue": "100003", - "BriefDescription": "Demand load dispatches that hit L1D fill buffer (FB) allocated for software prefetch.", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts how many times the load operation got the true Block-on-Store blocking code preventing store forwarding. This includes cases when:\n - preceding store conflicts with the load (incomplete overlap)\n\n - store forwarding is impossible due to u-arch limitations\n\n - preceding lock RMW operations are not forwarded\n\n - store has the no-forward bit set (uncacheable/page-split/masked stores)\n\n - all-blocking stores are used (mostly, fences and port I/O)\n\nand others.\nThe most common case is a load blocked due to its address range overlapping with a preceding smaller uncompleted store. Note: This event does not take into account cases of out-of-SW-control (for example, SbTailHit), unknown physical STA, and cases of blocking loads on store due to being non-WB memory type or a lock. These cases are covered by other events.\nSee the table of not supported store forwards in the Optimization Guide.", - "EventCode": "0x03", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x2", - "EventName": "LD_BLOCKS.STORE_FORWARD", - "SampleAfterValue": "100003", - "BriefDescription": "Loads blocked by overlapping with store buffer that cannot be forwarded .", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use.", - "EventCode": "0x03", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "LD_BLOCKS.NO_SR", - "SampleAfterValue": "100003", - "BriefDescription": "The number of times that split load operations are temporarily blocked because all resources for handling the split accesses are in use", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "CounterMask": "2", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts false dependencies in MOB when the partial comparison upon loose net check and dependency was resolved by the Enhanced Loose net mechanism. This may not result in high performance penalties. Loose net checks can fail when loads and stores are 4k aliased.", - "EventCode": "0x07", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LD_BLOCKS_PARTIAL.ADDRESS_ALIAS", - "SampleAfterValue": "100003", - "BriefDescription": "False dependencies in MOB due to partial compare on address.", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "SampleAfterValue": "2000003", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "CounterMask": "3", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xB1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", - "CounterMask": "1", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "CounterMask": "4", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xB1", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x5", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "UMask": "0x2", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", - "CounterMask": "5", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Counts the number of x87 uops executed.", + "EventCode": "0xB1", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.X87", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", - "CounterMask": "16", + "BriefDescription": "Counts the number of x87 uops dispatched.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "PublicDescription": "Counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "EventCode": "0xC0", "Counter": "0,1,2,3", - "UMask": "0x14", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "UMask": "0x0", + "Errata": "SKL091, SKL044", + "EventName": "INST_RETIRED.ANY_P", "SampleAfterValue": "2000003", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", - "CounterMask": "20", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_THREAD_UNHALTED.ONE_THREAD_ACTIVE", + "PEBS": "2", + "PublicDescription": "A version of INST_RETIRED that allows for a more unbiased distribution of samples across instructions retired. It utilizes the Precise Distribution of Instructions Retired (PDIR) feature to mitigate some bias in how retired instructions get sampled.", + "EventCode": "0xC0", + "Counter": "1", + "UMask": "0x1", + "Errata": "SKL091, SKL044", + "EventName": "INST_RETIRED.PREC_DIST", "SampleAfterValue": "2000003", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "CounterHTOff": "1" }, { "PEBS": "2", @@ -757,183 +716,235 @@ "CounterHTOff": "0,2,3" }, { - "EventCode": "0x14", + "EventCode": "0xC1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ARITH.DIVIDER_ACTIVE", + "UMask": "0x3f", + "EventName": "OTHER_ASSISTS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the retirement slots used.", + "EventCode": "0xC2", + "Counter": "0,1,2,3", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles when divide unit is busy executing divide or square root operations. Accounts for integer and floating-point operations.", - "CounterMask": "1", + "BriefDescription": "Retirement slots used.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_ACTIVE", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventCode": "0xC2", + "Invert": "1", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "LSD.CYCLES_4_UOPS", + "UMask": "0x2", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", "SampleAfterValue": "2000003", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", - "CounterMask": "4", + "BriefDescription": "Cycles with less than 10 actually retired uops.", + "CounterMask": "10", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x3f", - "EventName": "OTHER_ASSISTS.ANY", + "UMask": "0x1", + "EdgeDetect": "1", + "EventName": "MACHINE_CLEARS.COUNT", "SampleAfterValue": "100003", - "BriefDescription": "Number of times a microcode assist is invoked by HW other than FP-assist. Examples include AD (page Access Dirty) and AVX* related assists.", + "BriefDescription": "Number of machine clears (nukes) of any type.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register.\r\nFor more information, refer to ?Mixing Intel AVX and Intel SSE Code? section of the Optimization Guide.", - "EventCode": "0x0E", + "PublicDescription": "Counts self-modifying code (SMC) detected, which causes a machine clear.", + "EventCode": "0xC3", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "SampleAfterValue": "2000003", - "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", + "UMask": "0x4", + "EventName": "MACHINE_CLEARS.SMC", + "SampleAfterValue": "100003", + "BriefDescription": "Self-modifying code (SMC) detected.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", - "Counter": "Fixed counter 2", - "UMask": "0x2", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "CounterHTOff": "Fixed counter 2" - }, - { - "EventCode": "0x3C", + "PublicDescription": "Counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x0", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "SampleAfterValue": "2000003", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "UMask": "0x2", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "SampleAfterValue": "100007", + "BriefDescription": "Direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", - "CounterMask": "1", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "UMask": "0x4", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "All (macro) branch instructions retired.", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xB1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", - "CounterMask": "2", + "UMask": "0x8", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "SampleAfterValue": "100007", + "BriefDescription": "Return instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", - "CounterMask": "3", + "UMask": "0x10", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Not taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", - "CounterMask": "4", + "UMask": "0x20", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Taken branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "Invert": "1", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", + "EventCode": "0xC4", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", - "CounterMask": "1", + "UMask": "0x40", + "Errata": "SKL091", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "SampleAfterValue": "100007", + "BriefDescription": "Counts the number of far branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts when the Current Privilege Level (CPL) transitions from ring 1, 2 or 3 to ring 0 (Kernel).", - "EventCode": "0x3C", + "PublicDescription": "Counts all the retired branch instructions that were mispredicted by the processor. A branch misprediction occurs when the processor incorrectly predicts the destination of the branch. When the misprediction is discovered at execution, all the instructions executed in the wrong (speculative) path must be discarded, and the processor must start fetching from the correct path.", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x0", - "EdgeDetect": "1", - "EventName": "CPU_CLK_UNHALTED.RING0_TRANS", - "SampleAfterValue": "100007", - "BriefDescription": "Counts when there is a transition from ring 1, 2 or 3 to ring 0.", - "CounterMask": "1", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "SampleAfterValue": "400009", + "BriefDescription": "All mispredicted macro branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", "UMask": "0x1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when the thread is unhalted.", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted conditional branch instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "1", + "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x1", - "AnyThread": "1", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when at least one thread on the physical core is unhalted.", + "UMask": "0x2", + "EventName": "BR_MISP_RETIRED.NEAR_CALL", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted direct and indirect near call instructions retired.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", + "PEBS": "2", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "EventCode": "0xC5", "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2503", - "BriefDescription": "Core crystal clock cycles when this thread is unhalted and the other thread is halted.", + "UMask": "0x4", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "SampleAfterValue": "400009", + "BriefDescription": "Mispredicted macro branch instructions retired.", + "CounterHTOff": "0,1,2,3" + }, + { + "PEBS": "1", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "EventCode": "0xC5", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "SampleAfterValue": "400009", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Increments when an entry is added to the Last Branch Record (LBR) array (or removed from the array in case of RETURNs in call stack mode). The event requires LBR enable via IA32_DEBUGCTL MSR and branch type selection via MSR_LBR_SELECT.", + "EventCode": "0xCC", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "SampleAfterValue": "2000003", + "BriefDescription": "Increments whenever there is an update to the LBR array.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of times the front-end is resteered when it finds a branch instruction in a fetch line. This occurs for the first time a branch instruction is fetched or when the branch is not tracked by the BPU (Branch Prediction Unit) anymore.", + "EventCode": "0xE6", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json index 02f32cbf6789..2bcba7daca14 100644 --- a/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylake/virtual-memory.json @@ -1,83 +1,6 @@ [ { - "PublicDescription": "This event counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", - "EventCode": "0xAE", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB.ITLB_FLUSH", - "SampleAfterValue": "100007", - "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x4F", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "EPT.WALK_PENDING", - "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", - "SampleAfterValue": "100003", - "BriefDescription": "Misses at all ITLB levels that cause page walks", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x2", - "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x4", - "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x8", - "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", - "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "ITLB_MISSES.WALK_PENDING", - "SampleAfterValue": "100003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake. ", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "PublicDescription": "This event counts load misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", + "PublicDescription": "Counts demand data loads that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x1", @@ -87,45 +10,68 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", "SampleAfterValue": "2000003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).", + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", "SampleAfterValue": "2000003", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).", + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", "SampleAfterValue": "2000003", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)", + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts demand data loads that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a load.", + "EventCode": "0x08", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "SampleAfterValue": "100003", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts loads that miss the DTLB (Data TLB) and hit the STLB (Second level TLB).", "EventCode": "0x08", "Counter": "0,1,2,3", "UMask": "0x20", @@ -135,7 +81,7 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause page walks of any page size (4K/2M/4M/1G).", + "PublicDescription": "Counts demand data stores that caused a page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels, but the walk need not have completed.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x1", @@ -145,45 +91,68 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (4K page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x2", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", "SampleAfterValue": "100003", - "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x4", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)", + "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x8", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)", + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts demand data stores that caused a completed page walk of any page size (4K/2M/4M/1G). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0xe", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "SampleAfterValue": "100003", + "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x10", "EventName": "DTLB_STORE_MISSES.WALK_PENDING", "SampleAfterValue": "2000003", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts cycles when at least one PMH (Page Miss Handler) is busy with a page walk for a store.", + "EventCode": "0x49", + "Counter": "0,1,2,3", + "UMask": "0x10", + "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "SampleAfterValue": "100003", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", + "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Stores that miss the DTLB (Data TLB) and hit the STLB (2nd Level TLB).", "EventCode": "0x49", "Counter": "0,1,2,3", "UMask": "0x20", @@ -193,73 +162,77 @@ "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of DTLB flush attempts of the thread-specific entries.", - "EventCode": "0xBD", + "PublicDescription": "Counts cycles for each PMH (Page Miss Handler) that is busy with an EPT (Extended Page Table) walk for any request type.", + "EventCode": "0x4F", "Counter": "0,1,2,3", - "UMask": "0x1", - "EventName": "TLB_FLUSH.DTLB_THREAD", - "SampleAfterValue": "100007", - "BriefDescription": "DTLB flush attempts of the thread-specific entries", + "UMask": "0x10", + "EventName": "EPT.WALK_PENDING", + "SampleAfterValue": "2000003", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a EPT (Extended Page Table) walk for any request type.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).", - "EventCode": "0xBD", + "PublicDescription": "Counts page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB, but the walk need not have completed.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0x20", - "EventName": "TLB_FLUSH.STLB_ANY", - "SampleAfterValue": "100007", - "BriefDescription": "STLB flush attempts", + "UMask": "0x1", + "EventName": "ITLB_MISSES.MISS_CAUSES_A_WALK", + "SampleAfterValue": "100003", + "BriefDescription": "Misses at all ITLB levels that cause page walks", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Counts completed page walks (4K page size) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "ITLB_MISSES.WALK_COMPLETED", + "UMask": "0x2", + "EventName": "ITLB_MISSES.WALK_COMPLETED_4K", "SampleAfterValue": "100003", - "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (4K)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "UMask": "0x4", + "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", "SampleAfterValue": "100003", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0xe", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "UMask": "0x8", + "EventName": "ITLB_MISSES.WALK_COMPLETED_1G", "SampleAfterValue": "100003", - "BriefDescription": "Store misses in all TLB levels causes a page walk that completes. (All page sizes)", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (1G)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x49", + "PublicDescription": "Counts completed page walks (2M and 4M page sizes) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "EventCode": "0x85", "Counter": "0,1,2,3", - "UMask": "0x10", - "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", + "UMask": "0xe", + "EventName": "ITLB_MISSES.WALK_COMPLETED", "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", - "CounterMask": "1", + "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (All page sizes)", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x08", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", + "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x10", - "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", + "EventName": "ITLB_MISSES.WALK_PENDING", "SampleAfterValue": "100003", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", - "CounterMask": "1", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { + "PublicDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake microarchitecture.", "EventCode": "0x85", "Counter": "0,1,2,3", "UMask": "0x10", @@ -268,5 +241,44 @@ "BriefDescription": "Cycles when at least one PMH is busy with a page walk for code (instruction fetch) request. EPT page walk duration are excluded in Skylake.", "CounterMask": "1", "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x85", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "BriefDescription": "Instruction fetch requests that miss the ITLB and hit the STLB.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of flushes of the big or small ITLB pages. Counting include both TLB Flush (covering all sets) and TLB Set Clear (set-specific).", + "EventCode": "0xAE", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "ITLB.ITLB_FLUSH", + "SampleAfterValue": "100007", + "BriefDescription": "Flushing of the Instruction TLB (ITLB) pages, includes 4k/2M/4M pages.", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of DTLB flush attempts of the thread-specific entries.", + "EventCode": "0xBD", + "Counter": "0,1,2,3", + "UMask": "0x1", + "EventName": "TLB_FLUSH.DTLB_THREAD", + "SampleAfterValue": "100007", + "BriefDescription": "DTLB flush attempts of the thread-specific entries", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "PublicDescription": "Counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, etc.).", + "EventCode": "0xBD", + "Counter": "0,1,2,3", + "UMask": "0x20", + "EventName": "TLB_FLUSH.STLB_ANY", + "SampleAfterValue": "100007", + "BriefDescription": "STLB flush attempts", + "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 1716021e2e88a284cc737db40524734dffea2de5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 04:54:25 -0800 Subject: perf vendor events intel: Update SkylakeX events to V1.06 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/skylakex/cache.json | 257 +++++++++++++-------- .../arch/x86/skylakex/floating-point.json | 3 +- .../pmu-events/arch/x86/skylakex/frontend.json | 48 ++-- .../perf/pmu-events/arch/x86/skylakex/memory.json | 231 ++++++++++++------ tools/perf/pmu-events/arch/x86/skylakex/other.json | 94 +++++++- .../pmu-events/arch/x86/skylakex/pipeline.json | 44 ++-- .../arch/x86/skylakex/virtual-memory.json | 42 ++-- 7 files changed, 482 insertions(+), 237 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/skylakex/cache.json b/tools/perf/pmu-events/arch/x86/skylakex/cache.json index b5bc742b6fbc..5c9940866acd 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/cache.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/cache.json @@ -265,7 +265,7 @@ { "EventCode": "0x60", "UMask": "0x2", - "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle. ", + "BriefDescription": "Offcore outstanding Code Reads transactions in the SuperQueue (SQ), queue to uncore, every cycle.", "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "PublicDescription": "Counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The 'Offcore outstanding' state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", @@ -398,22 +398,24 @@ { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load instructions that miss the STLB.", + "BriefDescription": "Retired load instructions that miss the STLB. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.STLB_MISS_LOADS", + "PublicDescription": "Retired load instructions that miss the STLB.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store instructions that miss the STLB.", + "BriefDescription": "Retired store instructions that miss the STLB. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.STLB_MISS_STORES", + "PublicDescription": "Retired store instructions that miss the STLB.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -421,7 +423,7 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load instructions with locked access.", + "BriefDescription": "Retired load instructions with locked access. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -432,24 +434,22 @@ { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load instructions that split across a cacheline boundary.", + "BriefDescription": "Retired load instructions that split across a cacheline boundary. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.SPLIT_LOADS", - "PublicDescription": "Counts retired load instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store instructions that split across a cacheline boundary.", + "BriefDescription": "Retired store instructions that split across a cacheline boundary. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.SPLIT_STORES", - "PublicDescription": "Counts retired store instructions that split across a cacheline boundary.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -457,7 +457,7 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load instructions.", + "BriefDescription": "All retired load instructions. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -468,11 +468,12 @@ { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store instructions.", + "BriefDescription": "All retired store instructions. (Precise Event)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_INST_RETIRED.ALL_STORES", + "PublicDescription": "All retired store instructions.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -485,7 +486,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L1_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.", + "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L1 data cache. This event includes all SW prefetches and lock instructions regardless of the data source.\r\n", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -509,7 +510,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L3_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop that hit in the L3 cache. ", + "PublicDescription": "Retired load instructions with L3 cache hits as data sources.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, @@ -545,7 +546,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.L3_MISS", - "PublicDescription": "Counts retired load instructions with at least one uop that missed in the L3 cache. ", + "PublicDescription": "Retired load instructions missed L3 cache as data sources.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -557,7 +558,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_RETIRED.FB_HIT", - "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. ", + "PublicDescription": "Counts retired load instructions with at least one uop was load missed in L1 but hit FB (Fill Buffers) due to preceding miss to the same cache line with data not ready. \r\n", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -616,7 +617,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_L3_MISS_RETIRED.LOCAL_DRAM", - "PublicDescription": "Retired load instructions which data sources missed L3 but serviced from local DRAM.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -639,7 +639,6 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_HITM", - "PublicDescription": "Retired load instructions whose data sources was remote HITM.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -648,9 +647,9 @@ "UMask": "0x8", "BriefDescription": "Retired load instructions whose data sources was forwarded from a remote cache", "Data_LA": "1", + "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_L3_MISS_RETIRED.REMOTE_FWD", - "PublicDescription": "Retired load instructions whose data sources was forwarded from a remote cache.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -697,7 +696,7 @@ { "EventCode": "0xF2", "UMask": "0x2", - "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped ", + "BriefDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped", "Counter": "0,1,2,3", "EventName": "L2_LINES_OUT.NON_SILENT", "PublicDescription": "Counts the number of lines that are evicted by L2 cache when triggered by an L2 cache fill. Those lines can be either in modified state or clean state. Modified lines may either be written back to L3 or directly written to memory and not allocated in L3. Clean lines may either be allocated in L3 or dropped.", @@ -742,7 +741,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -755,7 +754,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -768,7 +767,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -781,7 +780,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -794,7 +793,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -807,7 +806,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts demand data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -820,7 +819,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -833,7 +832,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -846,7 +845,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -859,7 +858,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -872,7 +871,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -885,7 +884,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand data writes (RFOs) that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -898,7 +897,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -911,7 +910,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -924,7 +923,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -937,7 +936,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -950,7 +949,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -963,7 +962,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand code reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -976,7 +975,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -989,7 +988,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1002,7 +1001,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1015,7 +1014,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1028,7 +1027,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1041,7 +1040,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1054,7 +1053,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1067,7 +1066,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1080,7 +1079,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1093,7 +1092,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1106,7 +1105,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1119,7 +1118,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1132,7 +1131,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1145,7 +1144,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1158,7 +1157,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1171,7 +1170,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1184,7 +1183,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1197,7 +1196,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1210,7 +1209,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1223,7 +1222,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1236,7 +1235,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1249,7 +1248,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1262,7 +1261,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1275,7 +1274,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1288,7 +1287,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1301,7 +1300,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1314,7 +1313,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1327,7 +1326,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1340,7 +1339,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1353,7 +1352,85 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that have any response type.", + "MSRValue": "0x0000018000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.", + "MSRValue": "0x01003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x04003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "OTHER & L3_HIT & SNOOP_HIT_WITH_FWD", + "MSRValue": "0x08003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.SNOOP_HIT_WITH_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.", + "MSRValue": "0x10003c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that hit in the L3.", + "MSRValue": "0x3f803c8000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_HIT.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1366,7 +1443,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1379,7 +1456,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1392,7 +1469,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1405,7 +1482,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1418,7 +1495,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1431,7 +1508,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1444,7 +1521,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1457,7 +1534,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1470,7 +1547,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1483,7 +1560,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1496,7 +1573,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1509,7 +1586,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1522,7 +1599,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1535,7 +1612,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1548,7 +1625,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1561,7 +1638,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1574,7 +1651,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1587,7 +1664,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch data reads that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1600,7 +1677,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that have any response type.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that have any response type. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1613,7 +1690,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.NO_SNOOP_NEEDED", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1626,7 +1703,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HIT_OTHER_CORE_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1639,7 +1716,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.SNOOP_HIT_WITH_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "tbd; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "tbd Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1652,7 +1729,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.HITM_OTHER_CORE", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3 and the snoop to one of the sibling cores hits the line in M state and the line is forwarded. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1665,7 +1742,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_HIT.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3.; Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "PublicDescription": "Counts all demand & prefetch RFOs that hit in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } diff --git a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json index 1c09a328df36..286ed1a37ec9 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/floating-point.json @@ -29,10 +29,9 @@ { "EventCode": "0xC7", "UMask": "0x8", - "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ", + "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", - "PublicDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json index 40abc0852cd6..403a4f89e9b2 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/frontend.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/frontend.json @@ -182,7 +182,7 @@ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding \u201c4 \u2013 x\u201d when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "Counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when: a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread. b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions). c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -247,20 +247,20 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0\u20132 cycles.", + "PublicDescription": "Counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. MM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.Penalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss.", + "BriefDescription": "Retired Instructions who experienced decode stream buffer (DSB - the decoded instruction-cache) miss. Precise Event.", "PEBS": "1", "MSRValue": "0x11", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.DSB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. ", + "PublicDescription": "Counts retired Instructions that experienced DSB (Decode stream buffer i.e. the decoded instruction-cache) miss. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -268,7 +268,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L1 Cache true miss. Precise Event.", "PEBS": "1", "MSRValue": "0x12", "Counter": "0,1,2,3", @@ -281,7 +281,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss.", + "BriefDescription": "Retired Instructions who experienced Instruction L2 Cache true miss. Precise Event.", "PEBS": "1", "MSRValue": "0x13", "Counter": "0,1,2,3", @@ -294,7 +294,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced iTLB true miss.", + "BriefDescription": "Retired Instructions who experienced iTLB true miss. Precise Event.", "PEBS": "1", "MSRValue": "0x14", "Counter": "0,1,2,3", @@ -308,13 +308,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss.", + "BriefDescription": "Retired Instructions who experienced STLB (2nd level TLB) true miss. Precise Event.", "PEBS": "1", "MSRValue": "0x15", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.STLB_MISS", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss. ", + "PublicDescription": "Counts retired Instructions that experienced STLB (2nd level TLB) true miss.", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -322,7 +322,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x400206", "Counter": "0,1,2,3", @@ -335,7 +335,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 2 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x200206", "Counter": "0,1,2,3", @@ -348,7 +348,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 4 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x400406", "Counter": "0,1,2,3", @@ -367,7 +367,7 @@ "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_8", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 8 cycles. During this period the front-end delivered no uops. \r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -375,13 +375,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 16 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x401006", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_16", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 16 cycles. During this period the front-end delivered no uops.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -389,13 +389,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 32 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x402006", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_32", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after a front-end stall of at least 32 cycles. During this period the front-end delivered no uops.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -403,7 +403,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 64 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x404006", "Counter": "0,1,2,3", @@ -416,7 +416,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 128 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x408006", "Counter": "0,1,2,3", @@ -429,7 +429,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 256 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x410006", "Counter": "0,1,2,3", @@ -442,7 +442,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end delivered no uops for a period of 512 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x420006", "Counter": "0,1,2,3", @@ -455,13 +455,13 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 1 bubble-slot for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x100206", "Counter": "0,1,2,3", "EventName": "FRONTEND_RETIRED.LATENCY_GE_2_BUBBLES_GE_1", "MSRIndex": "0x3F7", - "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.", + "PublicDescription": "Counts retired instructions that are delivered to the back-end after the front-end had at least 1 bubble-slot for a period of 2 cycles. A bubble-slot is an empty issue-pipeline slot while there was no RAT stall.\r\n", "TakenAlone": "1", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" @@ -469,7 +469,7 @@ { "EventCode": "0xC6", "UMask": "0x1", - "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall.", + "BriefDescription": "Retired instructions that are fetched after an interval where the front-end had at least 3 bubble-slots for a period of 2 cycles which was not interrupted by a back-end stall. Precise Event.", "PEBS": "1", "MSRValue": "0x300206", "Counter": "0,1,2,3", diff --git a/tools/perf/pmu-events/arch/x86/skylakex/memory.json b/tools/perf/pmu-events/arch/x86/skylakex/memory.json index ca22a22c1abd..e7f1aa31226d 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/memory.json @@ -214,7 +214,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered. (PEBS)", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -239,10 +239,9 @@ { "EventCode": "0xC8", "UMask": "0x20", - "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.). ", + "BriefDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED_UNFRIENDLY", - "PublicDescription": "Number of times an HLE execution aborted due to HLE-unfriendly instructions and certain unfriendly events (such as AD assists etc.).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -292,7 +291,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered.", + "PublicDescription": "Number of times RTM abort was triggered. (PEBS)", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -466,7 +465,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss in the L3. ", + "PublicDescription": "Counts demand data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -479,7 +478,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts demand data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -492,7 +491,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts demand data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -505,7 +504,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -518,7 +517,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -531,7 +530,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts demand data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -544,7 +543,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -557,7 +556,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -570,7 +569,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -583,7 +582,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -596,7 +595,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -609,7 +608,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all demand data writes (RFOs) that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -622,7 +621,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss in the L3. ", + "PublicDescription": "Counts all demand code reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -635,7 +634,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -648,7 +647,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -661,7 +660,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -674,7 +673,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -687,7 +686,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all demand code reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -700,7 +699,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -713,7 +712,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -726,7 +725,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -739,7 +738,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -752,7 +751,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -765,7 +764,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts prefetch (that bring data to L2) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -778,7 +777,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -791,7 +790,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -804,7 +803,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -817,7 +816,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -830,7 +829,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -843,7 +842,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L2_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all prefetch (that bring data to L2) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -856,7 +855,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -869,7 +868,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -882,7 +881,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -895,7 +894,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -908,7 +907,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -921,7 +920,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -934,7 +933,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -947,7 +946,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -960,7 +959,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -973,7 +972,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -986,7 +985,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -999,7 +998,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L3_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all prefetch (that bring data to LLC only) RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1012,7 +1011,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1025,7 +1024,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1038,7 +1037,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1051,7 +1050,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1064,7 +1063,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1077,7 +1076,85 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.PF_L1D_AND_SW.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts L1 data cache hardware prefetch requests and software prefetch requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss in the L3.", + "MSRValue": "0x3fbc008000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.ANY_SNOOP", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache.", + "MSRValue": "0x083fc08000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache.", + "MSRValue": "0x103fc08000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram.", + "MSRValue": "0x063fc08000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram.", + "MSRValue": "0x063b808000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, + { + "Offcore": "1", + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Counts any other requests that miss the L3 and the data is returned from local dram.", + "MSRValue": "0x0604008000 ", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE.OTHER.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "PublicDescription": "Counts any other requests that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1090,7 +1167,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss in the L3. ", + "PublicDescription": "Counts all prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1103,7 +1180,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1116,7 +1193,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1129,7 +1206,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1142,7 +1219,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1155,7 +1232,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1168,7 +1245,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss in the L3. ", + "PublicDescription": "Counts prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1181,7 +1258,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1194,7 +1271,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1207,7 +1284,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1220,7 +1297,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1233,7 +1310,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_PF_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1246,7 +1323,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1259,7 +1336,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1272,7 +1349,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1285,7 +1362,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1298,7 +1375,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1311,7 +1388,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. ", + "PublicDescription": "Counts all demand & prefetch data reads that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1324,7 +1401,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.ANY_SNOOP", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss in the L3. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1337,7 +1414,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HIT_FORWARD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and clean or shared data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1350,7 +1427,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.REMOTE_HITM", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the modified data is transferred from remote cache. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1363,7 +1440,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local or remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1376,7 +1453,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_REMOTE_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. ", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from remote dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, @@ -1389,8 +1466,8 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_RESPONSE.ALL_RFO.L3_MISS_LOCAL_DRAM.SNOOP_MISS_OR_NO_FWD", "MSRIndex": "0x1a6,0x1a7", - "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram.", + "PublicDescription": "Counts all demand & prefetch RFOs that miss the L3 and the data is returned from local dram. Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/other.json b/tools/perf/pmu-events/arch/x86/skylakex/other.json index 70243b0b0586..778a541463eb 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/other.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/other.json @@ -39,6 +39,42 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x32", + "UMask": "0x1", + "BriefDescription": "Number of PREFETCHNTA instructions executed.", + "Counter": "0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.NTA", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "UMask": "0x2", + "BriefDescription": "Number of PREFETCHT0 instructions executed.", + "Counter": "0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.T0", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "UMask": "0x4", + "BriefDescription": "Number of PREFETCHT1 or PREFETCHT2 instructions executed.", + "Counter": "0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.T1_T2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x32", + "UMask": "0x8", + "BriefDescription": "Number of PREFETCHW instructions executed.", + "Counter": "0,1,2,3", + "EventName": "SW_PREFETCH_ACCESS.PREFETCHW", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xCB", "UMask": "0x1", @@ -49,6 +85,62 @@ "SampleAfterValue": "203", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xEF", + "UMask": "0x1", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITI", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x2", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IHITFSE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x4", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_SHITFSE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x8", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDM", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x10", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDM", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x20", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_IFWDFE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xEF", + "UMask": "0x40", + "Counter": "0,1,2,3", + "EventName": "CORE_SNOOP_RESPONSE.RSP_SFWDFE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xFE", "UMask": "0x2", @@ -69,4 +161,4 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" } -] +] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json index 0895d1e52a4a..f99f7ae27820 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/pipeline.json @@ -3,41 +3,41 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PublicDescription": "Counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, Counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. Notes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. Counting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "Counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", "AnyThread": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "Counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. Note: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -126,7 +126,7 @@ "BriefDescription": "Uops inserted at issue-stage in order to preserve upper bits of vector registers.", "Counter": "0,1,2,3", "EventName": "UOPS_ISSUED.VECTOR_WIDTH_MISMATCH", - "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to \u201cMixing Intel AVX and Intel SSE Code\u201d section of the Optimization Guide.", + "PublicDescription": "Counts the number of Blend Uops issued by the Resource Allocation Table (RAT) to the reservation station (RS) in order to preserve upper bits of vector registers. Starting with the Skylake microarchitecture, these Blend uops are needed since every Intel SSE instruction executed in Dirty Upper State needs to preserve bits 128-255 of the destination register. For more information, refer to Mixing Intel AVX and Intel SSE Code section of the Optimization Guide.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -762,11 +762,10 @@ "EdgeDetect": "1", "EventCode": "0xC3", "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type. ", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", "EventName": "MACHINE_CLEARS.COUNT", "CounterMask": "1", - "PublicDescription": "Number of machine clears (nukes) of any type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -799,7 +798,7 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.CONDITIONAL", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -811,14 +810,14 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_CALL", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired. ", + "BriefDescription": "All (macro) branch instructions retired.", "PEBS": "2", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", @@ -835,7 +834,7 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_RETURN", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -858,19 +857,19 @@ "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.NEAR_TAKEN", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC4", "UMask": "0x40", - "BriefDescription": "Far branch instructions retired.", + "BriefDescription": "Counts the number of far branch instructions retired.", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_INST_RETIRED.FAR_BRANCH", "Errata": "SKL091", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts far branch instructions retired.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -891,7 +890,7 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -902,14 +901,14 @@ "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_CALL", - "PublicDescription": "Counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", + "PublicDescription": "This event counts both taken and not taken retired mispredicted direct and indirect near calls, including both register and memory indirect.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xC5", "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. ", + "BriefDescription": "Mispredicted macro branch instructions retired.", "PEBS": "2", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", @@ -920,10 +919,11 @@ { "EventCode": "0xC5", "UMask": "0x20", - "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken.", + "BriefDescription": "Number of near branch instructions retired that were mispredicted and taken. ", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, diff --git a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json index 70750dab7ead..7f466c97e485 100644 --- a/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/skylakex/virtual-memory.json @@ -12,30 +12,30 @@ { "EventCode": "0x08", "UMask": "0x2", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (4K).", + "BriefDescription": "Page walk completed due to a demand data load to a 4K page", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts demand data loads that caused a completed page walk (4K page size). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x08", "UMask": "0x4", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes (2M/4M).", + "BriefDescription": "Page walk completed due to a demand data load to a 2M/4M page", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts demand data loads that caused a completed page walk (2M and 4M page sizes). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x08", "UMask": "0x8", - "BriefDescription": "Load miss in all TLB levels causes a page walk that completes. (1G)", + "BriefDescription": "Page walk completed due to a demand data load to a 1G page", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts load misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data loads whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -52,17 +52,17 @@ { "EventCode": "0x08", "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture. ", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a load. EPT page walk duration are excluded in Skylake microarchitecture.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x08", "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a load. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_LOAD_MISSES.WALK_ACTIVE", "CounterMask": "1", @@ -93,30 +93,30 @@ { "EventCode": "0x49", "UMask": "0x2", - "BriefDescription": "Store miss in all TLB levels causes a page walk that completes. (4K)", + "BriefDescription": "Page walk completed due to a demand data store to a 4K page", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_4K", - "PublicDescription": "Counts demand data stores that caused a completed page walk (4K page size). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 4K pages. The page walks can end with or without a page fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x49", "UMask": "0x4", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (2M/4M)", + "BriefDescription": "Page walk completed due to a demand data store to a 2M/4M page", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts demand data stores that caused a completed page walk (2M and 4M page sizes). This implies it missed in all TLB levels. The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 2M/4M pages. The page walks can end with or without a page fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x49", "UMask": "0x8", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks (1G)", + "BriefDescription": "Page walk completed due to a demand data store to a 1G page", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED_1G", - "PublicDescription": "Counts store misses in all DTLB levels that cause a completed page walk (1G page size). The page walk can end with or without a fault.", + "PublicDescription": "Counts page walks completed due to demand data stores whose address translations missed in the TLB and were mapped to 1G pages. The page walks can end with or without a page fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -133,17 +133,17 @@ { "EventCode": "0x49", "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture. ", + "PublicDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for a store. EPT page walk duration are excluded in Skylake microarchitecture.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x49", "UMask": "0x10", - "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Cycles when at least one PMH is busy with a page walk for a store. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "DTLB_STORE_MISSES.WALK_ACTIVE", "CounterMask": "1", @@ -197,7 +197,7 @@ "BriefDescription": "Code miss in all TLB levels causes a page walk that completes. (2M/4M)", "Counter": "0,1,2,3", "EventName": "ITLB_MISSES.WALK_COMPLETED_2M_4M", - "PublicDescription": "Counts completed page walks of any page size (4K/2M/4M/1G) caused by a code fetch. This implies it missed in the ITLB and further levels of TLB. The page walk can end with or without a fault.", + "PublicDescription": "Counts code misses in all ITLB levels that caused a completed page walk (2M and 4M page sizes). The page walk can end with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -224,10 +224,10 @@ { "EventCode": "0x85", "UMask": "0x10", - "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake. ", + "BriefDescription": "Counts 1 per cycle for each PMH that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake.", "Counter": "0,1,2,3", "EventName": "ITLB_MISSES.WALK_PENDING", - "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture. ", + "PublicDescription": "Counts 1 per cycle for each PMH (Page Miss Handler) that is busy with a page walk for an instruction fetch request. EPT page walk duration are excluded in Skylake michroarchitecture.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, -- cgit v1.2.3-70-g09d2 From fae0a4df1cc6bb1772fa653a2c8eb422d82b824d Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 10:10:16 -0800 Subject: perf vendor events intel: Update BroadwellDE events to V7 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../pmu-events/arch/x86/broadwellde/cache.json | 389 ++++--- .../arch/x86/broadwellde/floating-point.json | 108 +- .../pmu-events/arch/x86/broadwellde/frontend.json | 138 +-- .../pmu-events/arch/x86/broadwellde/memory.json | 9 +- .../pmu-events/arch/x86/broadwellde/other.json | 20 +- .../pmu-events/arch/x86/broadwellde/pipeline.json | 1214 ++++++++++---------- .../arch/x86/broadwellde/virtual-memory.json | 150 +-- 7 files changed, 1033 insertions(+), 995 deletions(-) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json index 36fe398029b9..bf243fe2a0ec 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/cache.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/cache.json @@ -11,11 +11,28 @@ }, { "EventCode": "0x24", - "UMask": "0x41", - "BriefDescription": "Demand Data Read requests that hit L2 cache", + "UMask": "0x22", + "BriefDescription": "RFO requests that miss L2 cache.", "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", - "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "EventName": "L2_RQSTS.RFO_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x24", + "BriefDescription": "L2 cache misses when fetching instructions.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x27", + "BriefDescription": "Demand requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_MISS", "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -29,6 +46,43 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0x3f", + "BriefDescription": "All requests that miss L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.MISS", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x41", + "BriefDescription": "Demand Data Read requests that hit L2 cache", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.DEMAND_DATA_RD_HIT", + "PublicDescription": "This event counts the number of demand Data Read requests that hit L2 cache. Only not rejected loads are counted.", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x42", + "BriefDescription": "RFO requests that hit L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.RFO_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x24", + "UMask": "0x44", + "BriefDescription": "L2 cache hits when fetching instructions, code reads.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.CODE_RD_HIT", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0x50", @@ -69,6 +123,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xe7", + "BriefDescription": "Demand requests to L2 cache.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x24", "UMask": "0xf8", @@ -79,6 +142,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x24", + "UMask": "0xff", + "BriefDescription": "All L2 requests.", + "Counter": "0,1,2,3", + "EventName": "L2_RQSTS.REFERENCES", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x27", "UMask": "0x50", @@ -130,6 +202,27 @@ "SampleAfterValue": "2000003", "CounterHTOff": "2" }, + { + "EventCode": "0x48", + "UMask": "0x1", + "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", + "Counter": "2", + "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", + "AnyThread": "1", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "2" + }, + { + "EventCode": "0x48", + "UMask": "0x2", + "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", + "Counter": "0,1,2,3", + "EventName": "L1D_PEND_MISS.FB_FULL", + "CounterMask": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x51", "UMask": "0x1", @@ -151,6 +244,29 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", + "CounterMask": "1", + "Errata": "BDM76", + "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x60", + "UMask": "0x1", + "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", + "CounterMask": "6", + "Errata": "BDM76", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x60", "UMask": "0x2", @@ -158,7 +274,7 @@ "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_CODE_RD", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding Code Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -175,24 +291,24 @@ }, { "EventCode": "0x60", - "UMask": "0x8", - "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", + "UMask": "0x4", + "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", + "CounterMask": "1", "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", + "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The Offcore outstanding state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles when offcore outstanding Demand Data Read transactions are present in SuperQueue (SQ), queue to uncore", + "UMask": "0x8", + "BriefDescription": "Offcore outstanding cacheable Core Data Read transactions in SuperQueue (SQ), queue to uncore", "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_DATA_RD", - "CounterMask": "1", + "EventName": "OFFCORE_REQUESTS_OUTSTANDING.ALL_DATA_RD", "Errata": "BDM76", - "PublicDescription": "This event counts cycles when offcore outstanding Demand Data Read transactions are present in the super queue (SQ). A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation).", + "PublicDescription": "This event counts the number of offcore outstanding cacheable Core Data Read transactions in the super queue every cycle. A transaction is considered to be in the Offcore outstanding state between L2 miss and transaction completion sent to requestor (SQ de-allocation). See corresponding Umask under OFFCORE_REQUESTS.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -208,18 +324,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x60", - "UMask": "0x4", - "BriefDescription": "Offcore outstanding demand rfo reads transactions in SuperQueue (SQ), queue to uncore, every cycle", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.CYCLES_WITH_DEMAND_RFO", - "CounterMask": "1", - "Errata": "BDM76", - "PublicDescription": "This event counts the number of offcore outstanding demand rfo Reads transactions in the super queue every cycle. The \"Offcore outstanding\" state of the transaction lasts from the L2 miss until the sending transaction completion to requestor (SQ deallocation). See the corresponding Umask under OFFCORE_REQUESTS.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EventCode": "0x63", "UMask": "0x2", @@ -266,7 +370,7 @@ "BriefDescription": "Demand and prefetch data reads", "Counter": "0,1,2,3", "EventName": "OFFCORE_REQUESTS.ALL_DATA_RD", - "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable \"Demands\" and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", + "PublicDescription": "This event counts the demand and prefetch data reads. All Core Data Reads include cacheable Demands and L2 prefetchers (not L3 prefetchers). Counting also covers reads due to page walks resulted from any request type.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -280,27 +384,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xB7, 0xBB", + "UMask": "0x1", + "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", + "Counter": "0,1,2,3", + "EventName": "OFFCORE_RESPONSE", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xD0", "UMask": "0x11", - "BriefDescription": "Retired load uops that miss the STLB.", + "BriefDescription": "Retired load uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x12", - "BriefDescription": "Retired store uops that miss the STLB.", + "BriefDescription": "Retired store uops that miss the STLB. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.STLB_MISS_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts store uops with true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops true STLB miss retired to the architected path. True STLB miss is an uop triggering page walk that gets completed without blocks, and later gets retired. This page walk can end up with or without a fault.", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -308,37 +421,37 @@ { "EventCode": "0xD0", "UMask": "0x21", - "BriefDescription": "Retired load uops with locked access.", + "BriefDescription": "Retired load uops with locked access. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.LOCK_LOADS", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts load uops with locked access retired to the architected path.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops with locked access retired to the architected path.", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x41", - "BriefDescription": "Retired load uops that split across a cacheline boundary.", + "BriefDescription": "Retired load uops that split across a cacheline boundary.(Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_LOADS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted load uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x42", - "BriefDescription": "Retired store uops that split across a cacheline boundary.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.SPLIT_STORES", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts line-splitted store uops retired to the architected path. A line split is across 64B cache-line which includes a page split (4K).", "SampleAfterValue": "100003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -346,24 +459,24 @@ { "EventCode": "0xD0", "UMask": "0x81", - "BriefDescription": "All retired load uops.", + "BriefDescription": "All retired load uops. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_LOADS", - "PublicDescription": "This event counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts load uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement. This event also counts SW prefetches.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD0", "UMask": "0x82", - "BriefDescription": "All retired store uops.", + "BriefDescription": "Retired store uops that split across a cacheline boundary. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_UOPS_RETIRED.ALL_STORES", - "PublicDescription": "This event counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event counts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts store uops retired to the architected path with a filter on bits 0 and 1 applied.\nNote: This event ?ounts AVX-256bit load/store double-pump memory uops as a single uop at retirement.", "SampleAfterValue": "2000003", "L1_Hit_Indication": "1", "CounterHTOff": "0,1,2,3" @@ -371,69 +484,69 @@ { "EventCode": "0xD1", "UMask": "0x1", - "BriefDescription": "Retired load uops with L1 cache hits as data sources.", + "BriefDescription": "Retired load uops with L1 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_HIT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data source were hits in the nearest-level (L1) cache.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load. This event also counts SW prefetches independent of the actual data source.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x2", - "BriefDescription": "Retired load uops with L2 cache hits as data sources.", + "BriefDescription": "Retired load uops with L2 cache hits as data sources. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_HIT", "Errata": "BDM35", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the mid-level (L2) cache.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were data hits in L3 without snoops required.", + "BriefDescription": "Hit in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L3_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were data hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x8", - "BriefDescription": "Retired load uops misses in L1 cache as data sources.", + "BriefDescription": "Retired load uops misses in L1 cache as data sources. Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L1_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the nearest-level (L1) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x10", - "BriefDescription": "Miss in mid-level (L2) cache. Excludes Unknown data-source.", + "BriefDescription": "Retired load uops with L2 cache misses as data sources. Uses PEBS.", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.L2_MISS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were misses in the mid-level (L2) cache. Counting excludes unknown and UC data source.", "SampleAfterValue": "50021", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD1", "UMask": "0x20", - "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source.", + "BriefDescription": "Miss in last-level (L3) cache. Excludes Unknown data-source. (Precise Event - PEBS).", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", @@ -445,77 +558,112 @@ { "EventCode": "0xD1", "UMask": "0x40", - "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready.", + "BriefDescription": "Retired load uops which data sources were load uops missed L1 but hit FB due to preceding miss to the same cache line with data not ready. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_RETIRED.HIT_LFB", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were load uops missed L1 but hit a fill buffer due to a preceding miss to the same cache line with the data not ready.\nNote: Only two data-sources of L1/FB are applicable for AVX-256bit even though the corresponding AVX load could be serviced by a deeper level in the memory hierarchy. Data source is reported for the Low-half load.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x1", - "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 hit and cross-core snoop missed in on-pkg core cache. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_MISS", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 Hit and a cross-core snoop missed in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x2", - "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache.", + "BriefDescription": "Retired load uops which data sources were L3 and cross-core snoop hits in on-pkg core cache. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HIT", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were L3 hit and a cross-core snoop hit in the on-pkg core cache.", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x4", - "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3.", + "BriefDescription": "Retired load uops which data sources were HitM responses from shared L3. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_HITM", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were HitM responses from a core on same socket (shared L3).", "SampleAfterValue": "20011", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD2", "UMask": "0x8", - "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required.", + "BriefDescription": "Retired load uops which data sources were hits in L3 without snoops required. (Precise Event - PEBS)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_HIT_RETIRED.XSNP_NONE", "Errata": "BDM100", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts retired load uops which data sources were hits in the last-level (L3) cache without snoops required.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xD3", "UMask": "0x1", - "BriefDescription": "Data from local DRAM either Snoop not needed or Snoop Miss (RspI)", "Data_LA": "1", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.LOCAL_DRAM", "Errata": "BDE70, BDM100", - "PublicDescription": "Retired load uop whose Data Source was: local DRAM either Snoop not needed or Snoop Miss (RspI).", + "PublicDescription": "This event counts retired load uops where the data came from local DRAM. This does not include hardware prefetches. This is a precise event.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x4", + "BriefDescription": "Retired load uop whose Data Source was: remote DRAM either Snoop not needed or Snoop Miss (RspI) (Precise Event)", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_DRAM", + "Errata": "BDE70", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x10", + "BriefDescription": "Retired load uop whose Data Source was: Remote cache HITM (Precise Event)", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_HITM", + "Errata": "BDE70", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xD3", + "UMask": "0x20", + "BriefDescription": "Retired load uop whose Data Source was: forwarded from remote cache (Precise Event)", + "Data_LA": "1", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "MEM_LOAD_UOPS_L3_MISS_RETIRED.REMOTE_FWD", + "Errata": "BDE70", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3" }, @@ -657,118 +805,5 @@ "PublicDescription": "This event counts the number of split locks in the super queue.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x42", - "BriefDescription": "RFO requests that hit L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x22", - "BriefDescription": "RFO requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.RFO_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x44", - "BriefDescription": "L2 cache hits when fetching instructions, code reads.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_HIT", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x24", - "BriefDescription": "L2 cache misses when fetching instructions.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.CODE_RD_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x27", - "BriefDescription": "Demand requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xe7", - "BriefDescription": "Demand requests to L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.ALL_DEMAND_REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0x3f", - "BriefDescription": "All requests that miss L2 cache.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.MISS", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x24", - "UMask": "0xff", - "BriefDescription": "All L2 requests.", - "Counter": "0,1,2,3", - "EventName": "L2_RQSTS.REFERENCES", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xB7, 0xBB", - "UMask": "0x1", - "BriefDescription": "Offcore response can be programmed only with a specific pair of event select and counter MSR, and with specific event codes and predefine mask bit value in a dedicated MSR to specify attributes of the offcore transaction.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_RESPONSE", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0x60", - "UMask": "0x1", - "BriefDescription": "Cycles with at least 6 offcore outstanding Demand Data Read transactions in uncore queue.", - "Counter": "0,1,2,3", - "EventName": "OFFCORE_REQUESTS_OUTSTANDING.DEMAND_DATA_RD_GE_6", - "CounterMask": "6", - "Errata": "BDM76", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x48", - "UMask": "0x1", - "BriefDescription": "Cycles with L1D load Misses outstanding from any thread on physical core.", - "Counter": "2", - "EventName": "L1D_PEND_MISS.PENDING_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0x48", - "UMask": "0x2", - "BriefDescription": "Cycles a demand request was blocked due to Fill Buffers inavailability.", - "Counter": "0,1,2,3", - "EventName": "L1D_PEND_MISS.FB_FULL", - "CounterMask": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json index 4ae1ea24f22f..d7b9d9c9c518 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/floating-point.json @@ -6,7 +6,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.AVX_TO_SSE", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from AVX-256 to legacy SSE when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -17,7 +17,7 @@ "Counter": "0,1,2,3", "EventName": "OTHER_ASSISTS.SSE_TO_AVX", "Errata": "BDM30", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", + "PublicDescription": "This event counts the number of transitions from legacy SSE to AVX-256 when penalty is applicable.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -25,7 +25,6 @@ "EventCode": "0xC7", "UMask": "0x1", "BriefDescription": "Number of SSE/AVX computational scalar double precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_DOUBLE", "SampleAfterValue": "2000003", @@ -35,17 +34,24 @@ "EventCode": "0xC7", "UMask": "0x2", "BriefDescription": "Number of SSE/AVX computational scalar single precision floating-point instructions retired. Each count represents 1 computation. Applies to SSE* and AVX* scalar single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.SCALAR_SINGLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x3", + "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SCALAR", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xC7", "UMask": "0x4", "BriefDescription": "Number of SSE/AVX computational 128-bit packed double precision floating-point instructions retired. Each count represents 2 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_DOUBLE", "SampleAfterValue": "2000003", @@ -55,7 +61,6 @@ "EventCode": "0xC7", "UMask": "0x8", "BriefDescription": "Number of SSE/AVX computational 128-bit packed single precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.128B_PACKED_SINGLE", "SampleAfterValue": "2000003", @@ -65,19 +70,54 @@ "EventCode": "0xC7", "UMask": "0x10", "BriefDescription": "Number of SSE/AVX computational 256-bit packed double precision floating-point instructions retired. Each count represents 4 computations. Applies to SSE* and AVX* packed double precision floating-point instructions: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", "Counter": "0,1,2,3", "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_DOUBLE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0xC7", + "UMask": "0x15", + "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", + "SampleAfterValue": "2000006", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xc7", + "UMask": "0x20", + "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x2a", + "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.SINGLE", + "SampleAfterValue": "2000005", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC7", + "UMask": "0x3c", + "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", + "Counter": "0,1,2,3", + "EventName": "FP_ARITH_INST_RETIRED.PACKED", + "SampleAfterValue": "2000004", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xCA", "UMask": "0x2", "BriefDescription": "Number of X87 assists due to output value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", + "PublicDescription": "This event counts the number of x87 floating point (FP) micro-code assist (numeric overflow/underflow, inexact result) when the output value (destination register) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -87,7 +127,7 @@ "BriefDescription": "Number of X87 assists due to input value.", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.X87_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", + "PublicDescription": "This event counts x87 floating point (FP) micro-code assist (invalid operation, denormal operand, SNaN operand) when the input value (one of the source operands to an FP instruction) is invalid.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -97,7 +137,7 @@ "BriefDescription": "Number of SIMD FP assists due to Output values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_OUTPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", + "PublicDescription": "This event counts the number of SSE* floating point (FP) micro-code assist (numeric overflow/underflow) when the output value (destination register) is invalid. Counting covers only cases involving penalties that require micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -107,7 +147,7 @@ "BriefDescription": "Number of SIMD FP assists due to input values", "Counter": "0,1,2,3", "EventName": "FP_ASSIST.SIMD_INPUT", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", + "PublicDescription": "This event counts any input SSE* FP assist - invalid operation, denormal operand, dividing by zero, SNaN operand. Counting includes only cases involving penalties that required micro-code assist intervention.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -121,51 +161,5 @@ "PublicDescription": "This event counts cycles with any input and output SSE or x87 FP assist. If an input and output assist are detected on the same cycle the event increments by 1.", "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xc7", - "UMask": "0x20", - "BriefDescription": "Number of SSE/AVX computational 256-bit packed single precision floating-point instructions retired. Each count represents 8 computations. Applies to SSE* and AVX* packed single precision floating-point instructions: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.256B_PACKED_SINGLE", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3", - "BriefDescription": "Number of SSE/AVX computational scalar floating-point instructions retired. Applies to SSE* and AVX* scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT FM(N)ADD/SUB. FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SCALAR", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x3c", - "BriefDescription": "Number of SSE/AVX computational packed floating-point instructions retired. Applies to SSE* and AVX*, packed, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RSQRT RCP SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.PACKED", - "SampleAfterValue": "2000004", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x2a", - "BriefDescription": "Number of SSE/AVX computational single precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX RCP RSQRT SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.SINGLE", - "SampleAfterValue": "2000005", - "CounterHTOff": "0,1,2,3" - }, - { - "EventCode": "0xC7", - "UMask": "0x15", - "BriefDescription": "Number of SSE/AVX computational double precision floating-point instructions retired. Applies to SSE* and AVX*scalar, double and single precision floating-point: ADD SUB MUL DIV MIN MAX SQRT DPP FM(N)ADD/SUB. DPP and FM(N)ADD/SUB instructions count twice as they perform multiple calculations per element. ?.", - "Counter": "0,1,2,3", - "EventName": "FP_ARITH_INST_RETIRED.DOUBLE", - "SampleAfterValue": "2000006", - "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json index 06bf0a40e568..72781e1e3362 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/frontend.json @@ -15,80 +15,49 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", - "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x10", - "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_DSB_UOPS", - "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x20", - "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_MITE_UOPS", - "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x4", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_UOPS", - "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.MITE_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "UMask": "0x8", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MS_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may \"bypass\" the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "EventName": "IDQ.DSB_UOPS", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x4", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from MITE path", + "UMask": "0x8", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", "Counter": "0,1,2,3", - "EventName": "IDQ.MITE_CYCLES", + "EventName": "IDQ.DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x79", - "UMask": "0x8", - "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) from Decode Stream Buffer (DSB) path", + "UMask": "0x10", + "BriefDescription": "Uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", "Counter": "0,1,2,3", - "EventName": "IDQ.DSB_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "EventName": "IDQ.MS_DSB_UOPS", + "PublicDescription": "This event counts the number of uops initiated by Decode Stream Buffer (DSB) that are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -99,7 +68,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_CYCLES", "CounterMask": "1", - "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts cycles during which uops initiated by Decode Stream Buffer (DSB) are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -111,7 +80,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.MS_DSB_OCCUR", "CounterMask": "1", - "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of deliveries to Instruction Decode Queue (IDQ) initiated by Decode Stream Buffer (DSB) while the Microcode Sequencer (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -122,7 +91,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -133,7 +102,17 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_DSB_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may \"bypass\" the IDQ.", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the Decode Stream Buffer (DSB) path. Counting includes uops that may bypass the IDQ.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x20", + "BriefDescription": "Uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_MITE_UOPS", + "PublicDescription": "This event counts the number of uops initiated by MITE and delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -144,7 +123,7 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_4_UOPS", "CounterMask": "4", - "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles 4 uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -155,7 +134,39 @@ "Counter": "0,1,2,3", "EventName": "IDQ.ALL_MITE_CYCLES_ANY_UOPS", "CounterMask": "1", - "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of cycles uops were delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_UOPS", + "PublicDescription": "This event counts the total number of uops delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Cycles when uops are being delivered to Instruction Decode Queue (IDQ) while Microcode Sequenser (MS) is busy", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which uops are being delivered to Instruction Decode Queue (IDQ) while the Microcode Sequenser (MS) is busy. Counting includes uops that may bypass the IDQ. Uops maybe initiated by Decode Stream Buffer (DSB) or MITE.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EdgeDetect": "1", + "EventCode": "0x79", + "UMask": "0x30", + "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", + "Counter": "0,1,2,3", + "EventName": "IDQ.MS_SWITCHES", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -165,7 +176,7 @@ "BriefDescription": "Uops delivered to Instruction Decode Queue (IDQ) from MITE path", "Counter": "0,1,2,3", "EventName": "IDQ.MITE_ALL_UOPS", - "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may \"bypass\" the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", + "PublicDescription": "This event counts the number of uops delivered to Instruction Decode Queue (IDQ) from the MITE path. Counting includes uops that may bypass the IDQ. This also means that uops are not being delivered from the Decode Stream Buffer (DSB).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -205,7 +216,7 @@ "BriefDescription": "Uops not delivered to Resource Allocation Table (RAT) per thread when backend of the machine is not stalled", "Counter": "0,1,2,3", "EventName": "IDQ_UOPS_NOT_DELIVERED.CORE", - "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding ?4 ? x? when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", + "PublicDescription": "This event counts the number of uops not delivered to Resource Allocation Table (RAT) per thread adding 4 x when Resource Allocation Table (RAT) is not stalled and Instruction Decode Queue (IDQ) delivers x uops to Resource Allocation Table (RAT) (where x belongs to {0,1,2,3}). Counting does not cover cases when:\n a. IDQ-Resource Allocation Table (RAT) pipe serves the other thread;\n b. Resource Allocation Table (RAT) is stalled for the thread (including uop drops and clear BE conditions); \n c. Instruction Decode Queue (IDQ) delivers four uops.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, @@ -268,18 +279,7 @@ "BriefDescription": "Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles.", "Counter": "0,1,2,3", "EventName": "DSB2MITE_SWITCHES.PENALTY_CYCLES", - "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 0?2 cycles.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EdgeDetect": "1", - "EventCode": "0x79", - "UMask": "0x30", - "BriefDescription": "Number of switches from DSB (Decode Stream Buffer) or MITE (legacy decode pipeline) to the Microcode Sequencer.", - "Counter": "0,1,2,3", - "EventName": "IDQ.MS_SWITCHES", - "CounterMask": "1", + "PublicDescription": "This event counts Decode Stream Buffer (DSB)-to-MITE switch true penalty cycles. These cycles do not include uops routed through because of the switch itself, for example, when Instruction Decode Queue (IDQ) pre-allocation is unavailable, or Instruction Decode Queue (IDQ) is full. SBD-to-MITE switch true penalty cycles happen after the merge mux (MM) receives Decode Stream Buffer (DSB) Sync-indication until receiving the first MITE uop. \nMM is placed before Instruction Decode Queue (IDQ) to merge uops being fed from the MITE and Decode Stream Buffer (DSB) paths. Decode Stream Buffer (DSB) inserts the Sync-indication whenever a Decode Stream Buffer (DSB)-to-MITE switch occurs.\nPenalty: A Decode Stream Buffer (DSB) hit followed by a Decode Stream Buffer (DSB) miss can cost up to six cycles in which no uops are delivered to the IDQ. Most often, such switches from the Decode Stream Buffer (DSB) to the legacy pipeline cost 02 cycles.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" } diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json index cfa1e5876ec3..e44f73c24ac8 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/memory.json @@ -95,7 +95,6 @@ "BriefDescription": "Counts the number of times a class of instructions that may cause a transactional abort was executed. Since this is the count of execution, it may not always cause a transactional abort.", "Counter": "0,1,2,3", "EventName": "TX_EXEC.MISC1", - "PublicDescription": "Unfriendly TSX abort triggered by a flowmarker.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -171,11 +170,11 @@ { "EventCode": "0xc8", "UMask": "0x4", - "BriefDescription": "Number of times HLE abort was triggered", + "BriefDescription": "Number of times HLE abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "HLE_RETIRED.ABORTED", - "PublicDescription": "Number of times HLE abort was triggered.", + "PublicDescription": "Number of times HLE abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -252,11 +251,11 @@ { "EventCode": "0xc9", "UMask": "0x4", - "BriefDescription": "Number of times RTM abort was triggered", + "BriefDescription": "Number of times RTM abort was triggered (PEBS)", "PEBS": "1", "Counter": "0,1,2,3", "EventName": "RTM_RETIRED.ABORTED", - "PublicDescription": "Number of times RTM abort was triggered .", + "PublicDescription": "Number of times RTM abort was triggered (PEBS).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/other.json b/tools/perf/pmu-events/arch/x86/broadwellde/other.json index 718fcb1db2ee..4475249ea9da 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/other.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/other.json @@ -9,16 +9,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "EventCode": "0x5C", - "UMask": "0x2", - "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", - "Counter": "0,1,2,3", - "EventName": "CPL_CYCLES.RING123", - "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, { "EdgeDetect": "1", "EventCode": "0x5C", @@ -31,6 +21,16 @@ "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x5C", + "UMask": "0x2", + "BriefDescription": "Unhalted core cycles when thread is in rings 1, 2, or 3", + "Counter": "0,1,2,3", + "EventName": "CPL_CYCLES.RING123", + "PublicDescription": "This event counts unhalted core cycles during which the thread is in rings 1, 2, or 3.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x63", "UMask": "0x1", diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json index 02b4e1035f2d..920c89da9111 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/pipeline.json @@ -3,31 +3,41 @@ "EventCode": "0x00", "UMask": "0x1", "BriefDescription": "Instructions retired from execution.", - "Counter": "Fixed counter 1", + "Counter": "Fixed counter 0", "EventName": "INST_RETIRED.ANY", "PublicDescription": "This event counts the number of instructions retired from execution. For instructions that consist of multiple micro-ops, this event counts the retirement of the last micro-op of the instruction. Counting continues during hardware interrupts, traps, and inside interrupt handlers. \nNotes: INST_RETIRED.ANY is counted by a designated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. INST_RETIRED.ANY_P is counted by a programmable counter and it is an architectural performance event. \nCounting: Faulting executions of GETSEC/VM entry/VM Exit/MWait will not count as retired instructions.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 1" + "CounterHTOff": "Fixed counter 0" }, { "EventCode": "0x00", "UMask": "0x2", "BriefDescription": "Core cycles when the thread is not in halt state", - "Counter": "Fixed counter 2", + "Counter": "Fixed counter 1", "EventName": "CPU_CLK_UNHALTED.THREAD", "PublicDescription": "This event counts the number of core cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. This event is a component in many key event ratios. The core frequency may change from time to time due to transitions associated with Enhanced Intel SpeedStep Technology or TM2. For this reason this event may have a changing ratio with regards to time. When the core frequency is constant, this event can approximate elapsed time while the core was not in the halt state. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "CounterHTOff": "Fixed counter 1" + }, + { + "EventCode": "0x00", + "UMask": "0x2", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "Fixed counter 1", + "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "Fixed counter 1" }, { "EventCode": "0x00", "UMask": "0x3", "BriefDescription": "Reference cycles when the core is not in halt state.", - "Counter": "Fixed counter 3", + "Counter": "Fixed counter 2", "EventName": "CPU_CLK_UNHALTED.REF_TSC", "PublicDescription": "This event counts the number of reference cycles when the core is not in a halt state. The core enters the halt state when it is running the HLT instruction or the MWAIT instruction. This event is not affected by core frequency changes (for example, P states, TM2 transitions) but has the same incrementing frequency as the time stamp counter. This event can approximate elapsed time while the core was not in a halt state. This event has a constant ratio with the CPU_CLK_UNHALTED.REF_XCLK event. It is counted on a dedicated fixed counter, leaving the four (eight when Hyperthreading is disabled) programmable counters available for other events. \nNote: On all current platforms this event stops counting during 'throttling (TM)' states duty off periods the processor is 'halted'. This event is clocked by base clock (100 Mhz) on Sandy Bridge. The counter update is done at a lower clock rate then the core clock the overflow status bit for this counter may appear 'sticky'. After the counter has overflowed and software clears the overflow status bit and resets the counter to less than MAX. The reset value to the counter is not clocked immediately so the overflow status bit will flip 'high (1)' and generate another PMI (if enabled) after which the reset value gets clocked into the counter. Therefore, software will get the interrupt, read the overflow status bit '1 for bit 34 while the counter value is less than MAX. Software should ignore this case.", "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 3" + "CounterHTOff": "Fixed counter 2" }, { "EventCode": "0x03", @@ -60,22 +70,33 @@ }, { "EventCode": "0x0D", - "UMask": "0x8", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "UMask": "0x3", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for this thread (e.g. misprediction or memory nuke)", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RAT_STALL_CYCLES", - "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", + "EventName": "INT_MISC.RECOVERY_CYCLES", + "CounterMask": "1", + "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0x0D", "UMask": "0x3", - "BriefDescription": "Number of cycles waiting for the checkpoints in Resource Allocation Table (RAT) to be recovered after Nuke due to all other cases except JEClear (e.g. whenever a ucode assist is needed like SSE exception, memory disambiguation, etc...)", + "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES", + "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", + "AnyThread": "1", "CounterMask": "1", - "PublicDescription": "Cycles checkpoints in Resource Allocation Table (RAT) are recovering from JEClear or machine clear.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x0D", + "UMask": "0x8", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the thread", + "Counter": "0,1,2,3", + "EventName": "INT_MISC.RAT_STALL_CYCLES", + "PublicDescription": "This event counts the number of cycles during which Resource Allocation Table (RAT) external stall is sent to Instruction Decode Queue (IDQ) for the current thread. This also includes the cycles during which the Allocator is serving another thread.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, @@ -89,6 +110,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "Invert": "1", + "EventCode": "0x0E", + "UMask": "0x1", + "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", + "Counter": "0,1,2,3", + "EventName": "UOPS_ISSUED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0x0E", "UMask": "0x10", @@ -117,18 +150,6 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, - { - "Invert": "1", - "EventCode": "0x0E", - "UMask": "0x1", - "BriefDescription": "Cycles when Resource Allocation Table (RAT) does not issue Uops to Reservation Station (RS) for the thread", - "Counter": "0,1,2,3", - "EventName": "UOPS_ISSUED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which the Resource Allocation Table (RAT) does not issue any Uops to the reservation station (RS) for the current thread.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" - }, { "EventCode": "0x14", "UMask": "0x1", @@ -139,6 +160,26 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Thread cycles when thread is not in halt state", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P", + "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x0", + "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3C", "UMask": "0x1", @@ -149,6 +190,36 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK", + "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0x3C", + "UMask": "0x1", + "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", + "AnyThread": "1", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x3c", "UMask": "0x2", @@ -158,6 +229,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, + { + "EventCode": "0x3C", + "UMask": "0x2", + "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "Counter": "0,1,2,3", + "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4c", "UMask": "0x1", @@ -224,6 +304,18 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EdgeDetect": "1", + "Invert": "1", + "EventCode": "0x5E", + "UMask": "0x1", + "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "Counter": "0,1,2,3", + "EventName": "RS_EVENTS.EMPTY_END", + "CounterMask": "1", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x87", "UMask": "0x1", @@ -404,6 +496,15 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x89", + "UMask": "0xa0", + "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", + "SampleAfterValue": "200003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x89", "UMask": "0xc1", @@ -434,6 +535,16 @@ "SampleAfterValue": "200003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0xA0", + "UMask": "0x3", + "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "Counter": "0,1,2,3", + "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", + "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, { "EventCode": "0xA1", "UMask": "0x1", @@ -446,601 +557,471 @@ }, { "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "UMask": "0x1", + "BriefDescription": "Cycles per core when uops are exectuted in port 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "UMask": "0x1", + "BriefDescription": "Cycles per thread when uops are executed in port 0", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "UOPS_EXECUTED_PORT.PORT_0", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "UMask": "0x2", + "BriefDescription": "Cycles per core when uops are exectuted in port 1.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "UMask": "0x2", + "BriefDescription": "Cycles per thread when uops are executed in port 1", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", + "EventName": "UOPS_EXECUTED_PORT.PORT_1", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "UMask": "0x4", + "BriefDescription": "Cycles per core when uops are dispatched to port 2.", "Counter": "0,1,2,3", - "EventName": "UOPS_DISPATCHED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x1", - "BriefDescription": "Resource-related stall cycles", + "EventCode": "0xA1", + "UMask": "0x4", + "BriefDescription": "Cycles per thread when uops are executed in port 2", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ANY", - "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_2", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x4", - "BriefDescription": "Cycles stalled due to no eligible RS entry available.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.RS", - "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", + "EventCode": "0xA1", "UMask": "0x8", - "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", + "BriefDescription": "Cycles per core when uops are dispatched to port 3.", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.SB", - "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA2", - "UMask": "0x10", - "BriefDescription": "Cycles stalled due to re-order buffer full.", + "EventCode": "0xA1", + "UMask": "0x8", + "BriefDescription": "Cycles per thread when uops are executed in port 3", "Counter": "0,1,2,3", - "EventName": "RESOURCE_STALLS.ROB", - "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "EventName": "UOPS_EXECUTED_PORT.PORT_3", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", - "CounterMask": "1", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", - "CounterMask": "8", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", - "SampleAfterValue": "2000003", - "CounterHTOff": "2" - }, - { - "EventCode": "0xA3", - "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per core when uops are exectuted in port 4.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", - "CounterMask": "2", - "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", + "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls", + "EventCode": "0xA1", + "UMask": "0x10", + "BriefDescription": "Cycles per thread when uops are executed in port 4", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", - "CounterMask": "4", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "EventName": "UOPS_EXECUTED_PORT.PORT_4", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", - "CounterMask": "5", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per core when uops are exectuted in port 5.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", - "CounterMask": "6", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", - "CounterMask": "12", - "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", + "EventCode": "0xA1", + "UMask": "0x20", + "BriefDescription": "Cycles per thread when uops are executed in port 5", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED_PORT.PORT_5", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Number of Uops delivered by the LSD.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "LSD.UOPS", - "PublicDescription": "Number of Uops delivered by the LSD. ", + "EventName": "UOPS_DISPATCHED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per core when uops are exectuted in port 6.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.THREAD", - "PublicDescription": "Number of uops to be executed per-thread each cycle.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xB1", - "UMask": "0x2", - "BriefDescription": "Number of uops executed on the core.", + "EventCode": "0xA1", + "UMask": "0x40", + "BriefDescription": "Cycles per thread when uops are executed in port 6", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE", - "PublicDescription": "Number of uops executed from any thread.", + "EventName": "UOPS_EXECUTED_PORT.PORT_6", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xB1", - "UMask": "0x1", - "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.STALL_CYCLES", - "CounterMask": "1", - "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", + "EventName": "UOPS_DISPATCHED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x0", - "BriefDescription": "Number of instructions retired. General Counter - architectural event", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per core when uops are dispatched to port 7.", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.ANY_P", - "Errata": "BDM61", - "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", + "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", + "AnyThread": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", - "UMask": "0x2", - "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", + "EventCode": "0xA1", + "UMask": "0x80", + "BriefDescription": "Cycles per thread when uops are executed in port 7", "Counter": "0,1,2,3", - "EventName": "INST_RETIRED.X87", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "EventName": "UOPS_EXECUTED_PORT.PORT_7", + "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC0", + "EventCode": "0xA2", "UMask": "0x1", - "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", - "PEBS": "2", - "Counter": "1", - "EventName": "INST_RETIRED.PREC_DIST", - "Errata": "BDM11, BDM55", - "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "BriefDescription": "Resource-related stall cycles", + "Counter": "0,1,2,3", + "EventName": "RESOURCE_STALLS.ANY", + "PublicDescription": "This event counts resource-related stall cycles. Reasons for stalls can be as follows:\n - *any* u-arch structure got full (LB, SB, RS, ROB, BOB, LM, Physical Register Reclaim Table (PRRT), or Physical History Table (PHT) slots)\n - *any* u-arch structure got empty (like INT/SIMD FreeLists)\n - FPU control word (FPCW), MXCSR\nand others. This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", - "CounterHTOff": "1" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC1", - "UMask": "0x40", - "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", + "EventCode": "0xA2", + "UMask": "0x4", + "BriefDescription": "Cycles stalled due to no eligible RS entry available.", "Counter": "0,1,2,3", - "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", - "SampleAfterValue": "100003", + "EventName": "RESOURCE_STALLS.RS", + "PublicDescription": "This event counts stall cycles caused by absence of eligible entries in the reservation station (RS). This may result from RS overflow, or from RS deallocation because of the RS array Write Port allocation scheme (each RS entry has two write ports instead of four. As a result, empty entries could not be used, although RS is not really full). This counts cycles that the pipeline backend blocked uop delivery from the front end.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x1", - "BriefDescription": "Actually retired uops.", - "Data_LA": "1", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x8", + "BriefDescription": "Cycles stalled due to no store buffers available. (not including draining form sync).", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.ALL", - "PublicDescription": "This event counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "EventName": "RESOURCE_STALLS.SB", + "PublicDescription": "This event counts stall cycles caused by the store buffer (SB) overflow (excluding draining from synch). This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC2", - "UMask": "0x2", - "BriefDescription": "Retirement slots used.", - "PEBS": "1", + "EventCode": "0xA2", + "UMask": "0x10", + "BriefDescription": "Cycles stalled due to re-order buffer full.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.RETIRE_SLOTS", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts the number of retirement slots used.", + "EventName": "RESOURCE_STALLS.ROB", + "PublicDescription": "This event counts ROB full stall cycles. This counts cycles that the pipeline backend blocked uop delivery from the front end.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles without actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.STALL_CYCLES", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_PENDING", "CounterMask": "1", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts cycles without actually retired uops.", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand* load request missing the L2 cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xC2", + "EventCode": "0xA3", "UMask": "0x1", - "BriefDescription": "Cycles with less than 10 actually retired uops.", + "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "UOPS_RETIRED.TOTAL_CYCLES", - "CounterMask": "10", - "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", + "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "CounterMask": "1", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", - "UMask": "0x1", - "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.CYCLES", - "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", + "EventName": "CYCLE_ACTIVITY.CYCLES_LDM_PENDING", + "CounterMask": "2", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request (that is cycles with non-completed load waiting for its data from memory subsystem).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC3", + "EventCode": "0xA3", + "UMask": "0x2", + "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "Counter": "0,1,2,3", + "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "CounterMask": "2", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xA3", "UMask": "0x4", - "BriefDescription": "Self-modifying code (SMC) detected.", + "BriefDescription": "This event increments by 1 for every cycle where there was no execute for this thread.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.SMC", - "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "CYCLE_ACTIVITY.CYCLES_NO_EXECUTE", + "CounterMask": "4", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port.", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC3", - "UMask": "0x20", - "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", + "EventCode": "0xA3", + "UMask": "0x4", + "BriefDescription": "Total execution stalls.", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.MASKMOV", - "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", - "SampleAfterValue": "100003", + "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", + "CounterMask": "4", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC4", - "UMask": "0x1", - "BriefDescription": "Conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x2", - "BriefDescription": "Direct and indirect near call instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_CALL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts both direct and indirect near call instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x0", - "BriefDescription": "All (macro) branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x8", - "BriefDescription": "Return instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_RETURN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts return instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x10", - "BriefDescription": "Not taken branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NOT_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts not taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x20", - "BriefDescription": "Taken branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.NEAR_TAKEN", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts taken branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x40", - "BriefDescription": "Far branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.FAR_BRANCH", - "Errata": "BDW98", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts far branch instructions retired.", - "SampleAfterValue": "100007", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC4", - "UMask": "0x4", - "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", - "Errata": "BDW98", - "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_PENDING", + "CounterMask": "5", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand* load request missing the L2 cache.(as a footprint) * includes also L1 HW prefetch requests that may or may not be required by demands.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xC5", - "UMask": "0x1", - "BriefDescription": "Mispredicted conditional branch instructions retired.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.CONDITIONAL", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted conditional branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x0", - "BriefDescription": "All mispredicted macro branch instructions retired.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", - "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x8", - "BriefDescription": "This event counts the number of mispredicted ret instructions retired. Non PEBS", - "PEBS": "1", + "EventCode": "0xA3", + "UMask": "0x5", + "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.RET", - "PublicDescription": "This is a non-precise version (that is, does not use PEBS) of the event that counts mispredicted return instructions retired.", - "SampleAfterValue": "100007", + "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", + "CounterMask": "5", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xC5", - "UMask": "0x4", - "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", - "PEBS": "2", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", - "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", - "SampleAfterValue": "400009", + "EventName": "CYCLE_ACTIVITY.STALLS_LDM_PENDING", + "CounterMask": "6", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xCC", - "UMask": "0x20", - "BriefDescription": "Count cases of saving new LBR", + "EventCode": "0xA3", + "UMask": "0x6", + "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", "Counter": "0,1,2,3", - "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", - "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", + "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", + "CounterMask": "6", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Thread cycles when thread is not in halt state", - "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P", - "PublicDescription": "This is an architectural event that counts the number of thread cycles while the thread is not in a halt state. The thread enters the halt state when it is running the HLT instruction. The core frequency may change from time to time due to power or thermal throttling. For this reason, this event may have a changing ratio with regards to wall clock time.", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_PENDING", + "CounterMask": "8", + "PublicDescription": "Counts number of cycles the CPU has at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x89", - "UMask": "0xa0", - "BriefDescription": "Taken speculative and retired mispredicted indirect calls.", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_EXEC.TAKEN_INDIRECT_NEAR_CALL", - "SampleAfterValue": "200003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x1", - "BriefDescription": "Cycles per core when uops are exectuted in port 0.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0x8", + "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", + "CounterMask": "8", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x2", - "BriefDescription": "Cycles per core when uops are exectuted in port 1.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_PENDING", + "CounterMask": "12", + "PublicDescription": "Counts number of cycles nothing is executed on any execution port, while there was at least one pending demand load request missing the L1 data cache.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per core when uops are dispatched to port 2.", - "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2_CORE", - "AnyThread": "1", + "EventCode": "0xA3", + "UMask": "0xc", + "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", + "Counter": "2", + "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", + "CounterMask": "12", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "2" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per core when uops are dispatched to port 3.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Number of Uops delivered by the LSD.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3_CORE", - "AnyThread": "1", + "EventName": "LSD.UOPS", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per core when uops are exectuted in port 4.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_4_UOPS", + "CounterMask": "4", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x20", - "BriefDescription": "Cycles per core when uops are exectuted in port 5.", + "EventCode": "0xA8", + "UMask": "0x1", + "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5_CORE", - "AnyThread": "1", + "EventName": "LSD.CYCLES_ACTIVE", + "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per core when uops are exectuted in port 6.", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts the number of uops to be executed per-thread each cycle.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.THREAD", + "PublicDescription": "Number of uops to be executed per-thread each cycle.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per core when uops are dispatched to port 7.", + "Invert": "1", + "EventCode": "0xB1", + "UMask": "0x1", + "BriefDescription": "Counts number of cycles no uops were dispatched to be executed on this thread.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7_CORE", - "AnyThread": "1", + "EventName": "UOPS_EXECUTED.STALL_CYCLES", + "CounterMask": "1", + "PublicDescription": "This event counts cycles during which no uops were dispatched from the Reservation Station (RS) per thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xC5", - "UMask": "0x20", - "BriefDescription": "number of near branch instructions retired that were mispredicted and taken.", - "PEBS": "1", - "Counter": "0,1,2,3", - "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", - "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken.", - "SampleAfterValue": "400009", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xB1", @@ -1083,335 +1064,364 @@ "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xe6", - "UMask": "0x1f", - "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", + "EventCode": "0xB1", + "UMask": "0x2", + "BriefDescription": "Number of uops executed on the core.", "Counter": "0,1,2,3", - "EventName": "BACLEARS.ANY", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0xA3", - "UMask": "0x8", - "BriefDescription": "Cycles while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.CYCLES_L1D_MISS", - "CounterMask": "8", + "EventName": "UOPS_EXECUTED.CORE", + "PublicDescription": "Number of uops executed from any thread.", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x1", - "BriefDescription": "Cycles while L2 cache miss demand load is outstanding.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_L2_MISS", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", "CounterMask": "1", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", + "EventCode": "0xb1", "UMask": "0x2", - "BriefDescription": "Cycles while memory subsystem has an outstanding load.", + "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.CYCLES_MEM_ANY", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", "CounterMask": "2", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x4", - "BriefDescription": "Total execution stalls.", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_TOTAL", - "CounterMask": "4", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", + "CounterMask": "3", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0xc", - "BriefDescription": "Execution stalls while L1 cache miss demand load is outstanding.", - "Counter": "2", - "EventName": "CYCLE_ACTIVITY.STALLS_L1D_MISS", - "CounterMask": "12", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "Counter": "0,1,2,3", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", + "CounterMask": "4", "SampleAfterValue": "2000003", - "CounterHTOff": "2" + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x5", - "BriefDescription": "Execution stalls while L2 cache miss demand load is outstanding.", + "Invert": "1", + "EventCode": "0xb1", + "UMask": "0x2", + "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_L2_MISS", - "CounterMask": "5", + "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA3", - "UMask": "0x6", - "BriefDescription": "Execution stalls while memory subsystem has an outstanding load.", + "EventCode": "0xC0", + "UMask": "0x0", + "BriefDescription": "Number of instructions retired. General Counter - architectural event", "Counter": "0,1,2,3", - "EventName": "CYCLE_ACTIVITY.STALLS_MEM_ANY", - "CounterMask": "6", + "EventName": "INST_RETIRED.ANY_P", + "Errata": "BDM61", + "PublicDescription": "This event counts the number of instructions (EOMs) retired. Counting covers macro-fused instructions individually (that is, increments by two).", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "EventCode": "0xC3", + "EventCode": "0xC0", "UMask": "0x1", - "BriefDescription": "Number of machine clears (nukes) of any type.", + "BriefDescription": "Precise instruction retired event with HW to reduce effect of PEBS shadow in IP distribution", + "PEBS": "2", + "Counter": "1", + "EventName": "INST_RETIRED.PREC_DIST", + "Errata": "BDM11, BDM55", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts instructions retired.", + "SampleAfterValue": "2000003", + "CounterHTOff": "1" + }, + { + "EventCode": "0xC0", + "UMask": "0x2", + "BriefDescription": "FP operations retired. X87 FP operations that have no exceptions:", "Counter": "0,1,2,3", - "EventName": "MACHINE_CLEARS.COUNT", - "CounterMask": "1", - "SampleAfterValue": "100003", + "EventName": "INST_RETIRED.X87", + "PublicDescription": "This event counts FP operations retired. For X87 FP operations that have no exceptions counting also includes flows that have several X87, or flows that use X87 uops in the exception handling.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", - "UMask": "0x1", - "BriefDescription": "Cycles 4 Uops delivered by the LSD, but didn't come from the decoder.", + "EventCode": "0xC1", + "UMask": "0x40", + "BriefDescription": "Number of times any microcode assist is invoked by HW upon uop writeback.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_4_UOPS", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "OTHER_ASSISTS.ANY_WB_ASSIST", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EdgeDetect": "1", - "Invert": "1", - "EventCode": "0x5E", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Counts end of periods where the Reservation Station (RS) was empty. Could be useful to precisely locate Frontend Latency Bound issues.", + "BriefDescription": "Actually retired uops. (Precise Event - PEBS)", + "Data_LA": "1", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "RS_EVENTS.EMPTY_END", - "CounterMask": "1", - "SampleAfterValue": "200003", + "EventName": "UOPS_RETIRED.ALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts all actually retired uops. Counting increments by two for micro-fused uops, and by one for macro-fused and other uops. Maximal increment value for one cycle is eight.", + "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA8", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles Uops delivered by the LSD, but didn't come from the decoder.", + "BriefDescription": "Cycles without actually retired uops.", "Counter": "0,1,2,3", - "EventName": "LSD.CYCLES_ACTIVE", + "EventName": "UOPS_RETIRED.STALL_CYCLES", "CounterMask": "1", + "PublicDescription": "This event counts cycles without actually retired uops.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "Invert": "1", + "EventCode": "0xC2", "UMask": "0x1", - "BriefDescription": "Cycles per thread when uops are executed in port 0", + "BriefDescription": "Cycles with less than 10 actually retired uops.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_0", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 0.", + "EventName": "UOPS_RETIRED.TOTAL_CYCLES", + "CounterMask": "10", + "PublicDescription": "Number of cycles using always true condition (uops_ret < 16) applied to non PEBS uops retired event.", "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0xA1", + "EventCode": "0xC2", "UMask": "0x2", - "BriefDescription": "Cycles per thread when uops are executed in port 1", + "BriefDescription": "Retirement slots used. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_1", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 1.", + "EventName": "UOPS_RETIRED.RETIRE_SLOTS", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts the number of retirement slots used.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x4", - "BriefDescription": "Cycles per thread when uops are executed in port 2", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Cycles there was a Nuke. Account for both thread-specific and All Thread Nukes.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_2", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 2.", + "EventName": "MACHINE_CLEARS.CYCLES", + "PublicDescription": "This event counts both thread-specific (TS) and all-thread (AT) nukes.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x8", - "BriefDescription": "Cycles per thread when uops are executed in port 3", + "EdgeDetect": "1", + "EventCode": "0xC3", + "UMask": "0x1", + "BriefDescription": "Number of machine clears (nukes) of any type.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_3", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 3.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.COUNT", + "CounterMask": "1", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x10", - "BriefDescription": "Cycles per thread when uops are executed in port 4", + "EventCode": "0xC3", + "UMask": "0x4", + "BriefDescription": "Self-modifying code (SMC) detected.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_4", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 4.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.SMC", + "PublicDescription": "This event counts self-modifying code (SMC) detected, which causes a machine clear.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", + "EventCode": "0xC3", "UMask": "0x20", - "BriefDescription": "Cycles per thread when uops are executed in port 5", + "BriefDescription": "This event counts the number of executed Intel AVX masked load operations that refer to an illegal address range with the mask bits set to 0.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_5", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 5.", - "SampleAfterValue": "2000003", + "EventName": "MACHINE_CLEARS.MASKMOV", + "PublicDescription": "Maskmov false fault - counts number of time ucode passes through Maskmov flow due to instruction's mask being 0 while the flow was completed without raising a fault.", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x40", - "BriefDescription": "Cycles per thread when uops are executed in port 6", + "EventCode": "0xC4", + "UMask": "0x0", + "BriefDescription": "All (macro) branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_6", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 6.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA1", - "UMask": "0x80", - "BriefDescription": "Cycles per thread when uops are executed in port 7", + "EventCode": "0xC4", + "UMask": "0x1", + "BriefDescription": "Conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED_PORT.PORT_7", - "PublicDescription": "This event counts, on the per-thread basis, cycles during which uops are dispatched from the Reservation Station (RS) to port 7.", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xA0", - "UMask": "0x3", - "BriefDescription": "Micro-op dispatches cancelled due to insufficient SIMD physical register file read ports", + "EventCode": "0xC4", + "UMask": "0x2", + "BriefDescription": "Direct and indirect near call instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOP_DISPATCHES_CANCELLED.SIMD_PRF", - "PublicDescription": "This event counts the number of micro-operations cancelled after they were dispatched from the scheduler to the execution units when the total number of physical register read ports across all dispatch ports exceeds the read bandwidth of the physical register file. The SIMD_PRF subevent applies to the following instructions: VDPPS, DPPS, VPCMPESTRI, PCMPESTRI, VPCMPESTRM, PCMPESTRM, VFMADD*, VFMADDSUB*, VFMSUB*, VMSUBADD*, VFNMADD*, VFNMSUB*. See the Broadwell Optimization Guide for more information.", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3" + "EventName": "BR_INST_RETIRED.NEAR_CALL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect near call instructions retired.", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x00", + "EventCode": "0xC4", "UMask": "0x2", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", - "Counter": "Fixed counter 2", - "EventName": "CPU_CLK_UNHALTED.THREAD_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "Fixed counter 2" + "BriefDescription": "Direct and indirect macro near call instructions retired (captured in ring 3). (Precise Event - PEBS)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_INST_RETIRED.NEAR_CALL_R3", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts both direct and indirect macro near call instructions retired (captured in ring 3).", + "SampleAfterValue": "100007", + "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x0", - "BriefDescription": "Core cycles when at least one thread on the physical core is not in halt state.", + "EventCode": "0xC4", + "UMask": "0x4", + "BriefDescription": "All (macro) branch instructions retired. (Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.THREAD_P_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" + "EventName": "BR_INST_RETIRED.ALL_BRANCHES_PEBS", + "Errata": "BDW98", + "PublicDescription": "This is a precise version of BR_INST_RETIRED.ALL_BRANCHES that counts all (macro) branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xC4", + "UMask": "0x8", + "BriefDescription": "Return instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_THREAD_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_RETURN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x0D", - "UMask": "0x3", - "BriefDescription": "Core cycles the allocator was stalled due to recovery from earlier clear event for any thread running on the physical core (e.g. misprediction or memory nuke).", + "EventCode": "0xC4", + "UMask": "0x10", + "BriefDescription": "Not taken branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "INT_MISC.RECOVERY_CYCLES_ANY", - "AnyThread": "1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NOT_TAKEN", + "PublicDescription": "This event counts not taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 1 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x20", + "BriefDescription": "Taken branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_1", - "CounterMask": "1", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.NEAR_TAKEN", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts taken branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 2 micro-op is executed from any thread on physical core.", + "EventCode": "0xC4", + "UMask": "0x40", + "BriefDescription": "Far branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_2", - "CounterMask": "2", - "SampleAfterValue": "2000003", + "EventName": "BR_INST_RETIRED.FAR_BRANCH", + "Errata": "BDW98", + "PublicDescription": "This event counts far branch instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 3 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x0", + "BriefDescription": "All mispredicted macro branch instructions retired.", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_3", - "CounterMask": "3", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES", + "PublicDescription": "This event counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles at least 4 micro-op is executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x1", + "BriefDescription": "Mispredicted conditional branch instructions retired. (Precise Event - PEBS)", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_GE_4", - "CounterMask": "4", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.CONDITIONAL", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted conditional branch instructions retired.", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "Invert": "1", - "EventCode": "0xb1", - "UMask": "0x2", - "BriefDescription": "Cycles with no micro-ops executed from any thread on physical core.", + "EventCode": "0xC5", + "UMask": "0x4", + "BriefDescription": "Mispredicted macro branch instructions retired. (Precise Event - PEBS)", + "PEBS": "2", "Counter": "0,1,2,3", - "EventName": "UOPS_EXECUTED.CORE_CYCLES_NONE", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.ALL_BRANCHES_PEBS", + "PublicDescription": "This is a precise version of BR_MISP_RETIRED.ALL_BRANCHES that counts all mispredicted macro branch instructions retired.", + "SampleAfterValue": "400009", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xC5", + "UMask": "0x8", + "BriefDescription": "This event counts the number of mispredicted ret instructions retired.(Precise Event)", + "PEBS": "1", + "Counter": "0,1,2,3", + "EventName": "BR_MISP_RETIRED.RET", + "PublicDescription": "This is a precise version (that is, uses PEBS) of the event that counts mispredicted return instructions retired.", + "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate)", + "EventCode": "0xC5", + "UMask": "0x20", + "BriefDescription": "number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "PEBS": "1", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK", - "PublicDescription": "Reference cycles when the thread is unhalted (counts at 100 MHz rate).", - "SampleAfterValue": "2000003", + "EventName": "BR_MISP_RETIRED.NEAR_TAKEN", + "PublicDescription": "Number of near branch instructions retired that were mispredicted and taken. (Precise Event - PEBS).", + "SampleAfterValue": "400009", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x1", - "BriefDescription": "Reference cycles when the at least one thread on the physical core is unhalted (counts at 100 MHz rate).", + "EventCode": "0xCC", + "UMask": "0x20", + "BriefDescription": "Count cases of saving new LBR", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.REF_XCLK_ANY", - "AnyThread": "1", + "EventName": "ROB_MISC_EVENTS.LBR_INSERTS", + "PublicDescription": "This event counts cases of saving new LBR records by hardware. This assumes proper enabling of LBRs and takes into account LBR filtering done by the LBR_SELECT register.", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, { - "EventCode": "0x3C", - "UMask": "0x2", - "BriefDescription": "Count XClk pulses when this thread is unhalted and the other thread is halted.", + "EventCode": "0xe6", + "UMask": "0x1f", + "BriefDescription": "Counts the total number when the front end is resteered, mainly when the BPU cannot provide a correct prediction and this is corrected by other branch handling mechanisms at the front end.", "Counter": "0,1,2,3", - "EventName": "CPU_CLK_UNHALTED.ONE_THREAD_ACTIVE", - "SampleAfterValue": "2000003", + "EventName": "BACLEARS.ANY", + "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json b/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json index 5ce8b67ba076..7d79c707c6d1 100644 --- a/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json +++ b/tools/perf/pmu-events/arch/x86/broadwellde/virtual-memory.json @@ -43,6 +43,16 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0xe", + "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x08", "UMask": "0x10", @@ -72,6 +82,15 @@ "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x08", + "UMask": "0x60", + "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_LOAD_MISSES.STLB_HIT", + "SampleAfterValue": "2000003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x1", @@ -116,6 +135,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0xe", + "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x49", "UMask": "0x10", @@ -145,6 +174,15 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x49", + "UMask": "0x60", + "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", + "Counter": "0,1,2,3", + "EventName": "DTLB_STORE_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x4F", "UMask": "0x10", @@ -199,6 +237,16 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0xe", + "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.WALK_COMPLETED", + "Errata": "BDM69", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0x85", "UMask": "0x10", @@ -228,6 +276,15 @@ "SampleAfterValue": "100003", "CounterHTOff": "0,1,2,3,4,5,6,7" }, + { + "EventCode": "0x85", + "UMask": "0x60", + "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", + "Counter": "0,1,2,3", + "EventName": "ITLB_MISSES.STLB_HIT", + "SampleAfterValue": "100003", + "CounterHTOff": "0,1,2,3,4,5,6,7" + }, { "EventCode": "0xAE", "UMask": "0x1", @@ -250,60 +307,60 @@ }, { "EventCode": "0xBC", - "UMask": "0x21", - "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", + "UMask": "0x12", + "BriefDescription": "Number of DTLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L1", + "EventName": "PAGE_WALKER_LOADS.DTLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x12", - "BriefDescription": "Number of DTLB page walker hits in the L2.", + "UMask": "0x14", + "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x22", - "BriefDescription": "Number of ITLB page walker hits in the L2.", + "UMask": "0x18", + "BriefDescription": "Number of DTLB page walker hits in Memory.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L2", + "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x14", - "BriefDescription": "Number of DTLB page walker hits in the L3 + XSNP.", + "UMask": "0x21", + "BriefDescription": "Number of ITLB page walker hits in the L1+FB.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L1", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x24", - "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", + "UMask": "0x22", + "BriefDescription": "Number of ITLB page walker hits in the L2.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.ITLB_L3", + "EventName": "PAGE_WALKER_LOADS.ITLB_L2", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" }, { "EventCode": "0xBC", - "UMask": "0x18", - "BriefDescription": "Number of DTLB page walker hits in Memory.", + "UMask": "0x24", + "BriefDescription": "Number of ITLB page walker hits in the L3 + XSNP.", "Counter": "0,1,2,3", - "EventName": "PAGE_WALKER_LOADS.DTLB_MEMORY", + "EventName": "PAGE_WALKER_LOADS.ITLB_L3", "Errata": "BDM69, BDM98", "SampleAfterValue": "2000003", "CounterHTOff": "0,1,2,3" @@ -327,62 +384,5 @@ "PublicDescription": "This event counts the number of any STLB flush attempts (such as entire, VPID, PCID, InvPage, CR3 write, and so on).", "SampleAfterValue": "100007", "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0xe", - "BriefDescription": "Demand load Miss in all translation lookaside buffer (TLB) levels causes a page walk that completes of any page size.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x08", - "UMask": "0x60", - "BriefDescription": "Load operations that miss the first DTLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_LOAD_MISSES.STLB_HIT", - "SampleAfterValue": "2000003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0xe", - "BriefDescription": "Store misses in all DTLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x49", - "UMask": "0x60", - "BriefDescription": "Store operations that miss the first TLB level but hit the second and do not cause page walks.", - "Counter": "0,1,2,3", - "EventName": "DTLB_STORE_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0xe", - "BriefDescription": "Misses in all ITLB levels that cause completed page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.WALK_COMPLETED", - "Errata": "BDM69", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" - }, - { - "EventCode": "0x85", - "UMask": "0x60", - "BriefDescription": "Operations that miss the first ITLB level but hit the second and do not cause any page walks.", - "Counter": "0,1,2,3", - "EventName": "ITLB_MISSES.STLB_HIT", - "SampleAfterValue": "100003", - "CounterHTOff": "0,1,2,3,4,5,6,7" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From f5b5bdd92f62248fd354e34986b7a49f30159af9 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 15:38:00 -0800 Subject: perf vendor events intel: Update IvyBridge files to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- .../perf/pmu-events/arch/x86/ivybridge/cache.json | 324 +++++++++++++++++++++ .../perf/pmu-events/arch/x86/ivybridge/memory.json | 72 +++++ 2 files changed, 396 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json index 3c0a28e27d73..999a01bc6467 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/cache.json @@ -774,5 +774,329 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0122", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch RFOs that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0122", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch RFOs that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all writebacks from the core to the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand code reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs) that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x18000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.OTHER.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses. It also includes L2 hints sent to LLC to keep a line from being evicted out of the core caches", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts non-temporal stores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data reads ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand rfo's ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x000105B3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch data reads", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x00010122", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_RFO.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch prefetch RFOs ", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x000107F7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo references (demand & prefetch) ", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json index 6005b364c580..a74d54f56192 100644 --- a/tools/perf/pmu-events/arch/x86/ivybridge/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivybridge/memory.json @@ -160,5 +160,77 @@ "PRECISE_STORE": "1", "TakenAlone": "1", "CounterHTOff": "3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch data reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3004003f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand code reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x300400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x6004001b3", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DATA_IN_SOCKET.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts LLC replacements", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From 5b50758c4b5e2b122309ee307bd2ebd7fcd8871f Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Thu, 18 Jan 2018 15:38:33 -0800 Subject: perf vendor events intel: Update IvyTown files to V20 Signed-off-by: Andi Kleen Link: https://lkml.kernel.org/r/20180118234518.GA27753@tassilo.jf.intel.com Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/pmu-events/arch/x86/ivytown/cache.json | 456 +++++++++++++++++++++ tools/perf/pmu-events/arch/x86/ivytown/memory.json | 348 ++++++++++++++++ 2 files changed, 804 insertions(+) (limited to 'tools') diff --git a/tools/perf/pmu-events/arch/x86/ivytown/cache.json b/tools/perf/pmu-events/arch/x86/ivytown/cache.json index d8cc93b3a04c..6dad3ad6b102 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/cache.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/cache.json @@ -800,5 +800,461 @@ "SampleAfterValue": "100003", "BriefDescription": "Split locks in SQ", "CounterHTOff": "0,1,2,3,4,5,6,7" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand & prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch data reads that hit the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0090", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_PF_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch data reads that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c03f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10008", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.COREWB.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all writebacks from the core to the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that hit in the LLC and sibling core snoop returned a clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data writes (RFOs) that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x803c8000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.OTHER.LRU_HINTS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts L2 hints sent to LLC to keep a line from being evicted out of the core caches", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x23ffc08000", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.OTHER.PORTIO_MMIO_UC", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts miscellaneous accesses that include port i/o, MMIO and uncacheable memory accesses", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0040", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0200", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3f803c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x4003c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HIT_OTHER_CORE_NO_FWD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops to sibling cores hit in either E/S state and the line is not forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10003c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.HITM_OTHER_CORE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoop to one of the sibling cores hits the line in M state and the line is forwarded", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x1003c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.NO_SNOOP_NEEDED", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and sibling core snoops are not needed as either the core-valid bit is not set or the shared line is present in multiple cores", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x2003c0080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_HIT.SNOOP_MISS", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that hit in the LLC and the snoops sent to sibling cores return clean response", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10400", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.SPLIT_LOCK_UC_LOCK.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts requests where the address of an atomic lock instruction spans a cache line boundary or the lock instruction is executed on uncacheable address", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x10800", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.STREAMING_STORES.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts non-temporal stores", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file diff --git a/tools/perf/pmu-events/arch/x86/ivytown/memory.json b/tools/perf/pmu-events/arch/x86/ivytown/memory.json index 4ec94df8d70b..3a7b86af8816 100644 --- a/tools/perf/pmu-events/arch/x86/ivytown/memory.json +++ b/tools/perf/pmu-events/arch/x86/ivytown/memory.json @@ -151,5 +151,353 @@ "PRECISE_STORE": "1", "TakenAlone": "1", "CounterHTOff": "3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc00244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f800244", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch code reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20091", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand & prefetch data reads that hits the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc203f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that hit the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x6004003f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f8203f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc003f7", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.ALL_READS.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all data/code/rfo reads (demand & prefetch) that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x600400004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f820004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc00004", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_CODE_RD.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand code reads that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67fc00001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote & local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x600400001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f820001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc00001", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_DATA_RD.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts demand data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc20002", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.DEMAND_RFO.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all demand data writes (RFOs) that miss the LLC and the data is found in M state in remote cache and forwarded from there.", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20040", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to L2) code reads that miss the LLC and the data returned from remote & local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67fc00010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote & local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x600400010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.LOCAL_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from local dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x67f800010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_DRAM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data returned from remote dram", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x87f820010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HIT_FORWARD", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC and the data forwarded from remote cache", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x107fc00010", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_L2_DATA_RD.LLC_MISS.REMOTE_HITM", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to L2) data reads that miss the LLC the data is found in M state in remote cache and forwarded from there", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20200", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_CODE_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts all prefetch (that bring data to LLC only) code reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" + }, + { + "EventCode": "0xB7, 0xBB", + "MSRValue": "0x3fffc20080", + "Counter": "0,1,2,3", + "UMask": "0x1", + "Offcore": "1", + "EventName": "OFFCORE_RESPONSE.PF_LLC_DATA_RD.LLC_MISS.ANY_RESPONSE", + "MSRIndex": "0x1a6,0x1a7", + "SampleAfterValue": "100003", + "BriefDescription": "Counts prefetch (that bring data to LLC only) data reads that miss in the LLC", + "CounterHTOff": "0,1,2,3" } ] \ No newline at end of file -- cgit v1.2.3-70-g09d2 From aa6292f4845e7921fca60b146403ea6682b8f65d Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:10 -0700 Subject: perf tools: Integrating the CoreSight decoding library The Open CoreSight Decoding Library (openCSD) is a free and open library to decode traces collected by the CoreSight hardware infrastructure. This patch adds the required mechanic to recognise the presence of the openCSD library on a system and set up miscellaneous flags to be used in the compilation of the trace decoding feature. Signed-off-by: Mathieu Poirier Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Kim Phillips Cc: linux-arm-kernel@lists.infradead.org Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Link: http://lkml.kernel.org/r/1516211539-5166-2-git-send-email-mathieu.poirier@linaro.org Link: http://lkml.kernel.org/r/1516635644-24819-1-git-send-email-mathieu.poirier@linaro.org [ Merged missing test-libopencsd.c file, provided later by Mathieu ] Signed-off-by: Arnaldo Carvalho de Melo --- tools/build/Makefile.feature | 3 ++- tools/build/feature/Makefile | 7 ++++++- tools/build/feature/test-all.c | 5 +++++ tools/build/feature/test-libopencsd.c | 8 ++++++++ tools/perf/Makefile.config | 25 +++++++++++++++++++++++++ tools/perf/Makefile.perf | 2 ++ 6 files changed, 48 insertions(+), 2 deletions(-) create mode 100644 tools/build/feature/test-libopencsd.c (limited to 'tools') diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature index e52fcefee379..c378f003b007 100644 --- a/tools/build/Makefile.feature +++ b/tools/build/Makefile.feature @@ -66,7 +66,8 @@ FEATURE_TESTS_BASIC := \ bpf \ sched_getcpu \ sdt \ - setns + setns \ + libopencsd # FEATURE_TESTS_BASIC + FEATURE_TESTS_EXTRA is the complete list # of all feature tests diff --git a/tools/build/feature/Makefile b/tools/build/feature/Makefile index cff38f342283..59585fe20221 100644 --- a/tools/build/feature/Makefile +++ b/tools/build/feature/Makefile @@ -52,7 +52,8 @@ FILES= \ test-cxx.bin \ test-jvmti.bin \ test-sched_getcpu.bin \ - test-setns.bin + test-setns.bin \ + test-libopencsd.bin FILES := $(addprefix $(OUTPUT),$(FILES)) @@ -104,6 +105,10 @@ $(OUTPUT)test-sched_getcpu.bin: $(OUTPUT)test-setns.bin: $(BUILD) +$(OUTPUT)test-libopencsd.bin: + $(BUILD) # -lopencsd_c_api -lopencsd provided by + # $(FEATURE_CHECK_LDFLAGS-libopencsd) + DWARFLIBS := -ldw ifeq ($(findstring -static,${LDFLAGS}),-static) DWARFLIBS += -lelf -lebl -lz -llzma -lbz2 diff --git a/tools/build/feature/test-all.c b/tools/build/feature/test-all.c index 6fdf83263ab7..8dc20a61341f 100644 --- a/tools/build/feature/test-all.c +++ b/tools/build/feature/test-all.c @@ -162,6 +162,10 @@ # include "test-setns.c" #undef main +#define main main_test_libopencsd +# include "test-libopencsd.c" +#undef main + int main(int argc, char *argv[]) { main_test_libpython(); @@ -199,6 +203,7 @@ int main(int argc, char *argv[]) main_test_sched_getcpu(); main_test_sdt(); main_test_setns(); + main_test_libopencsd(); return 0; } diff --git a/tools/build/feature/test-libopencsd.c b/tools/build/feature/test-libopencsd.c new file mode 100644 index 000000000000..5ff1246e6194 --- /dev/null +++ b/tools/build/feature/test-libopencsd.c @@ -0,0 +1,8 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +int main(void) +{ + (void)ocsd_get_version(); + return 0; +} diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index a042ccca4e93..0dfdaa9fa81e 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -105,6 +105,16 @@ FEATURE_CHECK_LDFLAGS-libunwind = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) FEATURE_CHECK_CFLAGS-libunwind-debug-frame = $(LIBUNWIND_CFLAGS) FEATURE_CHECK_LDFLAGS-libunwind-debug-frame = $(LIBUNWIND_LDFLAGS) $(LIBUNWIND_LIBS) +ifdef CSINCLUDES + LIBOPENCSD_CFLAGS := -I$(CSINCLUDES) +endif +OPENCSDLIBS := -lopencsd_c_api -lopencsd +ifdef CSLIBS + LIBOPENCSD_LDFLAGS := -L$(CSLIBS) +endif +FEATURE_CHECK_CFLAGS-libopencsd := $(LIBOPENCSD_CFLAGS) +FEATURE_CHECK_LDFLAGS-libopencsd := $(LIBOPENCSD_LDFLAGS) $(OPENCSDLIBS) + ifeq ($(NO_PERF_REGS),0) CFLAGS += -DHAVE_PERF_REGS_SUPPORT endif @@ -353,6 +363,21 @@ ifeq ($(feature-setns), 1) $(call detected,CONFIG_SETNS) endif +ifndef NO_CORESIGHT + ifeq ($(feature-libopencsd), 1) + CFLAGS += -DHAVE_CSTRACE_SUPPORT $(LIBOPENCSD_CFLAGS) + LDFLAGS += $(LIBOPENCSD_LDFLAGS) + EXTLIBS += $(OPENCSDLIBS) + $(call detected,CONFIG_LIBOPENCSD) + ifdef CSTRACE_RAW + CFLAGS += -DCS_DEBUG_RAW + ifeq (${CSTRACE_RAW}, packed) + CFLAGS += -DCS_RAW_PACKED + endif + endif + endif +endif + ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index 9a9b528a88bb..9b0351d3ce34 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -98,6 +98,8 @@ include ../scripts/utilities.mak # When selected, pass LLVM_CONFIG=/path/to/llvm-config to `make' if # llvm-config is not in $PATH. +# Define NO_CORESIGHT if you do not want support for CoreSight trace decoding. + # As per kernel Makefile, avoid funny character set dependencies unexport LC_ALL LC_COLLATE=C -- cgit v1.2.3-70-g09d2 From 440a23b34c06104bd92b876b40efa45c2d7a0e27 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:11 -0700 Subject: perf tools: Add initial entry point for decoder CoreSight traces This patch adds the entry point for CoreSight trace decoding, serving as a jumping board for furhter expansions. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-3-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 5 ++ tools/perf/util/auxtrace.c | 2 + tools/perf/util/cs-etm.c | 213 +++++++++++++++++++++++++++++++++++++++++++++ tools/perf/util/cs-etm.h | 15 ++++ 4 files changed, 235 insertions(+) create mode 100644 tools/perf/util/cs-etm.c (limited to 'tools') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index 4eef0c243306..c054ff802efb 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -88,6 +88,11 @@ libperf-$(CONFIG_AUXTRACE) += intel-pt.o libperf-$(CONFIG_AUXTRACE) += intel-bts.o libperf-$(CONFIG_AUXTRACE) += arm-spe.o libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o + +ifdef CONFIG_LIBOPENCSD +libperf-$(CONFIG_AUXTRACE) += cs-etm.o +endif + libperf-y += parse-branch-options.o libperf-y += dump-insn.o libperf-y += parse-regs-options.o diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 3bba9947ab7f..9faf3b5367db 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -52,6 +52,7 @@ #include "debug.h" #include +#include "cs-etm.h" #include "intel-pt.h" #include "intel-bts.h" #include "arm-spe.h" @@ -914,6 +915,7 @@ int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, case PERF_AUXTRACE_ARM_SPE: return arm_spe_process_auxtrace_info(event, session); case PERF_AUXTRACE_CS_ETM: + return cs_etm__process_auxtrace_info(event, session); case PERF_AUXTRACE_UNKNOWN: default: return -EINVAL; diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c new file mode 100644 index 000000000000..f47797101857 --- /dev/null +++ b/tools/perf/util/cs-etm.c @@ -0,0 +1,213 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. + * + * Author: Tor Jeremiassen + * Author: Mathieu Poirier + */ + +#include +#include +#include +#include +#include + +#include + +#include "auxtrace.h" +#include "color.h" +#include "cs-etm.h" +#include "debug.h" +#include "evlist.h" +#include "intlist.h" +#include "machine.h" +#include "map.h" +#include "perf.h" +#include "thread.h" +#include "thread_map.h" +#include "thread-stack.h" +#include "util.h" + +#define MAX_TIMESTAMP (~0ULL) + +struct cs_etm_auxtrace { + struct auxtrace auxtrace; + struct auxtrace_queues queues; + struct auxtrace_heap heap; + struct itrace_synth_opts synth_opts; + struct perf_session *session; + struct machine *machine; + struct thread *unknown_thread; + + u8 timeless_decoding; + u8 snapshot_mode; + u8 data_queued; + u8 sample_branches; + + int num_cpu; + u32 auxtrace_type; + u64 branches_sample_type; + u64 branches_id; + u64 **metadata; + u64 kernel_start; + unsigned int pmu_type; +}; + +struct cs_etm_queue { + struct cs_etm_auxtrace *etm; + struct thread *thread; + struct cs_etm_decoder *decoder; + struct auxtrace_buffer *buffer; + const struct cs_etm_state *state; + union perf_event *event_buf; + unsigned int queue_nr; + pid_t pid, tid; + int cpu; + u64 time; + u64 timestamp; + u64 offset; +}; + +static int cs_etm__flush_events(struct perf_session *session, + struct perf_tool *tool) +{ + (void) session; + (void) tool; + return 0; +} + +static void cs_etm__free_queue(void *priv) +{ + struct cs_etm_queue *etmq = priv; + + free(etmq); +} + +static void cs_etm__free_events(struct perf_session *session) +{ + unsigned int i; + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + struct auxtrace_queues *queues = &aux->queues; + + for (i = 0; i < queues->nr_queues; i++) { + cs_etm__free_queue(queues->queue_array[i].priv); + queues->queue_array[i].priv = NULL; + } + + auxtrace_queues__free(queues); +} + +static void cs_etm__free(struct perf_session *session) +{ + struct cs_etm_auxtrace *aux = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + cs_etm__free_events(session); + session->auxtrace = NULL; + + zfree(&aux); +} + +static int cs_etm__process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + (void) session; + (void) event; + (void) sample; + (void) tool; + return 0; +} + +static int cs_etm__process_auxtrace_event(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool) +{ + (void) session; + (void) event; + (void) tool; + return 0; +} + +static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) +{ + struct perf_evsel *evsel; + struct perf_evlist *evlist = etm->session->evlist; + bool timeless_decoding = true; + + /* + * Circle through the list of event and complain if we find one + * with the time bit set. + */ + evlist__for_each_entry(evlist, evsel) { + if ((evsel->attr.sample_type & PERF_SAMPLE_TIME)) + timeless_decoding = false; + } + + return timeless_decoding; +} + +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session) +{ + struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; + struct cs_etm_auxtrace *etm = NULL; + int event_header_size = sizeof(struct perf_event_header); + int info_header_size; + int total_size = auxtrace_info->header.size; + int err = 0; + + /* + * sizeof(auxtrace_info_event::type) + + * sizeof(auxtrace_info_event::reserved) == 8 + */ + info_header_size = 8; + + if (total_size < (event_header_size + info_header_size)) + return -EINVAL; + + etm = zalloc(sizeof(*etm)); + + if (!etm) + err = -ENOMEM; + + err = auxtrace_queues__init(&etm->queues); + if (err) + goto err_free_etm; + + etm->session = session; + etm->machine = &session->machines.host; + + etm->auxtrace_type = auxtrace_info->type; + etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); + + etm->auxtrace.process_event = cs_etm__process_event; + etm->auxtrace.process_auxtrace_event = cs_etm__process_auxtrace_event; + etm->auxtrace.flush_events = cs_etm__flush_events; + etm->auxtrace.free_events = cs_etm__free_events; + etm->auxtrace.free = cs_etm__free; + session->auxtrace = &etm->auxtrace; + + if (dump_trace) + return 0; + + err = auxtrace_queues__process_index(&etm->queues, session); + if (err) + goto err_free_queues; + + etm->data_queued = etm->queues.populated; + + return 0; + +err_free_queues: + auxtrace_queues__free(&etm->queues); + session->auxtrace = NULL; +err_free_etm: + zfree(&etm); + + return -EINVAL; +} diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 3cc6bc3263fe..5ab6a8ef1b32 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -18,6 +18,9 @@ #ifndef INCLUDE__UTIL_PERF_CS_ETM_H__ #define INCLUDE__UTIL_PERF_CS_ETM_H__ +#include "util/event.h" +#include "util/session.h" + /* Versionning header in case things need tro change in the future. That way * decoding of old snapshot is still possible. */ @@ -71,4 +74,16 @@ static const u64 __perf_cs_etmv4_magic = 0x4040404040404040ULL; #define CS_ETMV3_PRIV_SIZE (CS_ETM_PRIV_MAX * sizeof(u64)) #define CS_ETMV4_PRIV_SIZE (CS_ETMV4_PRIV_MAX * sizeof(u64)) +#ifdef HAVE_CSTRACE_SUPPORT +int cs_etm__process_auxtrace_info(union perf_event *event, + struct perf_session *session); +#else +static inline int +cs_etm__process_auxtrace_info(union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + return -1; +} +#endif + #endif -- cgit v1.2.3-70-g09d2 From cd8bfd8c973eaff8e15260cf1f02e2b84c9b1cb9 Mon Sep 17 00:00:00 2001 From: Tor Jeremiassen Date: Wed, 17 Jan 2018 10:52:12 -0700 Subject: perf tools: Add processing of coresight metadata The auxtrace_info section contains metadata that describes the number of trace capable CPUs, their ETM version and trace configuration, including trace id values. This information is required by the trace decoder in order to properly decode the compressed trace packets. This patch adds code to read and parse this metadata, and store it for use in configuring instances of the cs-etm trace decoder. Co-authored-by: Mathieu Poirier Signed-off-by: Tor Jeremiassen Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-4-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 194 ++++++++++++++++++++++++++++++++++++++++++++++- tools/perf/util/cs-etm.h | 3 + 2 files changed, 194 insertions(+), 3 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index f47797101857..18894ee7aa0b 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -102,12 +102,24 @@ static void cs_etm__free_events(struct perf_session *session) static void cs_etm__free(struct perf_session *session) { + int i; + struct int_node *inode, *tmp; struct cs_etm_auxtrace *aux = container_of(session->auxtrace, struct cs_etm_auxtrace, auxtrace); cs_etm__free_events(session); session->auxtrace = NULL; + /* First remove all traceID/CPU# nodes for the RB tree */ + intlist__for_each_entry_safe(inode, tmp, traceid_list) + intlist__remove(traceid_list, inode); + /* Then the RB tree itself */ + intlist__delete(traceid_list); + + for (i = 0; i < aux->num_cpu; i++) + zfree(&aux->metadata[i]); + + zfree(&aux->metadata); zfree(&aux); } @@ -151,15 +163,69 @@ static bool cs_etm__is_timeless_decoding(struct cs_etm_auxtrace *etm) return timeless_decoding; } +static const char * const cs_etm_global_header_fmts[] = { + [CS_HEADER_VERSION_0] = " Header version %llx\n", + [CS_PMU_TYPE_CPUS] = " PMU type/num cpus %llx\n", + [CS_ETM_SNAPSHOT] = " Snapshot %llx\n", +}; + +static const char * const cs_etm_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETM_ETMCR] = " ETMCR %llx\n", + [CS_ETM_ETMTRACEIDR] = " ETMTRACEIDR %llx\n", + [CS_ETM_ETMCCER] = " ETMCCER %llx\n", + [CS_ETM_ETMIDR] = " ETMIDR %llx\n", +}; + +static const char * const cs_etmv4_priv_fmts[] = { + [CS_ETM_MAGIC] = " Magic number %llx\n", + [CS_ETM_CPU] = " CPU %lld\n", + [CS_ETMV4_TRCCONFIGR] = " TRCCONFIGR %llx\n", + [CS_ETMV4_TRCTRACEIDR] = " TRCTRACEIDR %llx\n", + [CS_ETMV4_TRCIDR0] = " TRCIDR0 %llx\n", + [CS_ETMV4_TRCIDR1] = " TRCIDR1 %llx\n", + [CS_ETMV4_TRCIDR2] = " TRCIDR2 %llx\n", + [CS_ETMV4_TRCIDR8] = " TRCIDR8 %llx\n", + [CS_ETMV4_TRCAUTHSTATUS] = " TRCAUTHSTATUS %llx\n", +}; + +static void cs_etm__print_auxtrace_info(u64 *val, int num) +{ + int i, j, cpu = 0; + + for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + fprintf(stdout, cs_etm_global_header_fmts[i], val[i]); + + for (i = CS_HEADER_VERSION_0_MAX; cpu < num; cpu++) { + if (val[i] == __perf_cs_etmv3_magic) + for (j = 0; j < CS_ETM_PRIV_MAX; j++, i++) + fprintf(stdout, cs_etm_priv_fmts[j], val[i]); + else if (val[i] == __perf_cs_etmv4_magic) + for (j = 0; j < CS_ETMV4_PRIV_MAX; j++, i++) + fprintf(stdout, cs_etmv4_priv_fmts[j], val[i]); + else + /* failure.. return */ + return; + } +} + int cs_etm__process_auxtrace_info(union perf_event *event, struct perf_session *session) { struct auxtrace_info_event *auxtrace_info = &event->auxtrace_info; struct cs_etm_auxtrace *etm = NULL; + struct int_node *inode; + unsigned int pmu_type; int event_header_size = sizeof(struct perf_event_header); int info_header_size; int total_size = auxtrace_info->header.size; - int err = 0; + int priv_size = 0; + int num_cpu; + int err = 0, idx = -1; + int i, j, k; + u64 *ptr, *hdr = NULL; + u64 **metadata = NULL; /* * sizeof(auxtrace_info_event::type) + @@ -170,10 +236,117 @@ int cs_etm__process_auxtrace_info(union perf_event *event, if (total_size < (event_header_size + info_header_size)) return -EINVAL; + priv_size = total_size - event_header_size - info_header_size; + + /* First the global part */ + ptr = (u64 *) auxtrace_info->priv; + + /* Look for version '0' of the header */ + if (ptr[0] != 0) + return -EINVAL; + + hdr = zalloc(sizeof(*hdr) * CS_HEADER_VERSION_0_MAX); + if (!hdr) + return -ENOMEM; + + /* Extract header information - see cs-etm.h for format */ + for (i = 0; i < CS_HEADER_VERSION_0_MAX; i++) + hdr[i] = ptr[i]; + num_cpu = hdr[CS_PMU_TYPE_CPUS] & 0xffffffff; + pmu_type = (unsigned int) ((hdr[CS_PMU_TYPE_CPUS] >> 32) & + 0xffffffff); + + /* + * Create an RB tree for traceID-CPU# tuple. Since the conversion has + * to be made for each packet that gets decoded, optimizing access in + * anything other than a sequential array is worth doing. + */ + traceid_list = intlist__new(NULL); + if (!traceid_list) { + err = -ENOMEM; + goto err_free_hdr; + } + + metadata = zalloc(sizeof(*metadata) * num_cpu); + if (!metadata) { + err = -ENOMEM; + goto err_free_traceid_list; + } + + /* + * The metadata is stored in the auxtrace_info section and encodes + * the configuration of the ARM embedded trace macrocell which is + * required by the trace decoder to properly decode the trace due + * to its highly compressed nature. + */ + for (j = 0; j < num_cpu; j++) { + if (ptr[i] == __perf_cs_etmv3_magic) { + metadata[j] = zalloc(sizeof(*metadata[j]) * + CS_ETM_PRIV_MAX); + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; + } + for (k = 0; k < CS_ETM_PRIV_MAX; k++) + metadata[j][k] = ptr[i + k]; + + /* The traceID is our handle */ + idx = metadata[j][CS_ETM_ETMTRACEIDR]; + i += CS_ETM_PRIV_MAX; + } else if (ptr[i] == __perf_cs_etmv4_magic) { + metadata[j] = zalloc(sizeof(*metadata[j]) * + CS_ETMV4_PRIV_MAX); + if (!metadata[j]) { + err = -ENOMEM; + goto err_free_metadata; + } + for (k = 0; k < CS_ETMV4_PRIV_MAX; k++) + metadata[j][k] = ptr[i + k]; + + /* The traceID is our handle */ + idx = metadata[j][CS_ETMV4_TRCTRACEIDR]; + i += CS_ETMV4_PRIV_MAX; + } + + /* Get an RB node for this CPU */ + inode = intlist__findnew(traceid_list, idx); + + /* Something went wrong, no need to continue */ + if (!inode) { + err = PTR_ERR(inode); + goto err_free_metadata; + } + + /* + * The node for that CPU should not be taken. + * Back out if that's the case. + */ + if (inode->priv) { + err = -EINVAL; + goto err_free_metadata; + } + /* All good, associate the traceID with the CPU# */ + inode->priv = &metadata[j][CS_ETM_CPU]; + } + + /* + * Each of CS_HEADER_VERSION_0_MAX, CS_ETM_PRIV_MAX and + * CS_ETMV4_PRIV_MAX mark how many double words are in the + * global metadata, and each cpu's metadata respectively. + * The following tests if the correct number of double words was + * present in the auxtrace info section. + */ + if (i * 8 != priv_size) { + err = -EINVAL; + goto err_free_metadata; + } + etm = zalloc(sizeof(*etm)); - if (!etm) + if (!etm) { err = -ENOMEM; + goto err_free_metadata; + } err = auxtrace_queues__init(&etm->queues); if (err) @@ -182,6 +355,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->session = session; etm->machine = &session->machines.host; + etm->num_cpu = num_cpu; + etm->pmu_type = pmu_type; + etm->snapshot_mode = (hdr[CS_ETM_SNAPSHOT] != 0); + etm->metadata = metadata; etm->auxtrace_type = auxtrace_info->type; etm->timeless_decoding = cs_etm__is_timeless_decoding(etm); @@ -192,8 +369,10 @@ int cs_etm__process_auxtrace_info(union perf_event *event, etm->auxtrace.free = cs_etm__free; session->auxtrace = &etm->auxtrace; - if (dump_trace) + if (dump_trace) { + cs_etm__print_auxtrace_info(auxtrace_info->priv, num_cpu); return 0; + } err = auxtrace_queues__process_index(&etm->queues, session); if (err) @@ -208,6 +387,15 @@ err_free_queues: session->auxtrace = NULL; err_free_etm: zfree(&etm); +err_free_metadata: + /* No need to check @metadata[j], free(NULL) is supported */ + for (j = 0; j < num_cpu; j++) + free(metadata[j]); + zfree(&metadata); +err_free_traceid_list: + intlist__delete(traceid_list); +err_free_hdr: + zfree(&hdr); return -EINVAL; } diff --git a/tools/perf/util/cs-etm.h b/tools/perf/util/cs-etm.h index 5ab6a8ef1b32..5864d5dca616 100644 --- a/tools/perf/util/cs-etm.h +++ b/tools/perf/util/cs-etm.h @@ -64,6 +64,9 @@ enum { CS_ETMV4_PRIV_MAX, }; +/* RB tree for quick conversion between traceID and CPUs */ +struct intlist *traceid_list; + #define KiB(x) ((x) * 1024) #define MiB(x) ((x) * 1024 * 1024) -- cgit v1.2.3-70-g09d2 From 68ffe39028982d08bf382700642ed46cc0539e1b Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:13 -0700 Subject: perf tools: Add decoder mechanic to support dumping trace data This patch adds the required interface to the openCSD library to support dumping CoreSight trace packet using the "report --dump" command. The information conveyed is related to the type of packets gathered by a trace session rather than full decoding. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-5-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/Build | 1 + tools/perf/util/cs-etm-decoder/Build | 1 + tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 334 ++++++++++++++++++++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 96 +++++++ tools/perf/util/cs-etm.c | 108 +++++++- 5 files changed, 536 insertions(+), 4 deletions(-) create mode 100644 tools/perf/util/cs-etm-decoder/Build create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-decoder.c create mode 100644 tools/perf/util/cs-etm-decoder/cs-etm-decoder.h (limited to 'tools') diff --git a/tools/perf/util/Build b/tools/perf/util/Build index c054ff802efb..ea0a452550b0 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -91,6 +91,7 @@ libperf-$(CONFIG_AUXTRACE) += arm-spe-pkt-decoder.o ifdef CONFIG_LIBOPENCSD libperf-$(CONFIG_AUXTRACE) += cs-etm.o +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder/ endif libperf-y += parse-branch-options.o diff --git a/tools/perf/util/cs-etm-decoder/Build b/tools/perf/util/cs-etm-decoder/Build new file mode 100644 index 000000000000..bc22c39c727f --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_AUXTRACE) += cs-etm-decoder.o diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c new file mode 100644 index 000000000000..6a4c86b1431f --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -0,0 +1,334 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. + * + * Author: Tor Jeremiassen + * Author: Mathieu Poirier + */ + +#include +#include +#include +#include +#include +#include + +#include "cs-etm.h" +#include "cs-etm-decoder.h" +#include "intlist.h" +#include "util.h" + +#define MAX_BUFFER 1024 + +/* use raw logging */ +#ifdef CS_DEBUG_RAW +#define CS_LOG_RAW_FRAMES +#ifdef CS_RAW_PACKED +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT | \ + OCSD_DFRMTR_PACKED_RAW_OUT) +#else +#define CS_RAW_DEBUG_FLAGS (OCSD_DFRMTR_UNPACKED_RAW_OUT) +#endif +#endif + +struct cs_etm_decoder { + void *data; + void (*packet_printer)(const char *msg); + bool trace_on; + dcd_tree_handle_t dcd_tree; + cs_etm_mem_cb_type mem_access; + ocsd_datapath_resp_t prev_return; + u32 packet_count; + u32 head; + u32 tail; + struct cs_etm_packet packet_buffer[MAX_BUFFER]; +}; + +static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, + ocsd_etmv4_cfg *config) +{ + config->reg_configr = params->etmv4.reg_configr; + config->reg_traceidr = params->etmv4.reg_traceidr; + config->reg_idr0 = params->etmv4.reg_idr0; + config->reg_idr1 = params->etmv4.reg_idr1; + config->reg_idr2 = params->etmv4.reg_idr2; + config->reg_idr8 = params->etmv4.reg_idr8; + config->reg_idr9 = 0; + config->reg_idr10 = 0; + config->reg_idr11 = 0; + config->reg_idr12 = 0; + config->reg_idr13 = 0; + config->arch_ver = ARCH_V8; + config->core_prof = profile_CortexA; +} + +static void cs_etm_decoder__print_str_cb(const void *p_context, + const char *msg, + const int str_len) +{ + if (p_context && str_len) + ((struct cs_etm_decoder *)p_context)->packet_printer(msg); +} + +static int +cs_etm_decoder__init_def_logger_printing(struct cs_etm_decoder_params *d_params, + struct cs_etm_decoder *decoder) +{ + int ret = 0; + + if (d_params->packet_printer == NULL) + return -1; + + decoder->packet_printer = d_params->packet_printer; + + /* + * Set up a library default logger to process any printers + * (packet/raw frame) we add later. + */ + ret = ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); + if (ret != 0) + return -1; + + /* no stdout / err / file output */ + ret = ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); + if (ret != 0) + return -1; + + /* + * Set the string CB for the default logger, passes strings to + * perf print logger. + */ + ret = ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, + (void *)decoder, + cs_etm_decoder__print_str_cb); + if (ret != 0) + ret = -1; + + return 0; +} + +#ifdef CS_LOG_RAW_FRAMES +static void +cs_etm_decoder__init_raw_frame_logging(struct cs_etm_decoder_params *d_params, + struct cs_etm_decoder *decoder) +{ + /* Only log these during a --dump operation */ + if (d_params->operation == CS_ETM_OPERATION_PRINT) { + /* set up a library default logger to process the + * raw frame printer we add later + */ + ocsd_def_errlog_init(OCSD_ERR_SEV_ERROR, 1); + + /* no stdout / err / file output */ + ocsd_def_errlog_config_output(C_API_MSGLOGOUT_FLG_NONE, NULL); + + /* set the string CB for the default logger, + * passes strings to perf print logger. + */ + ocsd_def_errlog_set_strprint_cb(decoder->dcd_tree, + (void *)decoder, + cs_etm_decoder__print_str_cb); + + /* use the built in library printer for the raw frames */ + ocsd_dt_set_raw_frame_printer(decoder->dcd_tree, + CS_RAW_DEBUG_FLAGS); + } +} +#else +static void +cs_etm_decoder__init_raw_frame_logging( + struct cs_etm_decoder_params *d_params __maybe_unused, + struct cs_etm_decoder *decoder __maybe_unused) +{ +} +#endif + +static int cs_etm_decoder__create_packet_printer(struct cs_etm_decoder *decoder, + const char *decoder_name, + void *trace_config) +{ + u8 csid; + + if (ocsd_dt_create_decoder(decoder->dcd_tree, decoder_name, + OCSD_CREATE_FLG_PACKET_PROC, + trace_config, &csid)) + return -1; + + if (ocsd_dt_set_pkt_protocol_printer(decoder->dcd_tree, csid, 0)) + return -1; + + return 0; +} + +static int +cs_etm_decoder__create_etm_packet_printer(struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + const char *decoder_name; + ocsd_etmv4_cfg trace_config_etmv4; + void *trace_config; + + switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV4i: + cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); + decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + trace_config = &trace_config_etmv4; + break; + default: + return -1; + } + + return cs_etm_decoder__create_packet_printer(decoder, + decoder_name, + trace_config); +} + +static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) +{ + int i; + + decoder->head = 0; + decoder->tail = 0; + decoder->packet_count = 0; + for (i = 0; i < MAX_BUFFER; i++) { + decoder->packet_buffer[i].start_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].end_addr = 0xdeadbeefdeadbeefUL; + decoder->packet_buffer[i].exc = false; + decoder->packet_buffer[i].exc_ret = false; + decoder->packet_buffer[i].cpu = INT_MIN; + } +} + +static int +cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + if (d_params->operation == CS_ETM_OPERATION_PRINT) + return cs_etm_decoder__create_etm_packet_printer(t_params, + decoder); + return -1; +} + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params t_params[]) +{ + struct cs_etm_decoder *decoder; + ocsd_dcd_tree_src_t format; + u32 flags; + int i, ret; + + if ((!t_params) || (!d_params)) + return NULL; + + decoder = zalloc(sizeof(*decoder)); + + if (!decoder) + return NULL; + + decoder->data = d_params->data; + decoder->prev_return = OCSD_RESP_CONT; + cs_etm_decoder__clear_buffer(decoder); + format = (d_params->formatted ? OCSD_TRC_SRC_FRAME_FORMATTED : + OCSD_TRC_SRC_SINGLE); + flags = 0; + flags |= (d_params->fsyncs ? OCSD_DFRMTR_HAS_FSYNCS : 0); + flags |= (d_params->hsyncs ? OCSD_DFRMTR_HAS_HSYNCS : 0); + flags |= (d_params->frame_aligned ? OCSD_DFRMTR_FRAME_MEM_ALIGN : 0); + + /* + * Drivers may add barrier frames when used with perf, set up to + * handle this. Barriers const of FSYNC packet repeated 4 times. + */ + flags |= OCSD_DFRMTR_RESET_ON_4X_FSYNC; + + /* Create decode tree for the data source */ + decoder->dcd_tree = ocsd_create_dcd_tree(format, flags); + + if (decoder->dcd_tree == 0) + goto err_free_decoder; + + /* init library print logging support */ + ret = cs_etm_decoder__init_def_logger_printing(d_params, decoder); + if (ret != 0) + goto err_free_decoder_tree; + + /* init raw frame logging if required */ + cs_etm_decoder__init_raw_frame_logging(d_params, decoder); + + for (i = 0; i < num_cpu; i++) { + ret = cs_etm_decoder__create_etm_decoder(d_params, + &t_params[i], + decoder); + if (ret != 0) + goto err_free_decoder_tree; + } + + return decoder; + +err_free_decoder_tree: + ocsd_destroy_dcd_tree(decoder->dcd_tree); +err_free_decoder: + free(decoder); + return NULL; +} + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + u64 indx, const u8 *buf, + size_t len, size_t *consumed) +{ + int ret = 0; + ocsd_datapath_resp_t cur = OCSD_RESP_CONT; + ocsd_datapath_resp_t prev_return = decoder->prev_return; + size_t processed = 0; + u32 count; + + while (processed < len) { + if (OCSD_DATA_RESP_IS_WAIT(prev_return)) { + cur = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_FLUSH, + 0, + 0, + NULL, + NULL); + } else if (OCSD_DATA_RESP_IS_CONT(prev_return)) { + cur = ocsd_dt_process_data(decoder->dcd_tree, + OCSD_OP_DATA, + indx + processed, + len - processed, + &buf[processed], + &count); + processed += count; + } else { + ret = -EINVAL; + break; + } + + /* + * Return to the input code if the packet buffer is full. + * Flushing will get done once the packet buffer has been + * processed. + */ + if (OCSD_DATA_RESP_IS_WAIT(cur)) + break; + + prev_return = cur; + } + + decoder->prev_return = cur; + *consumed = processed; + + return ret; +} + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder) +{ + if (!decoder) + return; + + ocsd_destroy_dcd_tree(decoder->dcd_tree); + decoder->dcd_tree = NULL; + free(decoder); +} diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h new file mode 100644 index 000000000000..a1e9b0ac5965 --- /dev/null +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -0,0 +1,96 @@ +/* + * SPDX-License-Identifier: GPL-2.0 + * + * Copyright(C) 2015-2018 Linaro Limited. + * + * Author: Tor Jeremiassen + * Author: Mathieu Poirier + */ + +#ifndef INCLUDE__CS_ETM_DECODER_H__ +#define INCLUDE__CS_ETM_DECODER_H__ + +#include +#include + +struct cs_etm_decoder; + +struct cs_etm_buffer { + const unsigned char *buf; + size_t len; + u64 offset; + u64 ref_timestamp; +}; + +enum cs_etm_sample_type { + CS_ETM_RANGE = 1 << 0, +}; + +struct cs_etm_packet { + enum cs_etm_sample_type sample_type; + u64 start_addr; + u64 end_addr; + u8 exc; + u8 exc_ret; + int cpu; +}; + +struct cs_etm_queue; + +typedef u32 (*cs_etm_mem_cb_type)(struct cs_etm_queue *, u64, + size_t, u8 *); + +struct cs_etmv4_trace_params { + u32 reg_idr0; + u32 reg_idr1; + u32 reg_idr2; + u32 reg_idr8; + u32 reg_configr; + u32 reg_traceidr; +}; + +struct cs_etm_trace_params { + int protocol; + union { + struct cs_etmv4_trace_params etmv4; + }; +}; + +struct cs_etm_decoder_params { + int operation; + void (*packet_printer)(const char *msg); + cs_etm_mem_cb_type mem_acc_cb; + u8 formatted; + u8 fsyncs; + u8 hsyncs; + u8 frame_aligned; + void *data; +}; + +/* + * The following enums are indexed starting with 1 to align with the + * open source coresight trace decoder library. + */ +enum { + CS_ETM_PROTO_ETMV3 = 1, + CS_ETM_PROTO_ETMV4i, + CS_ETM_PROTO_ETMV4d, +}; + +enum { + CS_ETM_OPERATION_PRINT = 1, + CS_ETM_OPERATION_DECODE, +}; + +int cs_etm_decoder__process_data_block(struct cs_etm_decoder *decoder, + u64 indx, const u8 *buf, + size_t len, size_t *consumed); + +struct cs_etm_decoder * +cs_etm_decoder__new(int num_cpu, + struct cs_etm_decoder_params *d_params, + struct cs_etm_trace_params t_params[]); + +void cs_etm_decoder__free(struct cs_etm_decoder *decoder); + +#endif /* INCLUDE__CS_ETM_DECODER_H__ */ diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 18894ee7aa0b..cad429ce3c00 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -18,6 +18,7 @@ #include "auxtrace.h" #include "color.h" #include "cs-etm.h" +#include "cs-etm-decoder/cs-etm-decoder.h" #include "debug.h" #include "evlist.h" #include "intlist.h" @@ -69,6 +70,78 @@ struct cs_etm_queue { u64 offset; }; +static void cs_etm__packet_dump(const char *pkt_string) +{ + const char *color = PERF_COLOR_BLUE; + int len = strlen(pkt_string); + + if (len && (pkt_string[len-1] == '\n')) + color_fprintf(stdout, color, " %s", pkt_string); + else + color_fprintf(stdout, color, " %s\n", pkt_string); + + fflush(stdout); +} + +static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, + struct auxtrace_buffer *buffer) +{ + int i, ret; + const char *color = PERF_COLOR_BLUE; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_decoder *decoder; + size_t buffer_used = 0; + + fprintf(stdout, "\n"); + color_fprintf(stdout, color, + ". ... CoreSight ETM Trace data: size %zu bytes\n", + buffer->size); + + /* Use metadata to fill in trace parameters for trace decoder */ + t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + for (i = 0; i < etm->num_cpu; i++) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = + etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].etmv4.reg_traceidr = + etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } + + /* Set decoder parameters to simply print the trace packets */ + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_PRINT; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + + decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + + zfree(&t_params); + + if (!decoder) + return; + do { + size_t consumed; + + ret = cs_etm_decoder__process_data_block( + decoder, buffer->offset, + &((u8 *)buffer->data)[buffer_used], + buffer->size - buffer_used, &consumed); + if (ret) + break; + + buffer_used += consumed; + } while (buffer_used < buffer->size); + + cs_etm_decoder__free(decoder); +} + static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { @@ -137,11 +210,38 @@ static int cs_etm__process_event(struct perf_session *session, static int cs_etm__process_auxtrace_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool) + struct perf_tool *tool __maybe_unused) { - (void) session; - (void) event; - (void) tool; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + if (!etm->data_queued) { + struct auxtrace_buffer *buffer; + off_t data_offset; + int fd = perf_data__fd(session->data); + bool is_pipe = perf_data__is_pipe(session->data); + int err; + + if (is_pipe) + data_offset = 0; + else { + data_offset = lseek(fd, 0, SEEK_CUR); + if (data_offset == -1) + return -errno; + } + + err = auxtrace_queues__add_event(&etm->queues, session, + event, data_offset, &buffer); + if (err) + return err; + + if (dump_trace) + if (auxtrace_buffer__get_data(buffer, fd)) { + cs_etm__dump_event(etm, buffer); + auxtrace_buffer__put_data(buffer); + } + } + return 0; } -- cgit v1.2.3-70-g09d2 From c9a01a11dfe63e5b981b1fe8c99435e12c758007 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:14 -0700 Subject: perf tools: Add support for decoding CoreSight trace data Adding functionality to create a CoreSight trace decoder capable of decoding trace data pushed by a client application. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-6-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 119 ++++++++++++++++++++++++ 1 file changed, 119 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 6a4c86b1431f..57b020b0b36f 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -200,6 +200,121 @@ static void cs_etm_decoder__clear_buffer(struct cs_etm_decoder *decoder) } } +static ocsd_datapath_resp_t +cs_etm_decoder__buffer_packet(struct cs_etm_decoder *decoder, + const ocsd_generic_trace_elem *elem, + const u8 trace_chan_id, + enum cs_etm_sample_type sample_type) +{ + u32 et = 0; + struct int_node *inode = NULL; + + if (decoder->packet_count >= MAX_BUFFER - 1) + return OCSD_RESP_FATAL_SYS_ERR; + + /* Search the RB tree for the cpu associated with this traceID */ + inode = intlist__find(traceid_list, trace_chan_id); + if (!inode) + return OCSD_RESP_FATAL_SYS_ERR; + + et = decoder->tail; + decoder->packet_buffer[et].sample_type = sample_type; + decoder->packet_buffer[et].start_addr = elem->st_addr; + decoder->packet_buffer[et].end_addr = elem->en_addr; + decoder->packet_buffer[et].exc = false; + decoder->packet_buffer[et].exc_ret = false; + decoder->packet_buffer[et].cpu = *((int *)inode->priv); + + /* Wrap around if need be */ + et = (et + 1) & (MAX_BUFFER - 1); + + decoder->tail = et; + decoder->packet_count++; + + if (decoder->packet_count == MAX_BUFFER - 1) + return OCSD_RESP_WAIT; + + return OCSD_RESP_CONT; +} + +static ocsd_datapath_resp_t cs_etm_decoder__gen_trace_elem_printer( + const void *context, + const ocsd_trc_index_t indx __maybe_unused, + const u8 trace_chan_id __maybe_unused, + const ocsd_generic_trace_elem *elem) +{ + ocsd_datapath_resp_t resp = OCSD_RESP_CONT; + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + switch (elem->elem_type) { + case OCSD_GEN_TRC_ELEM_UNKNOWN: + break; + case OCSD_GEN_TRC_ELEM_NO_SYNC: + decoder->trace_on = false; + break; + case OCSD_GEN_TRC_ELEM_TRACE_ON: + decoder->trace_on = true; + break; + case OCSD_GEN_TRC_ELEM_INSTR_RANGE: + resp = cs_etm_decoder__buffer_packet(decoder, elem, + trace_chan_id, + CS_ETM_RANGE); + break; + case OCSD_GEN_TRC_ELEM_EXCEPTION: + decoder->packet_buffer[decoder->tail].exc = true; + break; + case OCSD_GEN_TRC_ELEM_EXCEPTION_RET: + decoder->packet_buffer[decoder->tail].exc_ret = true; + break; + case OCSD_GEN_TRC_ELEM_PE_CONTEXT: + case OCSD_GEN_TRC_ELEM_EO_TRACE: + case OCSD_GEN_TRC_ELEM_ADDR_NACC: + case OCSD_GEN_TRC_ELEM_TIMESTAMP: + case OCSD_GEN_TRC_ELEM_CYCLE_COUNT: + case OCSD_GEN_TRC_ELEM_ADDR_UNKNOWN: + case OCSD_GEN_TRC_ELEM_EVENT: + case OCSD_GEN_TRC_ELEM_SWTRACE: + case OCSD_GEN_TRC_ELEM_CUSTOM: + default: + break; + } + + return resp; +} + +static int cs_etm_decoder__create_etm_packet_decoder( + struct cs_etm_trace_params *t_params, + struct cs_etm_decoder *decoder) +{ + const char *decoder_name; + ocsd_etmv4_cfg trace_config_etmv4; + void *trace_config; + u8 csid; + + switch (t_params->protocol) { + case CS_ETM_PROTO_ETMV4i: + cs_etm_decoder__gen_etmv4_config(t_params, &trace_config_etmv4); + decoder_name = OCSD_BUILTIN_DCD_ETMV4I; + trace_config = &trace_config_etmv4; + break; + default: + return -1; + } + + if (ocsd_dt_create_decoder(decoder->dcd_tree, + decoder_name, + OCSD_CREATE_FLG_FULL_DECODER, + trace_config, &csid)) + return -1; + + if (ocsd_dt_set_gen_elem_outfn(decoder->dcd_tree, + cs_etm_decoder__gen_trace_elem_printer, + decoder)) + return -1; + + return 0; +} + static int cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, struct cs_etm_trace_params *t_params, @@ -208,6 +323,10 @@ cs_etm_decoder__create_etm_decoder(struct cs_etm_decoder_params *d_params, if (d_params->operation == CS_ETM_OPERATION_PRINT) return cs_etm_decoder__create_etm_packet_printer(t_params, decoder); + else if (d_params->operation == CS_ETM_OPERATION_DECODE) + return cs_etm_decoder__create_etm_packet_decoder(t_params, + decoder); + return -1; } -- cgit v1.2.3-70-g09d2 From 290598be0e84badee2ce93b32e4146184720b2f4 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:15 -0700 Subject: perf tools: Add functionality to communicate with the openCSD decoder This patch adds functions to communicate with the openCSD trace decoder, more specifically to access program memory, fetch trace packets and reset the decoder. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-7-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm-decoder/cs-etm-decoder.c | 60 +++++++++++++++++++++++++ tools/perf/util/cs-etm-decoder/cs-etm-decoder.h | 9 ++++ 2 files changed, 69 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c index 57b020b0b36f..1fb01849f1c7 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.c @@ -45,6 +45,66 @@ struct cs_etm_decoder { struct cs_etm_packet packet_buffer[MAX_BUFFER]; }; +static u32 +cs_etm_decoder__mem_access(const void *context, + const ocsd_vaddr_t address, + const ocsd_mem_space_acc_t mem_space __maybe_unused, + const u32 req_size, + u8 *buffer) +{ + struct cs_etm_decoder *decoder = (struct cs_etm_decoder *) context; + + return decoder->mem_access(decoder->data, + address, + req_size, + buffer); +} + +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, + u64 start, u64 end, + cs_etm_mem_cb_type cb_func) +{ + decoder->mem_access = cb_func; + + if (ocsd_dt_add_callback_mem_acc(decoder->dcd_tree, start, end, + OCSD_MEM_SPACE_ANY, + cs_etm_decoder__mem_access, decoder)) + return -1; + + return 0; +} + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder) +{ + ocsd_datapath_resp_t dp_ret; + + dp_ret = ocsd_dt_process_data(decoder->dcd_tree, OCSD_OP_RESET, + 0, 0, NULL, NULL); + if (OCSD_DATA_RESP_IS_FATAL(dp_ret)) + return -1; + + return 0; +} + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet) +{ + if (!decoder || !packet) + return -EINVAL; + + /* Nothing to do, might as well just return */ + if (decoder->packet_count == 0) + return 0; + + *packet = decoder->packet_buffer[decoder->head]; + + decoder->head = (decoder->head + 1) & (MAX_BUFFER - 1); + + decoder->packet_count--; + + return 1; +} + static void cs_etm_decoder__gen_etmv4_config(struct cs_etm_trace_params *params, ocsd_etmv4_cfg *config) { diff --git a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h index a1e9b0ac5965..3d2e6205d186 100644 --- a/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h +++ b/tools/perf/util/cs-etm-decoder/cs-etm-decoder.h @@ -93,4 +93,13 @@ cs_etm_decoder__new(int num_cpu, void cs_etm_decoder__free(struct cs_etm_decoder *decoder); +int cs_etm_decoder__add_mem_access_cb(struct cs_etm_decoder *decoder, + u64 start, u64 end, + cs_etm_mem_cb_type cb_func); + +int cs_etm_decoder__get_packet(struct cs_etm_decoder *decoder, + struct cs_etm_packet *packet); + +int cs_etm_decoder__reset(struct cs_etm_decoder *decoder); + #endif /* INCLUDE__CS_ETM_DECODER_H__ */ -- cgit v1.2.3-70-g09d2 From 20d9c478b01aa1a652db54c1fe867dc92636bc70 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:16 -0700 Subject: pert tools: Add queue management functionality Add functionatlity to setup trace queues so that traces associated with CoreSight auxtrace events found in the perf.data file can be classified properly. The decoder and memory callback associated with each queue are then used to decode the traces that have been assigned to that queue. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-8-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 208 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 204 insertions(+), 4 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index cad429ce3c00..83eb676274b5 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -196,15 +196,215 @@ static void cs_etm__free(struct perf_session *session) zfree(&aux); } +static u32 cs_etm__mem_access(struct cs_etm_queue *etmq, u64 address, + size_t size, u8 *buffer) +{ + u8 cpumode; + u64 offset; + int len; + struct thread *thread; + struct machine *machine; + struct addr_location al; + + if (!etmq) + return -1; + + machine = etmq->etm->machine; + if (address >= etmq->etm->kernel_start) + cpumode = PERF_RECORD_MISC_KERNEL; + else + cpumode = PERF_RECORD_MISC_USER; + + thread = etmq->thread; + if (!thread) { + if (cpumode != PERF_RECORD_MISC_KERNEL) + return -EINVAL; + thread = etmq->etm->unknown_thread; + } + + thread__find_addr_map(thread, cpumode, MAP__FUNCTION, address, &al); + + if (!al.map || !al.map->dso) + return 0; + + if (al.map->dso->data.status == DSO_DATA_STATUS_ERROR && + dso__data_status_seen(al.map->dso, DSO_DATA_STATUS_SEEN_ITRACE)) + return 0; + + offset = al.map->map_ip(al.map, address); + + map__load(al.map); + + len = dso__data_read_offset(al.map->dso, machine, offset, buffer, size); + + if (len <= 0) + return 0; + + return len; +} + +static struct cs_etm_queue *cs_etm__alloc_queue(struct cs_etm_auxtrace *etm, + unsigned int queue_nr) +{ + int i; + struct cs_etm_decoder_params d_params; + struct cs_etm_trace_params *t_params; + struct cs_etm_queue *etmq; + + etmq = zalloc(sizeof(*etmq)); + if (!etmq) + return NULL; + + etmq->event_buf = malloc(PERF_SAMPLE_MAX_SIZE); + if (!etmq->event_buf) + goto out_free; + + etmq->etm = etm; + etmq->queue_nr = queue_nr; + etmq->pid = -1; + etmq->tid = -1; + etmq->cpu = -1; + + /* Use metadata to fill in trace parameters for trace decoder */ + t_params = zalloc(sizeof(*t_params) * etm->num_cpu); + + if (!t_params) + goto out_free; + + for (i = 0; i < etm->num_cpu; i++) { + t_params[i].protocol = CS_ETM_PROTO_ETMV4i; + t_params[i].etmv4.reg_idr0 = etm->metadata[i][CS_ETMV4_TRCIDR0]; + t_params[i].etmv4.reg_idr1 = etm->metadata[i][CS_ETMV4_TRCIDR1]; + t_params[i].etmv4.reg_idr2 = etm->metadata[i][CS_ETMV4_TRCIDR2]; + t_params[i].etmv4.reg_idr8 = etm->metadata[i][CS_ETMV4_TRCIDR8]; + t_params[i].etmv4.reg_configr = + etm->metadata[i][CS_ETMV4_TRCCONFIGR]; + t_params[i].etmv4.reg_traceidr = + etm->metadata[i][CS_ETMV4_TRCTRACEIDR]; + } + + /* Set decoder parameters to simply print the trace packets */ + d_params.packet_printer = cs_etm__packet_dump; + d_params.operation = CS_ETM_OPERATION_DECODE; + d_params.formatted = true; + d_params.fsyncs = false; + d_params.hsyncs = false; + d_params.frame_aligned = true; + d_params.data = etmq; + + etmq->decoder = cs_etm_decoder__new(etm->num_cpu, &d_params, t_params); + + zfree(&t_params); + + if (!etmq->decoder) + goto out_free; + + /* + * Register a function to handle all memory accesses required by + * the trace decoder library. + */ + if (cs_etm_decoder__add_mem_access_cb(etmq->decoder, + 0x0L, ((u64) -1L), + cs_etm__mem_access)) + goto out_free_decoder; + + etmq->offset = 0; + + return etmq; + +out_free_decoder: + cs_etm_decoder__free(etmq->decoder); +out_free: + zfree(&etmq->event_buf); + free(etmq); + + return NULL; +} + +static int cs_etm__setup_queue(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue, + unsigned int queue_nr) +{ + struct cs_etm_queue *etmq = queue->priv; + + if (list_empty(&queue->head) || etmq) + return 0; + + etmq = cs_etm__alloc_queue(etm, queue_nr); + + if (!etmq) + return -ENOMEM; + + queue->priv = etmq; + + if (queue->cpu != -1) + etmq->cpu = queue->cpu; + + etmq->tid = queue->tid; + + return 0; +} + +static int cs_etm__setup_queues(struct cs_etm_auxtrace *etm) +{ + unsigned int i; + int ret; + + for (i = 0; i < etm->queues.nr_queues; i++) { + ret = cs_etm__setup_queue(etm, &etm->queues.queue_array[i], i); + if (ret) + return ret; + } + + return 0; +} + +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) +{ + if (etm->queues.new_data) { + etm->queues.new_data = false; + return cs_etm__setup_queues(etm); + } + + return 0; +} + static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, struct perf_tool *tool) { - (void) session; - (void) event; - (void) sample; - (void) tool; + int err = 0; + u64 timestamp; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + + /* Keep compiler happy */ + (void)event; + + if (dump_trace) + return 0; + + if (!tool->ordered_events) { + pr_err("CoreSight ETM Trace requires ordered events\n"); + return -EINVAL; + } + + if (!etm->timeless_decoding) + return -EINVAL; + + if (sample->time && (sample->time != (u64) -1)) + timestamp = sample->time; + else + timestamp = 0; + + if (timestamp || etm->timeless_decoding) { + err = cs_etm__update_queues(etm); + if (err) + return err; + } + return 0; } -- cgit v1.2.3-70-g09d2 From 9f878b29da969e195e106992ea8572da3d244811 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:17 -0700 Subject: perf tools: Add full support for CoreSight trace decoding This patch adds support for complete packet decoding, allowing traces collected during a trace session to be decoder from the "report" infrastructure. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-9-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 166 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 160 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 83eb676274b5..407095af1456 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -70,6 +70,10 @@ struct cs_etm_queue { u64 offset; }; +static int cs_etm__update_queues(struct cs_etm_auxtrace *etm); +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, + pid_t tid, u64 time_); + static void cs_etm__packet_dump(const char *pkt_string) { const char *color = PERF_COLOR_BLUE; @@ -145,9 +149,25 @@ static void cs_etm__dump_event(struct cs_etm_auxtrace *etm, static int cs_etm__flush_events(struct perf_session *session, struct perf_tool *tool) { - (void) session; - (void) tool; - return 0; + int ret; + struct cs_etm_auxtrace *etm = container_of(session->auxtrace, + struct cs_etm_auxtrace, + auxtrace); + if (dump_trace) + return 0; + + if (!tool->ordered_events) + return -EINVAL; + + if (!etm->timeless_decoding) + return -EINVAL; + + ret = cs_etm__update_queues(etm); + + if (ret < 0) + return ret; + + return cs_etm__process_timeless_queues(etm, -1, MAX_TIMESTAMP - 1); } static void cs_etm__free_queue(void *priv) @@ -369,6 +389,138 @@ static int cs_etm__update_queues(struct cs_etm_auxtrace *etm) return 0; } +static int +cs_etm__get_trace(struct cs_etm_buffer *buff, struct cs_etm_queue *etmq) +{ + struct auxtrace_buffer *aux_buffer = etmq->buffer; + struct auxtrace_buffer *old_buffer = aux_buffer; + struct auxtrace_queue *queue; + + queue = &etmq->etm->queues.queue_array[etmq->queue_nr]; + + aux_buffer = auxtrace_buffer__next(queue, aux_buffer); + + /* If no more data, drop the previous auxtrace_buffer and return */ + if (!aux_buffer) { + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + buff->len = 0; + return 0; + } + + etmq->buffer = aux_buffer; + + /* If the aux_buffer doesn't have data associated, try to load it */ + if (!aux_buffer->data) { + /* get the file desc associated with the perf data file */ + int fd = perf_data__fd(etmq->etm->session->data); + + aux_buffer->data = auxtrace_buffer__get_data(aux_buffer, fd); + if (!aux_buffer->data) + return -ENOMEM; + } + + /* If valid, drop the previous buffer */ + if (old_buffer) + auxtrace_buffer__drop_data(old_buffer); + + buff->offset = aux_buffer->offset; + buff->len = aux_buffer->size; + buff->buf = aux_buffer->data; + + buff->ref_timestamp = aux_buffer->reference; + + return buff->len; +} + +static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, + struct auxtrace_queue *queue) +{ + struct cs_etm_queue *etmq = queue->priv; + + /* CPU-wide tracing isn't supported yet */ + if (queue->tid == -1) + return; + + if ((!etmq->thread) && (etmq->tid != -1)) + etmq->thread = machine__find_thread(etm->machine, -1, + etmq->tid); + + if (etmq->thread) { + etmq->pid = etmq->thread->pid_; + if (queue->cpu == -1) + etmq->cpu = etmq->thread->cpu; + } +} + +static int cs_etm__run_decoder(struct cs_etm_queue *etmq) +{ + struct cs_etm_auxtrace *etm = etmq->etm; + struct cs_etm_buffer buffer; + size_t buffer_used, processed; + int err = 0; + + if (!etm->kernel_start) + etm->kernel_start = machine__kernel_start(etm->machine); + + /* Go through each buffer in the queue and decode them one by one */ +more: + buffer_used = 0; + memset(&buffer, 0, sizeof(buffer)); + err = cs_etm__get_trace(&buffer, etmq); + if (err <= 0) + return err; + /* + * We cannot assume consecutive blocks in the data file are contiguous, + * reset the decoder to force re-sync. + */ + err = cs_etm_decoder__reset(etmq->decoder); + if (err != 0) + return err; + + /* Run trace decoder until buffer consumed or end of trace */ + do { + processed = 0; + + err = cs_etm_decoder__process_data_block( + etmq->decoder, + etmq->offset, + &buffer.buf[buffer_used], + buffer.len - buffer_used, + &processed); + + if (err) + return err; + + etmq->offset += processed; + buffer_used += processed; + } while (buffer.len > buffer_used); + +goto more; + + return err; +} + +static int cs_etm__process_timeless_queues(struct cs_etm_auxtrace *etm, + pid_t tid, u64 time_) +{ + unsigned int i; + struct auxtrace_queues *queues = &etm->queues; + + for (i = 0; i < queues->nr_queues; i++) { + struct auxtrace_queue *queue = &etm->queues.queue_array[i]; + struct cs_etm_queue *etmq = queue->priv; + + if (etmq && ((tid == -1) || (etmq->tid == tid))) { + etmq->time = time_; + cs_etm__set_pid_tid_cpu(etm, queue); + cs_etm__run_decoder(etmq); + } + } + + return 0; +} + static int cs_etm__process_event(struct perf_session *session, union perf_event *event, struct perf_sample *sample, @@ -380,9 +532,6 @@ static int cs_etm__process_event(struct perf_session *session, struct cs_etm_auxtrace, auxtrace); - /* Keep compiler happy */ - (void)event; - if (dump_trace) return 0; @@ -405,6 +554,11 @@ static int cs_etm__process_event(struct perf_session *session, return err; } + if (event->header.type == PERF_RECORD_EXIT) + return cs_etm__process_timeless_queues(etm, + event->fork.tid, + sample->time); + return 0; } -- cgit v1.2.3-70-g09d2 From b12235b113cfd7e4a31f0c8bdb0d8e8588ba6683 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 17 Jan 2018 10:52:18 -0700 Subject: perf tools: Add mechanic to synthesise CoreSight trace packets Once decoded from trace packets information on trace range needs to be communicated to the perf synthesis infrastructure so that it is available to the perf tools built-in rendering tools and scripts. Co-authored-by: Tor Jeremiassen Signed-off-by: Mathieu Poirier Acked-by: Jiri Olsa Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Kim Phillips Cc: Mike Leach Cc: Namhyung Kim Cc: Peter Zijlstra Cc: Suzuki Poulouse Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1516211539-5166-10-git-send-email-mathieu.poirier@linaro.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/cs-etm.c | 168 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 168 insertions(+) (limited to 'tools') diff --git a/tools/perf/util/cs-etm.c b/tools/perf/util/cs-etm.c index 407095af1456..b9f0a53dfa65 100644 --- a/tools/perf/util/cs-etm.c +++ b/tools/perf/util/cs-etm.c @@ -453,6 +453,157 @@ static void cs_etm__set_pid_tid_cpu(struct cs_etm_auxtrace *etm, } } +/* + * The cs etm packet encodes an instruction range between a branch target + * and the next taken branch. Generate sample accordingly. + */ +static int cs_etm__synth_branch_sample(struct cs_etm_queue *etmq, + struct cs_etm_packet *packet) +{ + int ret = 0; + struct cs_etm_auxtrace *etm = etmq->etm; + struct perf_sample sample = {.ip = 0,}; + union perf_event *event = etmq->event_buf; + u64 start_addr = packet->start_addr; + u64 end_addr = packet->end_addr; + + event->sample.header.type = PERF_RECORD_SAMPLE; + event->sample.header.misc = PERF_RECORD_MISC_USER; + event->sample.header.size = sizeof(struct perf_event_header); + + sample.ip = start_addr; + sample.pid = etmq->pid; + sample.tid = etmq->tid; + sample.addr = end_addr; + sample.id = etmq->etm->branches_id; + sample.stream_id = etmq->etm->branches_id; + sample.period = 1; + sample.cpu = packet->cpu; + sample.flags = 0; + sample.cpumode = PERF_RECORD_MISC_USER; + + ret = perf_session__deliver_synth_event(etm->session, event, &sample); + + if (ret) + pr_err( + "CS ETM Trace: failed to deliver instruction event, error %d\n", + ret); + + return ret; +} + +struct cs_etm_synth { + struct perf_tool dummy_tool; + struct perf_session *session; +}; + +static int cs_etm__event_synth(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine __maybe_unused) +{ + struct cs_etm_synth *cs_etm_synth = + container_of(tool, struct cs_etm_synth, dummy_tool); + + return perf_session__deliver_synth_event(cs_etm_synth->session, + event, NULL); +} + +static int cs_etm__synth_event(struct perf_session *session, + struct perf_event_attr *attr, u64 id) +{ + struct cs_etm_synth cs_etm_synth; + + memset(&cs_etm_synth, 0, sizeof(struct cs_etm_synth)); + cs_etm_synth.session = session; + + return perf_event__synthesize_attr(&cs_etm_synth.dummy_tool, attr, 1, + &id, cs_etm__event_synth); +} + +static int cs_etm__synth_events(struct cs_etm_auxtrace *etm, + struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + struct perf_event_attr attr; + bool found = false; + u64 id; + int err; + + evlist__for_each_entry(evlist, evsel) { + if (evsel->attr.type == etm->pmu_type) { + found = true; + break; + } + } + + if (!found) { + pr_debug("No selected events with CoreSight Trace data\n"); + return 0; + } + + memset(&attr, 0, sizeof(struct perf_event_attr)); + attr.size = sizeof(struct perf_event_attr); + attr.type = PERF_TYPE_HARDWARE; + attr.sample_type = evsel->attr.sample_type & PERF_SAMPLE_MASK; + attr.sample_type |= PERF_SAMPLE_IP | PERF_SAMPLE_TID | + PERF_SAMPLE_PERIOD; + if (etm->timeless_decoding) + attr.sample_type &= ~(u64)PERF_SAMPLE_TIME; + else + attr.sample_type |= PERF_SAMPLE_TIME; + + attr.exclude_user = evsel->attr.exclude_user; + attr.exclude_kernel = evsel->attr.exclude_kernel; + attr.exclude_hv = evsel->attr.exclude_hv; + attr.exclude_host = evsel->attr.exclude_host; + attr.exclude_guest = evsel->attr.exclude_guest; + attr.sample_id_all = evsel->attr.sample_id_all; + attr.read_format = evsel->attr.read_format; + + /* create new id val to be a fixed offset from evsel id */ + id = evsel->id[0] + 1000000000; + + if (!id) + id = 1; + + if (etm->synth_opts.branches) { + attr.config = PERF_COUNT_HW_BRANCH_INSTRUCTIONS; + attr.sample_period = 1; + attr.sample_type |= PERF_SAMPLE_ADDR; + err = cs_etm__synth_event(session, &attr, id); + if (err) + return err; + etm->sample_branches = true; + etm->branches_sample_type = attr.sample_type; + etm->branches_id = id; + } + + return 0; +} + +static int cs_etm__sample(struct cs_etm_queue *etmq) +{ + int ret; + struct cs_etm_packet packet; + + while (1) { + ret = cs_etm_decoder__get_packet(etmq->decoder, &packet); + if (ret <= 0) + return ret; + + /* + * If the packet contains an instruction range, generate an + * instruction sequence event. + */ + if (packet.sample_type & CS_ETM_RANGE) + cs_etm__synth_branch_sample(etmq, &packet); + } + + return 0; +} + static int cs_etm__run_decoder(struct cs_etm_queue *etmq) { struct cs_etm_auxtrace *etm = etmq->etm; @@ -494,6 +645,12 @@ more: etmq->offset += processed; buffer_used += processed; + + /* + * Nothing to do with an error condition, let's hope the next + * chunk will be better. + */ + err = cs_etm__sample(etmq); } while (buffer.len > buffer_used); goto more; @@ -828,6 +985,17 @@ int cs_etm__process_auxtrace_info(union perf_event *event, return 0; } + if (session->itrace_synth_opts && session->itrace_synth_opts->set) { + etm->synth_opts = *session->itrace_synth_opts; + } else { + itrace_synth_opts__set_default(&etm->synth_opts); + etm->synth_opts.callchain = false; + } + + err = cs_etm__synth_events(etm, session); + if (err) + goto err_free_queues; + err = auxtrace_queues__process_index(&etm->queues, session); if (err) goto err_free_queues; -- cgit v1.2.3-70-g09d2 From 78c436907c94660edc76f499b80dbebbbe6fd572 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 10:42:55 -0300 Subject: perf bpf: Remove misplaced __maybe_unused attribute The bpf__setup_stdout() function uses that evlist argument, remove the misleading __maybe_unused attribute. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-7vbhhzbd33nvdm7l35gdfryt@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/util/bpf-loader.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index ab2598af91eb..af7ad814b2c3 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -1533,7 +1533,7 @@ int bpf__apply_obj_config(void) (strcmp("__bpf_stdout__", \ bpf_map__name(pos)) == 0)) -int bpf__setup_stdout(struct perf_evlist *evlist __maybe_unused) +int bpf__setup_stdout(struct perf_evlist *evlist) { struct bpf_map_priv *tmpl_priv = NULL; struct bpf_object *obj, *tmp; -- cgit v1.2.3-70-g09d2 From 591421e151ddf95e43d690a5c9b291d8e1cb8065 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 11:38:54 -0300 Subject: perf trace: Add --print-sample To help with debugging, like the interrupted out of order issue that will be dealt with in the next patch in this series, changing the code to deal with: raw_syscalls:sys_enter 411967179.269 Timer 9609/9626 [2] raw_syscalls:sys_enter 411967179.213 file:// Content 9609/9609 [3] 328.038 (18446744073709.496 ms): Timer/9626 futex(uaddr: 0x7fc0d4027044, op: WAIT|PRIV, utime: 0x7fc0b0ffdb50 ) ... raw_syscalls:sys_exit 411967179.225 file:// Content 9609/9609 [3] 327.982 ( 0.012 ms): file:// Conten/9609 futex(uaddr: 0x7fc0d4027040, op: WAKE|PRIV, val: 1 ) = 1 That long duration is the bug. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-fljqiibjn7wet24jd1ed7abc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/Documentation/perf-trace.txt | 4 ++++ tools/perf/builtin-trace.c | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+) (limited to 'tools') diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 6909cf1e0eea..33a88e984e66 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -163,6 +163,10 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. Implies '--call-graph dwarf' when --call-graph not present on the command line, on systems where DWARF unwinding was built in. +--print-sample:: + Print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info for the + raw_syscalls:sys_{enter,exit} tracepoints, for debugging. + --proc-map-timeout:: When processing pre-existing threads /proc/XXX/mmap, it may take a long time, because the file may be huge. A time out is needed in such cases. diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7dece5e0cdbb..322c2b15e407 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -110,6 +110,7 @@ struct trace { bool summary; bool summary_only; bool show_comm; + bool print_sample; bool show_tool_stats; bool trace_syscalls; bool kernel_syscallchains; @@ -1578,6 +1579,23 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp return printed; } +static int trace__fprintf_sample(struct trace *trace, struct perf_evsel *evsel, + struct perf_sample *sample, struct thread *thread) +{ + int printed = 0; + + if (trace->print_sample) { + double ts = (double)sample->time / NSEC_PER_MSEC; + + printed += fprintf(trace->output, "%22s %10.3f %s %d/%d [%d]\n", + perf_evsel__name(evsel), ts, + thread__comm_str(thread), + sample->pid, sample->tid, sample->cpu); + } + + return printed; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1598,6 +1616,8 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, if (ttrace == NULL) goto out_put; + trace__fprintf_sample(trace, evsel, sample, thread); + args = perf_evsel__sc_tp_ptr(evsel, args, sample); if (ttrace->entry_str == NULL) { @@ -1688,6 +1708,8 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, if (ttrace == NULL) goto out_put; + trace__fprintf_sample(trace, evsel, sample, thread); + if (trace->summary) thread__update_stats(ttrace, id, sample); @@ -3034,6 +3056,8 @@ int cmd_trace(int argc, const char **argv) "Set the maximum stack depth when parsing the callchain, " "anything beyond the specified depth will be ignored. " "Default: kernel.perf_event_max_stack or " __stringify(PERF_MAX_STACK_DEPTH)), + OPT_BOOLEAN(0, "print-sample", &trace.print_sample, + "print the PERF_RECORD_SAMPLE PERF_SAMPLE_ info, for debugging"), OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, "per thread proc mmap processing timeout in ms"), OPT_UINTEGER('D', "delay", &trace.opts.initial_delay, -- cgit v1.2.3-70-g09d2 From 522283fec7d3f312224360da48057e923ee22765 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 11:42:11 -0300 Subject: perf trace: Do not print from time delta for interrupted syscall lines We were calculating the delta from a in-flight syscall that got its output interrupted by another syscall, which doesn't seem like useful information, we will print the syscall duration (sys_exit - sys_enter) when the raw_syscalls:sys_exit event happens. The problem here is how we're consuming the multiple ring buffers, without using the ordered_events code used by perf_session, which may cause some reordering of syscalls for diferent CPUs, so just stop printing that delta, to avoid things like: # trace --print-sample -p 9626 -e futex raw_syscalls:sys_enter 411967179.269 Timer 9609/9626 [2] raw_syscalls:sys_enter 411967179.213 file:// Content 9609/9609 [3] 328.038 (18446744073709.496 ms): Timer/9626 futex(uaddr: 0x7fc0d4027044, op: WAIT|PRIV, utime: 0x7fc0b0ffdb50 ) ... raw_syscalls:sys_exit 411967179.225 file:// Content 9609/9609 [3] 327.982 ( 0.012 ms): file:// Conten/9609 futex(uaddr: 0x7fc0d4027040, op: WAKE|PRIV, val: 1 ) = 1 This is a bandaid, we should better try and use the ordered_events code, possibly with some refactoring prep work, but for now at least we don't show those false long deltas for the lines ending in '...'. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-q6xgsqrju1sr6ltud9kjjhmb@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 322c2b15e407..ab00096328e4 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -821,7 +821,7 @@ static size_t fprintf_duration(unsigned long t, bool calculated, FILE *fp) size_t printed = fprintf(fp, "("); if (!calculated) - printed += fprintf(fp, " ? "); + printed += fprintf(fp, " "); else if (duration >= 1.0) printed += color_fprintf(fp, PERF_COLOR_RED, "%6.3f ms", duration); else if (duration >= 0.01) @@ -1556,10 +1556,9 @@ static void thread__update_stats(struct thread_trace *ttrace, update_stats(stats, duration); } -static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) +static int trace__printf_interrupted_entry(struct trace *trace) { struct thread_trace *ttrace; - u64 duration; size_t printed; if (trace->current == NULL) @@ -1570,9 +1569,7 @@ static int trace__printf_interrupted_entry(struct trace *trace, struct perf_samp if (!ttrace->entry_pending) return 0; - duration = sample->time - ttrace->entry_time; - - printed = trace__fprintf_entry_head(trace, trace->current, duration, true, ttrace->entry_time, trace->output); + printed = trace__fprintf_entry_head(trace, trace->current, 0, false, ttrace->entry_time, trace->output); printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); ttrace->entry_pending = false; @@ -1627,7 +1624,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } if (!(trace->duration_filter || trace->summary_only || trace->min_stack)) - trace__printf_interrupted_entry(trace, sample); + trace__printf_interrupted_entry(trace); ttrace->entry_time = sample->time; msg = ttrace->entry_str; @@ -1941,7 +1938,7 @@ static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, } } - trace__printf_interrupted_entry(trace, sample); + trace__printf_interrupted_entry(trace); trace__fprintf_tstamp(trace, sample->time, trace->output); if (trace->trace_syscalls) -- cgit v1.2.3-70-g09d2 From 3258abe0991590a182be0a20ef6b79b65fe2c9cd Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 12:56:59 -0300 Subject: perf trace beauty futex: Beautify FUTEX_BITSET_MATCH_ANY E.g.: # strace -e futex -p 14437 strace: Process 14437 attached futex(0x7f46f4808d70, FUTEX_WAKE_PRIVATE, 1) = 0 futex(0x7f46f24e68b0, FUTEX_WAIT_BITSET_PRIVATE|FUTEX_CLOCK_REALTIME, 0, {tv_sec=1516636744, tv_nsec=221969000}, 0xffffffff) = -1 ETIMEDOUT (Connection timed out) # Should pretty print that 0xffffffff value, like: # trace -e futex --tid 14437 0.028 ( 0.005 ms): futex(uaddr: 0x7f46f4808d70, op: WAKE|PRIV, val: 1 ) = 0 0.037 (1000.092 ms): futex(uaddr: 0x7f46f24e68b0, op: WAIT_BITSET|PRIV|CLKRT, utime: 0x7f46f23fedf0, val3: MATCH_ANY) = -1 ETIMEDOUT Connection timed out ^C# Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-raef6e352la90600yksthao1@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 4 +++- tools/perf/trace/beauty/futex_val3.c | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 1 deletion(-) create mode 100644 tools/perf/trace/beauty/futex_val3.c (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index ab00096328e4..46d3ff09440c 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -549,6 +549,7 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #include "trace/beauty/eventfd.c" #include "trace/beauty/flock.c" #include "trace/beauty/futex_op.c" +#include "trace/beauty/futex_val3.c" #include "trace/beauty/mmap.c" #include "trace/beauty/mode_t.c" #include "trace/beauty/msg_flags.c" @@ -611,7 +612,8 @@ static struct syscall_fmt { { .name = "fstat", .alias = "newfstat", }, { .name = "fstatat", .alias = "newfstatat", }, { .name = "futex", - .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, }, }, + .arg = { [1] = { .scnprintf = SCA_FUTEX_OP, /* op */ }, + [5] = { .scnprintf = SCA_FUTEX_VAL3, /* val3 */ }, }, }, { .name = "futimesat", .arg = { [0] = { .scnprintf = SCA_FDAT, /* fd */ }, }, }, { .name = "getitimer", diff --git a/tools/perf/trace/beauty/futex_val3.c b/tools/perf/trace/beauty/futex_val3.c new file mode 100644 index 000000000000..26f6b3253511 --- /dev/null +++ b/tools/perf/trace/beauty/futex_val3.c @@ -0,0 +1,18 @@ +// SPDX-License-Identifier: GPL-2.0 +#include + +#ifndef FUTEX_BITSET_MATCH_ANY +#define FUTEX_BITSET_MATCH_ANY 0xffffffff +#endif + +static size_t syscall_arg__scnprintf_futex_val3(char *bf, size_t size, struct syscall_arg *arg) +{ + unsigned int bitset = arg->val; + + if (bitset == FUTEX_BITSET_MATCH_ANY) + return scnprintf(bf, size, "MATCH_ANY"); + + return scnprintf(bf, size, "%#xd", bitset); +} + +#define SCA_FUTEX_VAL3 syscall_arg__scnprintf_futex_val3 -- cgit v1.2.3-70-g09d2 From bafae98e7a95df74ce4529ae96251cb12c86fdf3 Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 16:42:16 -0300 Subject: perf evlist: Remove fcntl.h from evlist.h Not needed there, fixup the places where it is needed and was getting only by luck via evlist.h. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-yxjpetn64z8vjuguu84gr6x6@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-kvm.c | 3 +++ tools/perf/builtin-script.c | 1 + tools/perf/builtin-trace.c | 1 + tools/perf/tests/bpf.c | 1 + tools/perf/tests/openat-syscall-tp-fields.c | 3 +++ tools/perf/util/cgroup.c | 3 +++ tools/perf/util/evlist.c | 1 + tools/perf/util/evlist.h | 1 - tools/perf/util/parse-events.c | 3 +++ 9 files changed, 16 insertions(+), 1 deletion(-) (limited to 'tools') diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 98853162eae9..55d919dc5bc6 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -26,6 +26,9 @@ #include #endif #include +#include +#include +#include #include #include diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 3499d68e1d70..ab19a6ee4093 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include "sane_ctype.h" diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 46d3ff09440c..868306ccd8b8 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -56,6 +56,7 @@ #include #include #include +#include #include "sane_ctype.h" diff --git a/tools/perf/tests/bpf.c b/tools/perf/tests/bpf.c index 8e709c9d512c..e8399beca62b 100644 --- a/tools/perf/tests/bpf.c +++ b/tools/perf/tests/bpf.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 97c9407d02a0..43519267b93b 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include +#include +#include #include "perf.h" #include "evlist.h" #include "evsel.h" diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index d9ffc1e6eb39..984f69144f87 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -6,6 +6,9 @@ #include "cgroup.h" #include "evlist.h" #include +#include +#include +#include int nr_cgroups; diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 120efd85f2c8..ac35cd214feb 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -25,6 +25,7 @@ #include "parse-events.h" #include +#include #include #include diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e7fbca69cbac..75f8e0ad5d76 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -7,7 +7,6 @@ #include #include #include -#include #include #include "../perf.h" #include "event.h" diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 170316795a18..34589c427e52 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -4,6 +4,9 @@ #include #include #include +#include +#include +#include #include #include "term.h" #include "../perf.h" -- cgit v1.2.3-70-g09d2 From c19d0847b2dce3dc4219fa1a21f4ad2256b42d9d Mon Sep 17 00:00:00 2001 From: Arnaldo Carvalho de Melo Date: Mon, 22 Jan 2018 16:50:16 -0300 Subject: perf trace beauty flock: Move to separate object file To resolve some header conflicts that were preventing the build to succeed in the Alpine Linux distribution. Cc: Adrian Hunter Cc: David Ahern Cc: Jiri Olsa Cc: Namhyung Kim Cc: Wang Nan Link: https://lkml.kernel.org/n/tip-bvud0dvzvip3kibeplupdbmc@git.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/perf/builtin-trace.c | 1 - tools/perf/trace/beauty/Build | 1 + tools/perf/trace/beauty/beauty.h | 3 +++ tools/perf/trace/beauty/flock.c | 10 +++++----- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'tools') diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 868306ccd8b8..17d11deeb88d 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -548,7 +548,6 @@ static size_t syscall_arg__scnprintf_getrandom_flags(char *bf, size_t size, #include "trace/beauty/arch_errno_names.c" #include "trace/beauty/eventfd.c" -#include "trace/beauty/flock.c" #include "trace/beauty/futex_op.c" #include "trace/beauty/futex_val3.c" #include "trace/beauty/mmap.c" diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 066bbf0f4a74..66330d4b739b 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -1,5 +1,6 @@ libperf-y += clone.o libperf-y += fcntl.o +libperf-y += flock.o ifeq ($(SRCARCH),$(filter $(SRCARCH),x86)) libperf-y += ioctl.o endif diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index d8f6b2ec7fc5..984a504d335c 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -79,6 +79,9 @@ size_t syscall_arg__scnprintf_fcntl_cmd(char *bf, size_t size, struct syscall_ar size_t syscall_arg__scnprintf_fcntl_arg(char *bf, size_t size, struct syscall_arg *arg); #define SCA_FCNTL_ARG syscall_arg__scnprintf_fcntl_arg +size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_FLOCK syscall_arg__scnprintf_flock + size_t syscall_arg__scnprintf_ioctl_cmd(char *bf, size_t size, struct syscall_arg *arg); #define SCA_IOCTL_CMD syscall_arg__scnprintf_ioctl_cmd diff --git a/tools/perf/trace/beauty/flock.c b/tools/perf/trace/beauty/flock.c index f9707f57566c..c4ff6ad30b06 100644 --- a/tools/perf/trace/beauty/flock.c +++ b/tools/perf/trace/beauty/flock.c @@ -1,5 +1,8 @@ // SPDX-License-Identifier: GPL-2.0 -#include + +#include "trace/beauty/beauty.h" +#include +#include #ifndef LOCK_MAND #define LOCK_MAND 32 @@ -17,8 +20,7 @@ #define LOCK_RW 192 #endif -static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, - struct syscall_arg *arg) +size_t syscall_arg__scnprintf_flock(char *bf, size_t size, struct syscall_arg *arg) { int printed = 0, op = arg->val; @@ -45,5 +47,3 @@ static size_t syscall_arg__scnprintf_flock(char *bf, size_t size, return printed; } - -#define SCA_FLOCK syscall_arg__scnprintf_flock -- cgit v1.2.3-70-g09d2