diff options
author | Peter Zijlstra <peterz@infradead.org> | 2017-09-05 16:26:44 +0200 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2017-10-27 10:31:59 +0200 |
commit | 0c1cbc18df9e38182a0604b15535699c84d7342a (patch) | |
tree | 85c7d42284fd594af32ca8d176aa8fc44e618a60 /kernel/events | |
parent | 7f0ec32526d2446fdd79b0c6c6d08e64ef9fb1f3 (diff) |
perf/core: Fix perf_event_read()
perf_event_read() has a number of issues regarding the timekeeping bits.
- The IPI didn't update group times when it found INACTIVE
- The direct call would not re-check ->state after taking ctx->lock
which can result in ->count and timestamps getting out of sync.
And we can make use of the ordering introduced for perf_event_stop()
to make it more accurate for ACTIVE.
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'kernel/events')
-rw-r--r-- | kernel/events/core.c | 55 |
1 files changed, 39 insertions, 16 deletions
diff --git a/kernel/events/core.c b/kernel/events/core.c index 205a4321f678..6f74f9c35490 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -2081,8 +2081,9 @@ event_sched_in(struct perf_event *event, WRITE_ONCE(event->oncpu, smp_processor_id()); /* - * Order event::oncpu write to happen before the ACTIVE state - * is visible. + * Order event::oncpu write to happen before the ACTIVE state is + * visible. This allows perf_event_{stop,read}() to observe the correct + * ->oncpu if it sees ACTIVE. */ smp_wmb(); WRITE_ONCE(event->state, PERF_EVENT_STATE_ACTIVE); @@ -3638,12 +3639,16 @@ static void __perf_event_read(void *info) return; raw_spin_lock(&ctx->lock); - if (ctx->is_active) { + if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); update_cgrp_time_from_event(event); } - update_event_times(event); + if (!data->group) + update_event_times(event); + else + update_group_times(event); + if (event->state != PERF_EVENT_STATE_ACTIVE) goto unlock; @@ -3658,7 +3663,6 @@ static void __perf_event_read(void *info) pmu->read(event); list_for_each_entry(sub, &event->sibling_list, group_entry) { - update_event_times(sub); if (sub->state == PERF_EVENT_STATE_ACTIVE) { /* * Use sibling's PMU rather than @event's since @@ -3748,23 +3752,35 @@ out: static int perf_event_read(struct perf_event *event, bool group) { + enum perf_event_state state = READ_ONCE(event->state); int event_cpu, ret = 0; /* * If event is enabled and currently active on a CPU, update the * value in the event structure: */ - if (event->state == PERF_EVENT_STATE_ACTIVE) { - struct perf_read_data data = { - .event = event, - .group = group, - .ret = 0, - }; +again: + if (state == PERF_EVENT_STATE_ACTIVE) { + struct perf_read_data data; + + /* + * Orders the ->state and ->oncpu loads such that if we see + * ACTIVE we must also see the right ->oncpu. + * + * Matches the smp_wmb() from event_sched_in(). + */ + smp_rmb(); event_cpu = READ_ONCE(event->oncpu); if ((unsigned)event_cpu >= nr_cpu_ids) return 0; + data = (struct perf_read_data){ + .event = event, + .group = group, + .ret = 0, + }; + preempt_disable(); event_cpu = __perf_event_read_cpu(event, event_cpu); @@ -3781,20 +3797,27 @@ static int perf_event_read(struct perf_event *event, bool group) (void)smp_call_function_single(event_cpu, __perf_event_read, &data, 1); preempt_enable(); ret = data.ret; - } else if (event->state == PERF_EVENT_STATE_INACTIVE) { + + } else if (state == PERF_EVENT_STATE_INACTIVE) { struct perf_event_context *ctx = event->ctx; unsigned long flags; raw_spin_lock_irqsave(&ctx->lock, flags); + state = event->state; + if (state != PERF_EVENT_STATE_INACTIVE) { + raw_spin_unlock_irqrestore(&ctx->lock, flags); + goto again; + } + /* - * may read while context is not active - * (e.g., thread is blocked), in that case - * we cannot update context time + * May read while context is not active (e.g., thread is + * blocked), in that case we cannot update context time */ - if (ctx->is_active) { + if (ctx->is_active & EVENT_TIME) { update_context_time(ctx); update_cgrp_time_from_event(event); } + if (group) update_group_times(event); else |