From abf917cd91cbb73952758f9741e2fa65002a48ee Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Wed, 25 Jul 2012 07:56:04 +0200 Subject: cputime: Generic on-demand virtual cputime accounting If we want to stop the tick further idle, we need to be able to account the cputime without using the tick. Virtual based cputime accounting solves that problem by hooking into kernel/user boundaries. However implementing CONFIG_VIRT_CPU_ACCOUNTING require low level hooks and involves more overhead. But we already have a generic context tracking subsystem that is required for RCU needs by archs which plan to shut down the tick outside idle. This patch implements a generic virtual based cputime accounting that relies on these generic kernel/user hooks. There are some upsides of doing this: - This requires no arch code to implement CONFIG_VIRT_CPU_ACCOUNTING if context tracking is already built (already necessary for RCU in full tickless mode). - We can rely on the generic context tracking subsystem to dynamically (de)activate the hooks, so that we can switch anytime between virtual and tick based accounting. This way we don't have the overhead of the virtual accounting when the tick is running periodically. And one downside: - There is probably more overhead than a native virtual based cputime accounting. But this relies on hooks that are already set anyway. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/vtime.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux/vtime.h') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index ae30ab58431a..21ef703d1b25 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -14,9 +14,25 @@ extern void vtime_account(struct task_struct *tsk); static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } +static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +static inline void arch_vtime_task_switch(struct task_struct *tsk) { } +static inline void vtime_user_enter(struct task_struct *tsk) +{ + vtime_account_system(tsk); +} +static inline void vtime_user_exit(struct task_struct *tsk) +{ + vtime_account_user(tsk); +} +#else +static inline void vtime_user_enter(struct task_struct *tsk) { } +static inline void vtime_user_exit(struct task_struct *tsk) { } +#endif + #ifdef CONFIG_IRQ_TIME_ACCOUNTING extern void irqtime_account_irq(struct task_struct *tsk); #else -- cgit v1.2.3-70-g09d2 From 3f4724ea85b7d9055a9976fa8f30b471bdfbca93 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 16 Jul 2012 18:00:34 +0200 Subject: cputime: Allow dynamic switch between tick/virtual based cputime accounting Allow to dynamically switch between tick and virtual based cputime accounting. This way we can provide a kind of "on-demand" virtual based cputime accounting. In this mode, the kernel relies on the context tracking subsystem to dynamically probe on kernel boundaries. This is in preparation for being able to stop the timer tick in more places than just the idle state. Doing so will depend on CONFIG_VIRT_CPU_ACCOUNTING_GEN which makes it possible to account the cputime without the tick by hooking on kernel/user boundaries. Depending whether the tick is stopped or not, we can switch between tick and vtime based accounting anytime in order to minimize the overhead associated to user hooks. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- include/linux/vtime.h | 8 ++++++++ kernel/sched/cputime.c | 41 +++++++++++++++++++++++++++++++---------- kernel/time/tick-sched.c | 5 ++++- 3 files changed, 43 insertions(+), 11 deletions(-) (limited to 'include/linux/vtime.h') diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 21ef703d1b25..5368af9bdf06 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -10,12 +10,20 @@ extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); extern void vtime_account(struct task_struct *tsk); + +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern bool vtime_accounting_enabled(void); #else +static inline bool vtime_accounting_enabled(void) { return true; } +#endif + +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } +static inline bool vtime_accounting_enabled(void) { return false; } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 1c964eced92c..e1939d38bf73 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -317,8 +317,6 @@ out: rcu_read_unlock(); } -#ifndef CONFIG_VIRT_CPU_ACCOUNTING - #ifdef CONFIG_IRQ_TIME_ACCOUNTING /* * Account a tick to a process and cpustat @@ -383,11 +381,12 @@ static void irqtime_account_idle_ticks(int ticks) irqtime_account_process_tick(current, 0, rq); } #else /* CONFIG_IRQ_TIME_ACCOUNTING */ -static void irqtime_account_idle_ticks(int ticks) {} -static void irqtime_account_process_tick(struct task_struct *p, int user_tick, +static inline void irqtime_account_idle_ticks(int ticks) {} +static inline void irqtime_account_process_tick(struct task_struct *p, int user_tick, struct rq *rq) {} #endif /* CONFIG_IRQ_TIME_ACCOUNTING */ +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE /* * Account a single tick of cpu time. * @p: the process that the cpu time gets accounted to @@ -398,6 +397,9 @@ void account_process_tick(struct task_struct *p, int user_tick) cputime_t one_jiffy_scaled = cputime_to_scaled(cputime_one_jiffy); struct rq *rq = this_rq(); + if (vtime_accounting_enabled()) + return; + if (sched_clock_irqtime) { irqtime_account_process_tick(p, user_tick, rq); return; @@ -439,8 +441,7 @@ void account_idle_ticks(unsigned long ticks) account_idle_time(jiffies_to_cputime(ticks)); } - -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ /* * Use precise platform statistics if available: @@ -475,6 +476,9 @@ EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) { + if (!vtime_accounting_enabled()) + return; + if (is_idle_task(prev)) vtime_account_idle(prev); else @@ -498,6 +502,9 @@ void vtime_task_switch(struct task_struct *prev) #ifndef __ARCH_HAS_VTIME_ACCOUNT void vtime_account(struct task_struct *tsk) { + if (!vtime_accounting_enabled()) + return; + if (!in_interrupt()) { /* * If we interrupted user, context_tracking_in_user() @@ -520,7 +527,7 @@ void vtime_account(struct task_struct *tsk) EXPORT_SYMBOL_GPL(vtime_account); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ -#else +#else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static cputime_t scale_utime(cputime_t utime, cputime_t rtime, cputime_t total) { @@ -599,7 +606,7 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime thread_group_cputime(p, &cputime); cputime_adjust(&cputime, &p->signal->prev_cputime, ut, st); } -#endif +#endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN static DEFINE_PER_CPU(unsigned long long, cputime_snap); @@ -617,14 +624,23 @@ static cputime_t get_vtime_delta(void) void vtime_account_system(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu; + + if (!vtime_accounting_enabled()) + return; + delta_cpu = get_vtime_delta(); account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); } void vtime_account_user(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu; + + if (!vtime_accounting_enabled()) + return; + + delta_cpu = get_vtime_delta(); account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); } @@ -635,4 +651,9 @@ void vtime_account_idle(struct task_struct *tsk) account_idle_time(delta_cpu); } + +bool vtime_accounting_enabled(void) +{ + return context_tracking_active(); +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index d58e552d9fd1..46dfb6d94b1c 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -631,8 +631,11 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now) static void tick_nohz_account_idle_ticks(struct tick_sched *ts) { -#ifndef CONFIG_VIRT_CPU_ACCOUNTING +#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE unsigned long ticks; + + if (vtime_accounting_enabled()) + return; /* * We stopped the tick in idle. Update process times would miss the * time we slept as update_process_times does only a 1 tick -- cgit v1.2.3-70-g09d2 From c11f11fcbdb5be790c565aed46411486a7586afc Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 21 Jan 2013 00:50:22 +0100 Subject: kvm: Prepare to add generic guest entry/exit callbacks Do some ground preparatory work before adding guest_enter() and guest_exit() context tracking callbacks. Those will be later used to read the guest cputime safely when we run in full dynticks mode. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Gleb Natapov Cc: Ingo Molnar Cc: Li Zhong Cc: Marcelo Tosatti Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner --- arch/ia64/kernel/time.c | 1 + arch/powerpc/kernel/time.c | 1 + include/linux/kvm_host.h | 39 ++++++++++++++++++++++++++++++--------- include/linux/vtime.h | 2 -- kernel/sched/cputime.c | 10 ---------- 5 files changed, 32 insertions(+), 21 deletions(-) (limited to 'include/linux/vtime.h') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index a3a3f5a1cb3a..fbaac1afb844 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -136,6 +136,7 @@ void vtime_account_system(struct task_struct *tsk) account_system_time(tsk, 0, delta, delta); } +EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 22c9b67f9983..2e04b37f67f9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -347,6 +347,7 @@ void vtime_account_system(struct task_struct *tsk) if (stolen) account_steal_time(stolen); } +EXPORT_SYMBOL_GPL(vtime_account_system); void vtime_account_idle(struct task_struct *tsk) { diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c497ab0d03d..4fe2396401da 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -740,15 +741,36 @@ static inline int kvm_deassign_device(struct kvm *kvm, } #endif /* CONFIG_IOMMU_API */ -static inline void kvm_guest_enter(void) +static inline void guest_enter(void) { - BUG_ON(preemptible()); /* * This is running in ioctl context so we can avoid * the call to vtime_account() with its unnecessary idle check. */ - vtime_account_system_irqsafe(current); + vtime_account_system(current); current->flags |= PF_VCPU; +} + +static inline void guest_exit(void) +{ + /* + * This is running in ioctl context so we can avoid + * the call to vtime_account() with its unnecessary idle check. + */ + vtime_account_system(current); + current->flags &= ~PF_VCPU; +} + +static inline void kvm_guest_enter(void) +{ + unsigned long flags; + + BUG_ON(preemptible()); + + local_irq_save(flags); + guest_enter(); + local_irq_restore(flags); + /* KVM does not hold any references to rcu protected data when it * switches CPU into a guest mode. In fact switching to a guest mode * is very similar to exiting to userspase from rcu point of view. In @@ -761,12 +783,11 @@ static inline void kvm_guest_enter(void) static inline void kvm_guest_exit(void) { - /* - * This is running in ioctl context so we can avoid - * the call to vtime_account() with its unnecessary idle check. - */ - vtime_account_system_irqsafe(current); - current->flags &= ~PF_VCPU; + unsigned long flags; + + local_irq_save(flags); + guest_exit(); + local_irq_restore(flags); } /* diff --git a/include/linux/vtime.h b/include/linux/vtime.h index 5368af9bdf06..bb50c3ca0d79 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -6,7 +6,6 @@ struct task_struct; #ifdef CONFIG_VIRT_CPU_ACCOUNTING extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); -extern void vtime_account_system_irqsafe(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); extern void vtime_account(struct task_struct *tsk); @@ -20,7 +19,6 @@ static inline bool vtime_accounting_enabled(void) { return true; } #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } -static inline void vtime_account_system_irqsafe(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } static inline void vtime_account(struct task_struct *tsk) { } static inline bool vtime_accounting_enabled(void) { return false; } diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index c533deaf06d5..a44ecdf809a1 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -465,16 +465,6 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime *st = cputime.stime; } -void vtime_account_system_irqsafe(struct task_struct *tsk) -{ - unsigned long flags; - - local_irq_save(flags); - vtime_account_system(tsk); - local_irq_restore(flags); -} -EXPORT_SYMBOL_GPL(vtime_account_system_irqsafe); - #ifndef __ARCH_HAS_VTIME_TASK_SWITCH void vtime_task_switch(struct task_struct *prev) { -- cgit v1.2.3-70-g09d2 From 6a61671bb2f3a1bd12cd17b8fca811a624782632 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sun, 16 Dec 2012 20:00:34 +0100 Subject: cputime: Safely read cputime of full dynticks CPUs While remotely reading the cputime of a task running in a full dynticks CPU, the values stored in utime/stime fields of struct task_struct may be stale. Its values may be those of the last kernel <-> user transition time snapshot and we need to add the tickless time spent since this snapshot. To fix this, flush the cputime of the dynticks CPUs on kernel <-> user transition and record the time / context where we did this. Then on top of this snapshot and the current time, perform the fixup on the reader side from task_times() accessors. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Ingo Molnar Cc: Li Zhong Cc: Namhyung Kim Cc: Paul E. McKenney Cc: Paul Gortmaker Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner [fixed kvm module related build errors] Signed-off-by: Sedat Dilek --- arch/s390/kernel/vtime.c | 6 +- include/linux/hardirq.h | 4 +- include/linux/init_task.h | 11 +++ include/linux/kvm_host.h | 20 ++++- include/linux/sched.h | 27 +++++-- include/linux/vtime.h | 47 ++++++----- kernel/context_tracking.c | 21 ++++- kernel/fork.c | 6 ++ kernel/sched/core.c | 1 + kernel/sched/cputime.c | 193 +++++++++++++++++++++++++++++++++++++++++++--- kernel/softirq.c | 6 +- 11 files changed, 290 insertions(+), 52 deletions(-) (limited to 'include/linux/vtime.h') diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index e84b8b68444a..ce9cc5aa2033 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -127,7 +127,7 @@ void vtime_account_user(struct task_struct *tsk) * Update process times based on virtual cpu times stored by entry.S * to the lowcore fields user_timer, system_timer & steal_clock. */ -void vtime_account(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { struct thread_info *ti = task_thread_info(tsk); u64 timer, system; @@ -145,10 +145,10 @@ void vtime_account(struct task_struct *tsk) virt_timer_forward(system); } -EXPORT_SYMBOL_GPL(vtime_account); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); void vtime_account_system(struct task_struct *tsk) -__attribute__((alias("vtime_account"))); +__attribute__((alias("vtime_account_irq_enter"))); EXPORT_SYMBOL_GPL(vtime_account_system); void __kprobes vtime_stop_cpu(void) diff --git a/include/linux/hardirq.h b/include/linux/hardirq.h index 624ef3f45c8e..7105d5cbb762 100644 --- a/include/linux/hardirq.h +++ b/include/linux/hardirq.h @@ -153,7 +153,7 @@ extern void rcu_nmi_exit(void); */ #define __irq_enter() \ do { \ - vtime_account_irq_enter(current); \ + account_irq_enter_time(current); \ add_preempt_count(HARDIRQ_OFFSET); \ trace_hardirq_enter(); \ } while (0) @@ -169,7 +169,7 @@ extern void irq_enter(void); #define __irq_exit() \ do { \ trace_hardirq_exit(); \ - vtime_account_irq_exit(current); \ + account_irq_exit_time(current); \ sub_preempt_count(HARDIRQ_OFFSET); \ } while (0) diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 6d087c5f57f7..cc898b871cef 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -10,6 +10,7 @@ #include #include #include +#include #include #ifdef CONFIG_SMP @@ -141,6 +142,15 @@ extern struct task_group root_task_group; # define INIT_PERF_EVENTS(tsk) #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +# define INIT_VTIME(tsk) \ + .vtime_seqlock = __SEQLOCK_UNLOCKED(tsk.vtime_seqlock), \ + .vtime_snap = 0, \ + .vtime_snap_whence = VTIME_SYS, +#else +# define INIT_VTIME(tsk) +#endif + #define INIT_TASK_COMM "swapper" /* @@ -210,6 +220,7 @@ extern struct task_group root_task_group; INIT_TRACE_RECURSION \ INIT_TASK_RCU_PREEMPT(tsk) \ INIT_CPUSET_SEQ \ + INIT_VTIME(tsk) \ } diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 4fe2396401da..b7996a768eb2 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -741,7 +741,7 @@ static inline int kvm_deassign_device(struct kvm *kvm, } #endif /* CONFIG_IOMMU_API */ -static inline void guest_enter(void) +static inline void __guest_enter(void) { /* * This is running in ioctl context so we can avoid @@ -751,7 +751,7 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; } -static inline void guest_exit(void) +static inline void __guest_exit(void) { /* * This is running in ioctl context so we can avoid @@ -761,6 +761,22 @@ static inline void guest_exit(void) current->flags &= ~PF_VCPU; } +#ifdef CONFIG_CONTEXT_TRACKING +extern void guest_enter(void); +extern void guest_exit(void); + +#else /* !CONFIG_CONTEXT_TRACKING */ +static inline void guest_enter(void) +{ + __guest_enter(); +} + +static inline void guest_exit(void) +{ + __guest_exit(); +} +#endif /* !CONFIG_CONTEXT_TRACKING */ + static inline void kvm_guest_enter(void) { unsigned long flags; diff --git a/include/linux/sched.h b/include/linux/sched.h index a9c608b6154e..a9fa5145e1a7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1367,6 +1367,15 @@ struct task_struct { cputime_t gtime; #ifndef CONFIG_VIRT_CPU_ACCOUNTING struct cputime prev_cputime; +#endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_t vtime_seqlock; + unsigned long long vtime_snap; + enum { + VTIME_SLEEPING = 0, + VTIME_USER, + VTIME_SYS, + } vtime_snap_whence; #endif unsigned long nvcsw, nivcsw; /* context switch counts */ struct timespec start_time; /* monotonic time */ @@ -1792,11 +1801,13 @@ static inline void put_task_struct(struct task_struct *t) __put_task_struct(t); } -static inline cputime_t task_gtime(struct task_struct *t) -{ - return t->gtime; -} - +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN +extern void task_cputime(struct task_struct *t, + cputime_t *utime, cputime_t *stime); +extern void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled); +extern cputime_t task_gtime(struct task_struct *t); +#else static inline void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) { @@ -1815,6 +1826,12 @@ static inline void task_cputime_scaled(struct task_struct *t, if (stimescaled) *stimescaled = t->stimescaled; } + +static inline cputime_t task_gtime(struct task_struct *t) +{ + return t->gtime; +} +#endif extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); diff --git a/include/linux/vtime.h b/include/linux/vtime.h index bb50c3ca0d79..71a5782d8c59 100644 --- a/include/linux/vtime.h +++ b/include/linux/vtime.h @@ -8,35 +8,44 @@ extern void vtime_task_switch(struct task_struct *prev); extern void vtime_account_system(struct task_struct *tsk); extern void vtime_account_idle(struct task_struct *tsk); extern void vtime_account_user(struct task_struct *tsk); -extern void vtime_account(struct task_struct *tsk); +extern void vtime_account_irq_enter(struct task_struct *tsk); -#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -extern bool vtime_accounting_enabled(void); -#else +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE static inline bool vtime_accounting_enabled(void) { return true; } #endif #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ + static inline void vtime_task_switch(struct task_struct *prev) { } static inline void vtime_account_system(struct task_struct *tsk) { } static inline void vtime_account_user(struct task_struct *tsk) { } -static inline void vtime_account(struct task_struct *tsk) { } +static inline void vtime_account_irq_enter(struct task_struct *tsk) { } static inline bool vtime_accounting_enabled(void) { return false; } #endif #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -static inline void arch_vtime_task_switch(struct task_struct *tsk) { } -static inline void vtime_user_enter(struct task_struct *tsk) -{ - vtime_account_system(tsk); -} +extern void arch_vtime_task_switch(struct task_struct *tsk); +extern void vtime_account_irq_exit(struct task_struct *tsk); +extern bool vtime_accounting_enabled(void); +extern void vtime_user_enter(struct task_struct *tsk); static inline void vtime_user_exit(struct task_struct *tsk) { vtime_account_user(tsk); } +extern void vtime_guest_enter(struct task_struct *tsk); +extern void vtime_guest_exit(struct task_struct *tsk); +extern void vtime_init_idle(struct task_struct *tsk); #else +static inline void vtime_account_irq_exit(struct task_struct *tsk) +{ + /* On hard|softirq exit we always account to hard|softirq cputime */ + vtime_account_system(tsk); +} static inline void vtime_user_enter(struct task_struct *tsk) { } static inline void vtime_user_exit(struct task_struct *tsk) { } +static inline void vtime_guest_enter(struct task_struct *tsk) { } +static inline void vtime_guest_exit(struct task_struct *tsk) { } +static inline void vtime_init_idle(struct task_struct *tsk) { } #endif #ifdef CONFIG_IRQ_TIME_ACCOUNTING @@ -45,25 +54,15 @@ extern void irqtime_account_irq(struct task_struct *tsk); static inline void irqtime_account_irq(struct task_struct *tsk) { } #endif -static inline void vtime_account_irq_enter(struct task_struct *tsk) +static inline void account_irq_enter_time(struct task_struct *tsk) { - /* - * Hardirq can interrupt idle task anytime. So we need vtime_account() - * that performs the idle check in CONFIG_VIRT_CPU_ACCOUNTING. - * Softirq can also interrupt idle task directly if it calls - * local_bh_enable(). Such case probably don't exist but we never know. - * Ksoftirqd is not concerned because idle time is flushed on context - * switch. Softirqs in the end of hardirqs are also not a problem because - * the idle time is flushed on hardirq time already. - */ - vtime_account(tsk); + vtime_account_irq_enter(tsk); irqtime_account_irq(tsk); } -static inline void vtime_account_irq_exit(struct task_struct *tsk) +static inline void account_irq_exit_time(struct task_struct *tsk) { - /* On hard|softirq exit we always account to hard|softirq cputime */ - vtime_account_system(tsk); + vtime_account_irq_exit(tsk); irqtime_account_irq(tsk); } diff --git a/kernel/context_tracking.c b/kernel/context_tracking.c index 9002e92e6372..74f68f4dc6c2 100644 --- a/kernel/context_tracking.c +++ b/kernel/context_tracking.c @@ -1,8 +1,9 @@ #include +#include #include #include #include - +#include DEFINE_PER_CPU(struct context_tracking, context_tracking) = { #ifdef CONFIG_CONTEXT_TRACKING_FORCE @@ -61,6 +62,24 @@ void user_exit(void) local_irq_restore(flags); } +void guest_enter(void) +{ + if (vtime_accounting_enabled()) + vtime_guest_enter(current); + else + __guest_enter(); +} +EXPORT_SYMBOL_GPL(guest_enter); + +void guest_exit(void) +{ + if (vtime_accounting_enabled()) + vtime_guest_exit(current); + else + __guest_exit(); +} +EXPORT_SYMBOL_GPL(guest_exit); + void context_tracking_task_switch(struct task_struct *prev, struct task_struct *next) { diff --git a/kernel/fork.c b/kernel/fork.c index 65ca6d27f24e..e68a95b4cf26 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1233,6 +1233,12 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifndef CONFIG_VIRT_CPU_ACCOUNTING p->prev_cputime.utime = p->prev_cputime.stime = 0; #endif +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN + seqlock_init(&p->vtime_seqlock); + p->vtime_snap = 0; + p->vtime_snap_whence = VTIME_SLEEPING; +#endif + #if defined(SPLIT_RSS_COUNTING) memset(&p->rss_stat, 0, sizeof(p->rss_stat)); #endif diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 257002c13bb0..261022d7e79d 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -4666,6 +4666,7 @@ void __cpuinit init_idle(struct task_struct *idle, int cpu) */ idle->sched_class = &idle_sched_class; ftrace_graph_init_idle_task(idle, cpu); + vtime_init_idle(idle); #if defined(CONFIG_SMP) sprintf(idle->comm, "%s/%d", INIT_TASK_COMM, cpu); #endif diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index a44ecdf809a1..082e05d915b4 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -492,7 +492,7 @@ void vtime_task_switch(struct task_struct *prev) * vtime_account(). */ #ifndef __ARCH_HAS_VTIME_ACCOUNT -void vtime_account(struct task_struct *tsk) +void vtime_account_irq_enter(struct task_struct *tsk) { if (!vtime_accounting_enabled()) return; @@ -516,7 +516,7 @@ void vtime_account(struct task_struct *tsk) } vtime_account_system(tsk); } -EXPORT_SYMBOL_GPL(vtime_account); +EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ #else /* !CONFIG_VIRT_CPU_ACCOUNTING */ @@ -600,28 +600,55 @@ void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime #endif /* !CONFIG_VIRT_CPU_ACCOUNTING */ #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN -static DEFINE_PER_CPU(unsigned long long, cputime_snap); +static unsigned long long vtime_delta(struct task_struct *tsk) +{ + unsigned long long clock; + + clock = sched_clock(); + if (clock < tsk->vtime_snap) + return 0; -static cputime_t get_vtime_delta(void) + return clock - tsk->vtime_snap; +} + +static cputime_t get_vtime_delta(struct task_struct *tsk) { - unsigned long long delta; + unsigned long long delta = vtime_delta(tsk); - delta = sched_clock() - __this_cpu_read(cputime_snap); - __this_cpu_add(cputime_snap, delta); + WARN_ON_ONCE(tsk->vtime_snap_whence == VTIME_SLEEPING); + tsk->vtime_snap += delta; /* CHECKME: always safe to convert nsecs to cputime? */ return nsecs_to_cputime(delta); } +static void __vtime_account_system(struct task_struct *tsk) +{ + cputime_t delta_cpu = get_vtime_delta(tsk); + + account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); +} + void vtime_account_system(struct task_struct *tsk) { - cputime_t delta_cpu; + if (!vtime_accounting_enabled()) + return; + + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); +} +void vtime_account_irq_exit(struct task_struct *tsk) +{ if (!vtime_accounting_enabled()) return; - delta_cpu = get_vtime_delta(); - account_system_time(tsk, irq_count(), delta_cpu, cputime_to_scaled(delta_cpu)); + write_seqlock(&tsk->vtime_seqlock); + if (context_tracking_in_user()) + tsk->vtime_snap_whence = VTIME_USER; + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); } void vtime_account_user(struct task_struct *tsk) @@ -631,14 +658,44 @@ void vtime_account_user(struct task_struct *tsk) if (!vtime_accounting_enabled()) return; - delta_cpu = get_vtime_delta(); + delta_cpu = get_vtime_delta(tsk); + write_seqlock(&tsk->vtime_seqlock); + tsk->vtime_snap_whence = VTIME_SYS; account_user_time(tsk, delta_cpu, cputime_to_scaled(delta_cpu)); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_user_enter(struct task_struct *tsk) +{ + if (!vtime_accounting_enabled()) + return; + + write_seqlock(&tsk->vtime_seqlock); + tsk->vtime_snap_whence = VTIME_USER; + __vtime_account_system(tsk); + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_guest_enter(struct task_struct *tsk) +{ + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + current->flags |= PF_VCPU; + write_sequnlock(&tsk->vtime_seqlock); +} + +void vtime_guest_exit(struct task_struct *tsk) +{ + write_seqlock(&tsk->vtime_seqlock); + __vtime_account_system(tsk); + current->flags &= ~PF_VCPU; + write_sequnlock(&tsk->vtime_seqlock); } void vtime_account_idle(struct task_struct *tsk) { - cputime_t delta_cpu = get_vtime_delta(); + cputime_t delta_cpu = get_vtime_delta(tsk); account_idle_time(delta_cpu); } @@ -647,4 +704,116 @@ bool vtime_accounting_enabled(void) { return context_tracking_active(); } + +void arch_vtime_task_switch(struct task_struct *prev) +{ + write_seqlock(&prev->vtime_seqlock); + prev->vtime_snap_whence = VTIME_SLEEPING; + write_sequnlock(&prev->vtime_seqlock); + + write_seqlock(¤t->vtime_seqlock); + current->vtime_snap_whence = VTIME_SYS; + current->vtime_snap = sched_clock(); + write_sequnlock(¤t->vtime_seqlock); +} + +void vtime_init_idle(struct task_struct *t) +{ + unsigned long flags; + + write_seqlock_irqsave(&t->vtime_seqlock, flags); + t->vtime_snap_whence = VTIME_SYS; + t->vtime_snap = sched_clock(); + write_sequnlock_irqrestore(&t->vtime_seqlock, flags); +} + +cputime_t task_gtime(struct task_struct *t) +{ + unsigned long flags; + unsigned int seq; + cputime_t gtime; + + do { + seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); + + gtime = t->gtime; + if (t->flags & PF_VCPU) + gtime += vtime_delta(t); + + } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); + + return gtime; +} + +/* + * Fetch cputime raw values from fields of task_struct and + * add up the pending nohz execution time since the last + * cputime snapshot. + */ +static void +fetch_task_cputime(struct task_struct *t, + cputime_t *u_dst, cputime_t *s_dst, + cputime_t *u_src, cputime_t *s_src, + cputime_t *udelta, cputime_t *sdelta) +{ + unsigned long flags; + unsigned int seq; + unsigned long long delta; + + do { + *udelta = 0; + *sdelta = 0; + + seq = read_seqbegin_irqsave(&t->vtime_seqlock, flags); + + if (u_dst) + *u_dst = *u_src; + if (s_dst) + *s_dst = *s_src; + + /* Task is sleeping, nothing to add */ + if (t->vtime_snap_whence == VTIME_SLEEPING || + is_idle_task(t)) + continue; + + delta = vtime_delta(t); + + /* + * Task runs either in user or kernel space, add pending nohz time to + * the right place. + */ + if (t->vtime_snap_whence == VTIME_USER || t->flags & PF_VCPU) { + *udelta = delta; + } else { + if (t->vtime_snap_whence == VTIME_SYS) + *sdelta = delta; + } + } while (read_seqretry_irqrestore(&t->vtime_seqlock, seq, flags)); +} + + +void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) +{ + cputime_t udelta, sdelta; + + fetch_task_cputime(t, utime, stime, &t->utime, + &t->stime, &udelta, &sdelta); + if (utime) + *utime += udelta; + if (stime) + *stime += sdelta; +} + +void task_cputime_scaled(struct task_struct *t, + cputime_t *utimescaled, cputime_t *stimescaled) +{ + cputime_t udelta, sdelta; + + fetch_task_cputime(t, utimescaled, stimescaled, + &t->utimescaled, &t->stimescaled, &udelta, &sdelta); + if (utimescaled) + *utimescaled += cputime_to_scaled(udelta); + if (stimescaled) + *stimescaled += cputime_to_scaled(sdelta); +} #endif /* CONFIG_VIRT_CPU_ACCOUNTING_GEN */ diff --git a/kernel/softirq.c b/kernel/softirq.c index ed567babe789..f5cc25f147a6 100644 --- a/kernel/softirq.c +++ b/kernel/softirq.c @@ -221,7 +221,7 @@ asmlinkage void __do_softirq(void) current->flags &= ~PF_MEMALLOC; pending = local_softirq_pending(); - vtime_account_irq_enter(current); + account_irq_enter_time(current); __local_bh_disable((unsigned long)__builtin_return_address(0), SOFTIRQ_OFFSET); @@ -272,7 +272,7 @@ restart: lockdep_softirq_exit(); - vtime_account_irq_exit(current); + account_irq_exit_time(current); __local_bh_enable(SOFTIRQ_OFFSET); tsk_restore_flags(current, old_flags, PF_MEMALLOC); } @@ -341,7 +341,7 @@ static inline void invoke_softirq(void) */ void irq_exit(void) { - vtime_account_irq_exit(current); + account_irq_exit_time(current); trace_hardirq_exit(); sub_preempt_count(IRQ_EXIT_OFFSET); if (!in_interrupt() && local_softirq_pending()) -- cgit v1.2.3-70-g09d2