diff options
author | Andy Lutomirski <luto@amacapital.net> | 2014-10-24 15:58:08 -0700 |
---|---|---|
committer | Ingo Molnar <mingo@kernel.org> | 2015-02-04 12:10:42 +0100 |
commit | 1e02ce4cccdcb9688386e5b8d2c9fa4660b45389 (patch) | |
tree | 7d514286844acea505228590119ac1a886cf6995 /arch/x86/include | |
parent | 375074cc736ab1d89a708c0a8d7baa4a70d5d476 (diff) |
x86: Store a per-cpu shadow copy of CR4
Context switches and TLB flushes can change individual bits of CR4.
CR4 reads take several cycles, so store a shadow copy of CR4 in a
per-cpu variable.
To avoid wasting a cache line, I added the CR4 shadow to
cpu_tlbstate, which is already touched in switch_mm. The heaviest
users of the cr4 shadow will be switch_mm and __switch_to_xtra, and
__switch_to_xtra is called shortly after switch_mm during context
switch, so the cacheline is likely to be hot.
Signed-off-by: Andy Lutomirski <luto@amacapital.net>
Reviewed-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Andrea Arcangeli <aarcange@redhat.com>
Cc: Vince Weaver <vince@deater.net>
Cc: "hillf.zj" <hillf.zj@alibaba-inc.com>
Cc: Valdis Kletnieks <Valdis.Kletnieks@vt.edu>
Cc: Paul Mackerras <paulus@samba.org>
Cc: Arnaldo Carvalho de Melo <acme@kernel.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: http://lkml.kernel.org/r/3a54dd3353fffbf84804398e00dfdc5b7c1afd7d.1414190806.git.luto@amacapital.net
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'arch/x86/include')
-rw-r--r-- | arch/x86/include/asm/paravirt.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/special_insns.h | 6 | ||||
-rw-r--r-- | arch/x86/include/asm/tlbflush.h | 52 | ||||
-rw-r--r-- | arch/x86/include/asm/virtext.h | 2 |
4 files changed, 46 insertions, 20 deletions
diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 32444ae939ca..965c47d254aa 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -80,16 +80,16 @@ static inline void write_cr3(unsigned long x) PVOP_VCALL1(pv_mmu_ops.write_cr3, x); } -static inline unsigned long read_cr4(void) +static inline unsigned long __read_cr4(void) { return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4); } -static inline unsigned long read_cr4_safe(void) +static inline unsigned long __read_cr4_safe(void) { return PVOP_CALL0(unsigned long, pv_cpu_ops.read_cr4_safe); } -static inline void write_cr4(unsigned long x) +static inline void __write_cr4(unsigned long x) { PVOP_VCALL1(pv_cpu_ops.write_cr4, x); } diff --git a/arch/x86/include/asm/special_insns.h b/arch/x86/include/asm/special_insns.h index e820c080a4e9..6a4b00fafb00 100644 --- a/arch/x86/include/asm/special_insns.h +++ b/arch/x86/include/asm/special_insns.h @@ -137,17 +137,17 @@ static inline void write_cr3(unsigned long x) native_write_cr3(x); } -static inline unsigned long read_cr4(void) +static inline unsigned long __read_cr4(void) { return native_read_cr4(); } -static inline unsigned long read_cr4_safe(void) +static inline unsigned long __read_cr4_safe(void) { return native_read_cr4_safe(); } -static inline void write_cr4(unsigned long x) +static inline void __write_cr4(unsigned long x) { native_write_cr4(x); } diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index fc0c4bc356ce..cd791948b286 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -15,14 +15,37 @@ #define __flush_tlb_single(addr) __native_flush_tlb_single(addr) #endif +struct tlb_state { +#ifdef CONFIG_SMP + struct mm_struct *active_mm; + int state; +#endif + + /* + * Access to this CR4 shadow and to H/W CR4 is protected by + * disabling interrupts when modifying either one. + */ + unsigned long cr4; +}; +DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); + +/* Initialize cr4 shadow for this CPU. */ +static inline void cr4_init_shadow(void) +{ + this_cpu_write(cpu_tlbstate.cr4, __read_cr4()); +} + /* Set in this cpu's CR4. */ static inline void cr4_set_bits(unsigned long mask) { unsigned long cr4; - cr4 = read_cr4(); - cr4 |= mask; - write_cr4(cr4); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + if ((cr4 | mask) != cr4) { + cr4 |= mask; + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); + } } /* Clear in this cpu's CR4. */ @@ -30,9 +53,18 @@ static inline void cr4_clear_bits(unsigned long mask) { unsigned long cr4; - cr4 = read_cr4(); - cr4 &= ~mask; - write_cr4(cr4); + cr4 = this_cpu_read(cpu_tlbstate.cr4); + if ((cr4 & ~mask) != cr4) { + cr4 &= ~mask; + this_cpu_write(cpu_tlbstate.cr4, cr4); + __write_cr4(cr4); + } +} + +/* Read the CR4 shadow. */ +static inline unsigned long cr4_read_shadow(void) +{ + return this_cpu_read(cpu_tlbstate.cr4); } /* @@ -61,7 +93,7 @@ static inline void __native_flush_tlb_global_irq_disabled(void) { unsigned long cr4; - cr4 = native_read_cr4(); + cr4 = this_cpu_read(cpu_tlbstate.cr4); /* clear PGE */ native_write_cr4(cr4 & ~X86_CR4_PGE); /* write old PGE again and flush TLBs */ @@ -221,12 +253,6 @@ void native_flush_tlb_others(const struct cpumask *cpumask, #define TLBSTATE_OK 1 #define TLBSTATE_LAZY 2 -struct tlb_state { - struct mm_struct *active_mm; - int state; -}; -DECLARE_PER_CPU_SHARED_ALIGNED(struct tlb_state, cpu_tlbstate); - static inline void reset_lazy_tlbstate(void) { this_cpu_write(cpu_tlbstate.state, 0); diff --git a/arch/x86/include/asm/virtext.h b/arch/x86/include/asm/virtext.h index f41e19ca717b..cce9ee68e335 100644 --- a/arch/x86/include/asm/virtext.h +++ b/arch/x86/include/asm/virtext.h @@ -46,7 +46,7 @@ static inline void cpu_vmxoff(void) static inline int cpu_vmx_enabled(void) { - return read_cr4() & X86_CR4_VMXE; + return __read_cr4() & X86_CR4_VMXE; } /** Disable VMX if it is enabled on the current CPU |