From 1c1ed4731cc81942a5b25f284a85257573829b9e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 10 Dec 2018 12:21:48 +0100 Subject: x86/alternatives: Add macro comments ... so that when one stares at the .s output, one can find her way around the resulting asm magic. With it, ALTERNATIVE looks like this now: # ALT: oldnstr 661: ... 662: # ALT: padding .skip ... 663: .pushsection .altinstructions,"a" ... .popsection .pushsection .altinstr_replacement, "ax" # ALT: replacement 1 6641: ... 6651: .popsection Merge __OLDINSTR() into OLDINSTR(), while at it. No functional changes. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Tom Lendacky Cc: X86 ML Link: https://lkml.kernel.org/r/20181211222326.14581-2-bp@alien8.de --- arch/x86/include/asm/alternative.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 0660e14690c8..373e2baca6ce 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -94,13 +94,12 @@ static inline int alternatives_text_reserved(void *start, void *end) #define alt_total_slen alt_end_marker"b-661b" #define alt_rlen(num) e_replacement(num)"f-"b_replacement(num)"f" -#define __OLDINSTR(oldinstr, num) \ +#define OLDINSTR(oldinstr, num) \ + "# ALT: oldnstr\n" \ "661:\n\t" oldinstr "\n662:\n" \ + "# ALT: padding\n" \ ".skip -(((" alt_rlen(num) ")-(" alt_slen ")) > 0) * " \ - "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" - -#define OLDINSTR(oldinstr, num) \ - __OLDINSTR(oldinstr, num) \ + "((" alt_rlen(num) ")-(" alt_slen ")),0x90\n" \ alt_end_marker ":\n" /* @@ -116,7 +115,9 @@ static inline int alternatives_text_reserved(void *start, void *end) * additionally longer than the first replacement alternative. 
*/ #define OLDINSTR_2(oldinstr, num1, num2) \ + "# ALT: oldinstr2\n" \ "661:\n\t" oldinstr "\n662:\n" \ + "# ALT: padding2\n" \ ".skip -((" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")) > 0) * " \ "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ alt_end_marker ":\n" @@ -129,8 +130,9 @@ static inline int alternatives_text_reserved(void *start, void *end) " .byte " alt_rlen(num) "\n" /* replacement len */ \ " .byte " alt_pad_len "\n" /* pad len */ -#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ - b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n\t" +#define ALTINSTR_REPLACEMENT(newinstr, feature, num) /* replacement */ \ + "# ALT: replacement " #num "\n" \ + b_replacement(num)":\n\t" newinstr "\n" e_replacement(num) ":\n" /* alternative assembly primitive: */ #define ALTERNATIVE(oldinstr, newinstr, feature) \ -- cgit v1.2.3-70-g09d2 From c1d4e4192aa4e7408a81c32a77e7c867a07f8aa2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 10 Dec 2018 12:30:30 +0100 Subject: x86/alternatives: Print containing function ... in the "debug-alternative" output so that one can find her way more easily when staring at the vmlinux disassembly. For example: apply_alternatives: feat: 3*32+18, old: (read_tsc+0x0/0x10 (ffffffff8101d1c0) len: 5), repl: (ffffffff824e6d33, len: 5) ^^^^^^^^^^^^^^^^^ ffffffff8101d1c0: old_insn: 0f 31 90 90 90 ffffffff824e6d33: rpl_insn: 0f ae e8 0f 31 ffffffff8101d1c0: final_insn: 0f ae e8 0f 31 No functional changes. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "H. 
Peter Anvin" Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Tom Lendacky Cc: X86 ML Link: https://lkml.kernel.org/r/20181211222326.14581-3-bp@alien8.de --- arch/x86/kernel/alternative.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index ebeac487a20c..d458c7973c56 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -393,10 +393,10 @@ void __init_or_module noinline apply_alternatives(struct alt_instr *start, continue; } - DPRINTK("feat: %d*32+%d, old: (%px len: %d), repl: (%px, len: %d), pad: %d", + DPRINTK("feat: %d*32+%d, old: (%pS (%px) len: %d), repl: (%px, len: %d), pad: %d", a->cpuid >> 5, a->cpuid & 0x1f, - instr, a->instrlen, + instr, instr, a->instrlen, replacement, a->replacementlen, a->padlen); DUMP_BYTES(instr, a->instrlen, "%px: old_insn: ", instr); -- cgit v1.2.3-70-g09d2 From 71a93c26930471e976dd184ef91931b2a5393afc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 10 Dec 2018 16:17:23 +0100 Subject: x86/alternatives: Add an ALTERNATIVE_3() macro Similar to ALTERNATIVE_2(), ALTERNATIVE_3() selects between 3 possible variants. Will be used for adding RDTSCP to the rdtsc_ordered() alternatives. Signed-off-by: Borislav Petkov Cc: Andy Lutomirski Cc: "H. 
Peter Anvin" Cc: Josh Poimboeuf Cc: Peter Zijlstra Cc: Tom Lendacky Cc: X86 ML Link: https://lkml.kernel.org/r/20181211222326.14581-4-bp@alien8.de --- arch/x86/include/asm/alternative.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 373e2baca6ce..4c74073a19cc 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -122,6 +122,16 @@ static inline int alternatives_text_reserved(void *start, void *end) "(" alt_max_short(alt_rlen(num1), alt_rlen(num2)) " - (" alt_slen ")), 0x90\n" \ alt_end_marker ":\n" +#define OLDINSTR_3(oldinsn, n1, n2, n3) \ + "# ALT: oldinstr3\n" \ + "661:\n\t" oldinsn "\n662:\n" \ + "# ALT: padding3\n" \ + ".skip -((" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ + " - (" alt_slen ")) > 0) * " \ + "(" alt_max_short(alt_max_short(alt_rlen(n1), alt_rlen(n2)), alt_rlen(n3)) \ + " - (" alt_slen ")), 0x90\n" \ + alt_end_marker ":\n" + #define ALTINSTR_ENTRY(feature, num) \ " .long 661b - .\n" /* label */ \ " .long " b_replacement(num)"f - .\n" /* new instruction */ \ @@ -155,6 +165,19 @@ static inline int alternatives_text_reserved(void *start, void *end) ALTINSTR_REPLACEMENT(newinstr2, feature2, 2) \ ".popsection\n" +#define ALTERNATIVE_3(oldinsn, newinsn1, feat1, newinsn2, feat2, newinsn3, feat3) \ + OLDINSTR_3(oldinsn, 1, 2, 3) \ + ".pushsection .altinstructions,\"a\"\n" \ + ALTINSTR_ENTRY(feat1, 1) \ + ALTINSTR_ENTRY(feat2, 2) \ + ALTINSTR_ENTRY(feat3, 3) \ + ".popsection\n" \ + ".pushsection .altinstr_replacement, \"ax\"\n" \ + ALTINSTR_REPLACEMENT(newinsn1, feat1, 1) \ + ALTINSTR_REPLACEMENT(newinsn2, feat2, 2) \ + ALTINSTR_REPLACEMENT(newinsn3, feat3, 3) \ + ".popsection\n" + /* * Alternative instructions for different CPU types or capabilities. 
* -- cgit v1.2.3-70-g09d2 From 093ae8f9a86a974c920b613860f1f7fd5bbd70ab Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 12 Apr 2018 13:11:36 +0200 Subject: x86/TSC: Use RDTSCP Currently, the kernel uses [LM]FENCE; RDTSC in the timekeeping code, to guarantee monotonicity of time where the *FENCE is selected based on vendor. Replace that sequence with RDTSCP which is faster or on-par and gives the same guarantees. A microbenchmark on Intel shows that the change is on-par. On AMD, the change is either on-par with the current LFENCE-prefixed RDTSC or slightly better with RDTSCP. The comparison is done with the LFENCE-prefixed RDTSC (and not with the MFENCE-prefixed one, as one would normally expect) because all modern AMD families make LFENCE serializing and thus avoid the heavy MFENCE by effectively enabling X86_FEATURE_LFENCE_RDTSC. Co-developed-by: Thomas Gleixner Signed-off-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Tom Lendacky Cc: Andy Lutomirski Cc: "H. Peter Anvin" Cc: John Stultz Cc: x86@kernel.org Link: https://lkml.kernel.org/r/20181119184556.11479-1-bp@alien8.de --- arch/x86/include/asm/msr.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index 91e4cf189914..5cc3930cb465 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -217,6 +217,8 @@ static __always_inline unsigned long long rdtsc(void) */ static __always_inline unsigned long long rdtsc_ordered(void) { + DECLARE_ARGS(val, low, high); + /* * The RDTSC instruction is not ordered relative to memory * access. The Intel SDM and the AMD APM are both vague on this @@ -227,9 +229,19 @@ static __always_inline unsigned long long rdtsc_ordered(void) * ordering guarantees as reading from a global memory location * that some other imaginary CPU is updating continuously with a * time stamp. 
+ * + * Thus, use the preferred barrier on the respective CPU, aiming for + * RDTSCP as the default. */ - barrier_nospec(); - return rdtsc(); + asm volatile(ALTERNATIVE_3("rdtsc", + "mfence; rdtsc", X86_FEATURE_MFENCE_RDTSC, + "lfence; rdtsc", X86_FEATURE_LFENCE_RDTSC, + "rdtscp", X86_FEATURE_RDTSCP) + : EAX_EDX_RET(val, low, high) + /* RDTSCP clobbers ECX with MSR_TSC_AUX. */ + :: "ecx"); + + return EAX_EDX_VAL(val, low, high); } static inline unsigned long long native_read_pmc(int counter) -- cgit v1.2.3-70-g09d2