Diffstat (limited to 'arch/x86/mm')
-rw-r--r-- | arch/x86/mm/cpu_entry_area.c       |  3
-rw-r--r-- | arch/x86/mm/extable.c              | 25
-rw-r--r-- | arch/x86/mm/fault.c                | 15
-rw-r--r-- | arch/x86/mm/init_64.c              | 20
-rw-r--r-- | arch/x86/mm/mem_encrypt.c          | 42
-rw-r--r-- | arch/x86/mm/mem_encrypt_identity.c |  5
-rw-r--r-- | arch/x86/mm/numa.c                 | 34
-rw-r--r-- | arch/x86/mm/numa_emulation.c       |  3
-rw-r--r-- | arch/x86/mm/pat/set_memory.c       |  2
-rw-r--r-- | arch/x86/mm/tlb.c                  | 24
10 files changed, 111 insertions, 62 deletions
diff --git a/arch/x86/mm/cpu_entry_area.c b/arch/x86/mm/cpu_entry_area.c
index 770b613790b3..f5e1e60c9095 100644
--- a/arch/x86/mm/cpu_entry_area.c
+++ b/arch/x86/mm/cpu_entry_area.c
@@ -21,7 +21,8 @@ DEFINE_PER_CPU(struct cea_exception_stacks*, cea_exception_stacks);
 DECLARE_PER_CPU_PAGE_ALIGNED(struct doublefault_stack, doublefault_stack);
 #endif
 
-struct cpu_entry_area *get_cpu_entry_area(int cpu)
+/* Is called from entry code, so must be noinstr */
+noinstr struct cpu_entry_area *get_cpu_entry_area(int cpu)
 {
 	unsigned long va = CPU_ENTRY_AREA_PER_CPU + cpu * CPU_ENTRY_AREA_SIZE;
 	BUILD_BUG_ON(sizeof(struct cpu_entry_area) % PAGE_SIZE != 0);
diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c
index 1d6cb07f4f86..b93d6cd08a7f 100644
--- a/arch/x86/mm/extable.c
+++ b/arch/x86/mm/extable.c
@@ -5,6 +5,7 @@
 #include <xen/xen.h>
 
 #include <asm/fpu/internal.h>
+#include <asm/sev-es.h>
 #include <asm/traps.h>
 #include <asm/kdebug.h>
 
@@ -80,6 +81,18 @@ __visible bool ex_handler_uaccess(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL(ex_handler_uaccess);
 
+__visible bool ex_handler_copy(const struct exception_table_entry *fixup,
+			       struct pt_regs *regs, int trapnr,
+			       unsigned long error_code,
+			       unsigned long fault_addr)
+{
+	WARN_ONCE(trapnr == X86_TRAP_GP, "General protection fault in user access. Non-canonical address?");
+	regs->ip = ex_fixup_addr(fixup);
+	regs->ax = trapnr;
+	return true;
+}
+EXPORT_SYMBOL(ex_handler_copy);
+
 __visible bool ex_handler_rdmsr_unsafe(const struct exception_table_entry *fixup,
 				       struct pt_regs *regs, int trapnr,
 				       unsigned long error_code,
@@ -125,17 +138,21 @@ __visible bool ex_handler_clear_fs(const struct exception_table_entry *fixup,
 }
 EXPORT_SYMBOL(ex_handler_clear_fs);
 
-__visible bool ex_has_fault_handler(unsigned long ip)
+enum handler_type ex_get_fault_handler_type(unsigned long ip)
 {
 	const struct exception_table_entry *e;
 	ex_handler_t handler;
 
 	e = search_exception_tables(ip);
 	if (!e)
-		return false;
+		return EX_HANDLER_NONE;
 	handler = ex_fixup_handler(e);
-
-	return handler == ex_handler_fault;
+	if (handler == ex_handler_fault)
+		return EX_HANDLER_FAULT;
+	else if (handler == ex_handler_uaccess || handler == ex_handler_copy)
+		return EX_HANDLER_UACCESS;
+	else
+		return EX_HANDLER_OTHER;
 }
 
 int fixup_exception(struct pt_regs *regs, int trapnr, unsigned long error_code,
diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c
index 6e3e8a124903..82bf37a5c9ec 100644
--- a/arch/x86/mm/fault.c
+++ b/arch/x86/mm/fault.c
@@ -1128,7 +1128,7 @@ access_error(unsigned long error_code, struct vm_area_struct *vma)
 	return 0;
 }
 
-static int fault_in_kernel_space(unsigned long address)
+bool fault_in_kernel_space(unsigned long address)
 {
 	/*
 	 * On 64-bit systems, the vsyscall page is at an address above
@@ -1446,11 +1446,14 @@ DEFINE_IDTENTRY_RAW_ERRORCODE(exc_page_fault)
 	prefetchw(&current->mm->mmap_lock);
 
 	/*
-	 * KVM has two types of events that are, logically, interrupts, but
-	 * are unfortunately delivered using the #PF vector.  These events are
-	 * "you just accessed valid memory, but the host doesn't have it right
-	 * now, so I'll put you to sleep if you continue" and "that memory
-	 * you tried to access earlier is available now."
+	 * KVM uses #PF vector to deliver 'page not present' events to guests
+	 * (asynchronous page fault mechanism). The event happens when a
+	 * userspace task is trying to access some valid (from guest's point of
+	 * view) memory which is not currently mapped by the host (e.g. the
+	 * memory is swapped out). Note, the corresponding "page ready" event
+	 * which is injected when the memory becomes available, is delivered via
+	 * an interrupt mechanism and not a #PF exception
+	 * (see arch/x86/kernel/kvm.c: sysvec_kvm_asyncpf_interrupt()).
 	 *
 	 * We are relying on the interrupted context being sane (valid RSP,
 	 * relevant locks not held, etc.), which is fine as long as the
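The extable.c and fault.c changes together let callers classify a fixup entry instead of merely testing whether a fault handler exists. A minimal sketch of a consumer, using the enum values and helpers from the hunks above; the wrapper function try_fixup_kernel_fault() is hypothetical, not part of this diff:

/* Illustrative only: classify the fixup before applying it. */
static bool try_fixup_kernel_fault(struct pt_regs *regs, unsigned long addr)
{
	enum handler_type type = ex_get_fault_handler_type(regs->ip);

	if (type == EX_HANDLER_NONE)
		return false;	/* no fixup entry: let the fault oops */

	/* A uaccess/copy fixup is only plausible for user addresses. */
	if (type == EX_HANDLER_UACCESS && fault_in_kernel_space(addr))
		return false;

	return fixup_exception(regs, X86_TRAP_PF, 0, addr);
}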
diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c
index a4ac13cc3fdc..b5a3fa4033d3 100644
--- a/arch/x86/mm/init_64.c
+++ b/arch/x86/mm/init_64.c
@@ -217,11 +217,6 @@ static void sync_global_pgds(unsigned long start, unsigned long end)
 		sync_global_pgds_l4(start, end);
 }
 
-void arch_sync_kernel_mappings(unsigned long start, unsigned long end)
-{
-	sync_global_pgds(start, end);
-}
-
 /*
  * NOTE: This function is marked __ref because it calls __init function
  * (alloc_bootmem_pages). It's safe to do it ONLY when after_bootmem == 0.
@@ -1257,14 +1252,19 @@ static void __init preallocate_vmalloc_pages(void)
 		if (!p4d)
 			goto failed;
 
-		/*
-		 * With 5-level paging the P4D level is not folded. So the PGDs
-		 * are now populated and there is no need to walk down to the
-		 * PUD level.
-		 */
 		if (pgtable_l5_enabled())
 			continue;
 
+		/*
+		 * The goal here is to allocate all possibly required
+		 * hardware page tables pointed to by the top hardware
+		 * level.
+		 *
+		 * On 4-level systems, the P4D layer is folded away and
+		 * the above code does no preallocation. Below, go down
+		 * to the pud _software_ level to ensure the second
+		 * hardware level is allocated on 4-level systems too.
+		 */
 		lvl = "pud";
 		pud = pud_alloc(&init_mm, p4d, addr);
 		if (!pud)
diff --git a/arch/x86/mm/mem_encrypt.c b/arch/x86/mm/mem_encrypt.c
index 9f1177edc2e7..efbb3de472df 100644
--- a/arch/x86/mm/mem_encrypt.c
+++ b/arch/x86/mm/mem_encrypt.c
@@ -37,12 +37,13 @@
  * reside in the .data section so as not to be zeroed out when the .bss
  * section is later cleared.
  */
-u64 sme_me_mask __section(.data) = 0;
+u64 sme_me_mask __section(".data") = 0;
+u64 sev_status __section(".data") = 0;
 EXPORT_SYMBOL(sme_me_mask);
 
 DEFINE_STATIC_KEY_FALSE(sev_enable_key);
 EXPORT_SYMBOL_GPL(sev_enable_key);
 
-bool sev_enabled __section(.data);
+bool sev_enabled __section(".data");
 
 /* Buffer used for early in-place encryption by BSP, no locking needed */
 static char sme_early_buffer[PAGE_SIZE] __initdata __aligned(PAGE_SIZE);
@@ -347,7 +348,13 @@ bool sme_active(void)
 
 bool sev_active(void)
 {
-	return sme_me_mask && sev_enabled;
+	return sev_status & MSR_AMD64_SEV_ENABLED;
+}
+
+/* Needs to be called from non-instrumentable code */
+bool noinstr sev_es_active(void)
+{
+	return sev_status & MSR_AMD64_SEV_ES_ENABLED;
 }
 
 /* Override for DMA direct allocation check - ARCH_HAS_FORCE_DMA_UNENCRYPTED */
@@ -400,6 +407,31 @@ void __init mem_encrypt_free_decrypted_mem(void)
 	free_init_pages("unused decrypted", vaddr, vaddr_end);
 }
 
+static void print_mem_encrypt_feature_info(void)
+{
+	pr_info("AMD Memory Encryption Features active:");
+
+	/* Secure Memory Encryption */
+	if (sme_active()) {
+		/*
+		 * SME is mutually exclusive with any of the SEV
+		 * features below.
+		 */
+		pr_cont(" SME\n");
+		return;
+	}
+
+	/* Secure Encrypted Virtualization */
+	if (sev_active())
+		pr_cont(" SEV");
+
+	/* Encrypted Register State */
+	if (sev_es_active())
+		pr_cont(" SEV-ES");
+
+	pr_cont("\n");
+}
+
 /* Architecture __weak replacement functions */
 void __init mem_encrypt_init(void)
 {
@@ -415,8 +447,6 @@ void __init mem_encrypt_init(void)
 	if (sev_active())
 		static_branch_enable(&sev_enable_key);
 
-	pr_info("AMD %s active\n",
-		sev_active() ? "Secure Encrypted Virtualization (SEV)"
-			     : "Secure Memory Encryption (SME)");
+	print_mem_encrypt_feature_info();
 }
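Caching the MSR value in sev_status lets sev_active() and the noinstr sev_es_active() test plain bits instead of inferring SEV state from sme_me_mask && sev_enabled (or re-reading the MSR). For reference, the bit layout these helpers rely on, as defined in asm/msr-index.h of this era:

#define MSR_AMD64_SEV			0xc0010131
#define MSR_AMD64_SEV_ENABLED_BIT	0
#define MSR_AMD64_SEV_ES_ENABLED_BIT	1
#define MSR_AMD64_SEV_ENABLED		BIT_ULL(MSR_AMD64_SEV_ENABLED_BIT)
#define MSR_AMD64_SEV_ES_ENABLED	BIT_ULL(MSR_AMD64_SEV_ES_ENABLED_BIT)

With print_mem_encrypt_feature_info(), a bare-metal SME host thus logs "AMD Memory Encryption Features active: SME", while an SEV-ES guest logs "AMD Memory Encryption Features active: SEV SEV-ES".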
diff --git a/arch/x86/mm/mem_encrypt_identity.c b/arch/x86/mm/mem_encrypt_identity.c
index e2b0e2ac07bb..733b983f3a89 100644
--- a/arch/x86/mm/mem_encrypt_identity.c
+++ b/arch/x86/mm/mem_encrypt_identity.c
@@ -81,7 +81,7 @@ struct sme_populate_pgd_data {
  * section is 2MB aligned to allow for simple pagetable setup using only
  * PMD entries (see vmlinux.lds.S).
  */
-static char sme_workarea[2 * PMD_PAGE_SIZE] __section(.init.scratch);
+static char sme_workarea[2 * PMD_PAGE_SIZE] __section(".init.scratch");
 
 static char sme_cmdline_arg[] __initdata = "mem_encrypt";
 static char sme_cmdline_on[] __initdata = "on";
@@ -540,6 +540,9 @@ void __init sme_enable(struct boot_params *bp)
 		if (!(msr & MSR_AMD64_SEV_ENABLED))
 			return;
 
+		/* Save SEV_STATUS to avoid reading MSR again */
+		sev_status = msr;
+
 		/* SEV state cannot be controlled by a command line option */
 		sme_me_mask = me_mask;
 		sev_enabled = true;
diff --git a/arch/x86/mm/numa.c b/arch/x86/mm/numa.c
index aa76ec2d359b..44148691d78b 100644
--- a/arch/x86/mm/numa.c
+++ b/arch/x86/mm/numa.c
@@ -37,14 +37,12 @@ static __init int numa_setup(char *opt)
 		return -EINVAL;
 	if (!strncmp(opt, "off", 3))
 		numa_off = 1;
-#ifdef CONFIG_NUMA_EMU
 	if (!strncmp(opt, "fake=", 5))
-		numa_emu_cmdline(opt + 5);
-#endif
-#ifdef CONFIG_ACPI_NUMA
+		return numa_emu_cmdline(opt + 5);
 	if (!strncmp(opt, "noacpi", 6))
-		acpi_numa = -1;
-#endif
+		disable_srat();
+	if (!strncmp(opt, "nohmat", 6))
+		disable_hmat();
 	return 0;
 }
 early_param("numa", numa_setup);
@@ -516,7 +514,7 @@ static void __init numa_clear_kernel_node_hotplug(void)
 	 *    memory ranges, because quirks such as trim_snb_memory()
 	 *    reserve specific pages for Sandy Bridge graphics. ]
 	 */
-	for_each_memblock(reserved, mb_region) {
+	for_each_reserved_mem_region(mb_region) {
 		int nid = memblock_get_region_node(mb_region);
 
 		if (nid != MAX_NUMNODES)
@@ -748,6 +746,27 @@ static void __init init_memory_less_node(int nid)
 }
 
 /*
+ * A node may exist which has one or more Generic Initiators but no CPUs and no
+ * memory.
+ *
+ * This function must be called after init_cpu_to_node(), to ensure that any
+ * memoryless CPU nodes have already been brought online, and before the
+ * node_data[nid] is needed for zone list setup in build_all_zonelists().
+ *
+ * When this function is called, any nodes containing either memory and/or CPUs
+ * will already be online and there is no need to do anything extra, even if
+ * they also contain one or more Generic Initiators.
+ */
+void __init init_gi_nodes(void)
+{
+	int nid;
+
+	for_each_node_state(nid, N_GENERIC_INITIATOR)
+		if (!node_online(nid))
+			init_memory_less_node(nid);
+}
+
+/*
  * Setup early cpu_to_node.
 *
 * Populate cpu_to_node[] only if x86_cpu_to_apicid[],
@@ -919,7 +938,6 @@ int phys_to_target_node(phys_addr_t start)
 
 	return meminfo_to_nid(&numa_reserved_meminfo, start);
 }
-EXPORT_SYMBOL_GPL(phys_to_target_node);
 
 int memory_add_physaddr_to_nid(u64 start)
 {
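The comment above pins init_gi_nodes() between init_cpu_to_node() and build_all_zonelists(); the call site itself lives outside arch/x86/mm and hence outside this diffstat. A sketch of the intended boot ordering, with the surrounding setup code elided:

	/* Sketch only: ordering contract, not the literal call site. */
	init_cpu_to_node();		/* onlines nodes that have CPUs */
	init_gi_nodes();		/* then onlines GI-only nodes */
	/* ... */
	build_all_zonelists(NULL);	/* needs node_data[] of online nodes */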
diff --git a/arch/x86/mm/numa_emulation.c b/arch/x86/mm/numa_emulation.c
index 683cd12f4793..87d77cc52f86 100644
--- a/arch/x86/mm/numa_emulation.c
+++ b/arch/x86/mm/numa_emulation.c
@@ -13,9 +13,10 @@
 static int emu_nid_to_phys[MAX_NUMNODES];
 static char *emu_cmdline __initdata;
 
-void __init numa_emu_cmdline(char *str)
+int __init numa_emu_cmdline(char *str)
 {
 	emu_cmdline = str;
+	return 0;
 }
 
 static int __init emu_find_memblk_by_nid(int nid, const struct numa_meminfo *mi)
diff --git a/arch/x86/mm/pat/set_memory.c b/arch/x86/mm/pat/set_memory.c
index d1b2a889f035..40baa90e74f4 100644
--- a/arch/x86/mm/pat/set_memory.c
+++ b/arch/x86/mm/pat/set_memory.c
@@ -1999,7 +1999,7 @@ static int __set_memory_enc_dec(unsigned long addr, int numpages, bool enc)
 	/*
 	 * Before changing the encryption attribute, we need to flush caches.
 	 */
-	cpa_flush(&cpa, 1);
+	cpa_flush(&cpa, !this_cpu_has(X86_FEATURE_SME_COHERENT));
 
 	ret = __change_page_attr_set_clr(&cpa, 1);
diff --git a/arch/x86/mm/tlb.c b/arch/x86/mm/tlb.c
index 0951b47e64c1..11666ba19b62 100644
--- a/arch/x86/mm/tlb.c
+++ b/arch/x86/mm/tlb.c
@@ -14,7 +14,6 @@
 #include <asm/nospec-branch.h>
 #include <asm/cache.h>
 #include <asm/apic.h>
-#include <asm/uv/uv.h>
 
 #include "mm_internal.h"
 
@@ -800,29 +799,6 @@ STATIC_NOPV void native_flush_tlb_others(const struct cpumask *cpumask,
 		trace_tlb_flush(TLB_REMOTE_SEND_IPI,
 				(info->end - info->start) >> PAGE_SHIFT);
 
-	if (is_uv_system()) {
-		/*
-		 * This whole special case is confused. UV has a "Broadcast
-		 * Assist Unit", which seems to be a fancy way to send IPIs.
-		 * Back when x86 used an explicit TLB flush IPI, UV was
-		 * optimized to use its own mechanism. These days, x86 uses
-		 * smp_call_function_many(), but UV still uses a manual IPI,
-		 * and that IPI's action is out of date -- it does a manual
-		 * flush instead of calling flush_tlb_func_remote(). This
-		 * means that the percpu tlb_gen variables won't be updated
-		 * and we'll do pointless flushes on future context switches.
-		 *
-		 * Rather than hooking native_flush_tlb_others() here, I think
-		 * that UV should be updated so that smp_call_function_many(),
-		 * etc, are optimal on UV.
-		 */
-		cpumask = uv_flush_tlb_others(cpumask, info);
-		if (cpumask)
-			smp_call_function_many(cpumask, flush_tlb_func_remote,
-					       (void *)info, 1);
-		return;
-	}
-
 	/*
 	 * If no page tables were freed, we can skip sending IPIs to
 	 * CPUs in lazy TLB mode. They will flush the CPU themselves
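With the UV special case removed, all platforms fall through to the generic IPI path that the trailing comment introduces. For context, the code following this hunk in the 5.9-era tlb.c looks roughly like this (a paraphrase for orientation, not part of the hunk shown):

	if (info->freed_tables)
		smp_call_function_many(cpumask, flush_tlb_func_remote,
				       (void *)info, 1);
	else
		on_each_cpu_cond_mask(tlb_is_not_lazy, flush_tlb_func_remote,
				      (void *)info, 1, cpumask);

This is the path the removed UV branch bypassed; as the deleted comment notes, the UV IPI did a manual flush without updating the per-CPU tlb_gen, causing pointless flushes on later context switches.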