diff options
Diffstat (limited to 'arch/powerpc')
33 files changed, 664 insertions, 171 deletions
diff --git a/arch/powerpc/boot/Makefile b/arch/powerpc/boot/Makefile index 39354365f54a..ed9883169190 100644 --- a/arch/powerpc/boot/Makefile +++ b/arch/powerpc/boot/Makefile @@ -197,7 +197,7 @@ $(obj)/empty.c: $(obj)/zImage.coff.lds $(obj)/zImage.ps3.lds : $(obj)/%: $(srctree)/$(src)/%.S $(Q)cp $< $@ -$(obj)/serial.c: $(obj)/autoconf.h +$(srctree)/$(src)/serial.c: $(obj)/autoconf.h $(obj)/autoconf.h: $(obj)/%: $(objtree)/include/generated/% $(Q)cp $< $@ diff --git a/arch/powerpc/boot/crt0.S b/arch/powerpc/boot/crt0.S index 32dfe6d083f3..9b9d17437373 100644 --- a/arch/powerpc/boot/crt0.S +++ b/arch/powerpc/boot/crt0.S @@ -15,7 +15,7 @@ RELA = 7 RELACOUNT = 0x6ffffff9 - .text + .data /* A procedure descriptor used when booting this as a COFF file. * When making COFF, this comes first in the link and we're * linked at 0x500000. @@ -23,6 +23,8 @@ RELACOUNT = 0x6ffffff9 .globl _zimage_start_opd _zimage_start_opd: .long 0x500000, 0, 0, 0 + .text + b _zimage_start #ifdef __powerpc64__ .balign 8 diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index 33a4fc891947..463c63a9fcf1 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -335,6 +335,7 @@ #define H_SET_PARTITION_TABLE 0xF800 #define H_ENTER_NESTED 0xF804 #define H_TLB_INVALIDATE 0xF808 +#define H_COPY_TOFROM_GUEST 0xF80C /* Values for 2nd argument to H_SET_MODE */ #define H_SET_MODE_RESOURCE_SET_CIABR 1 diff --git a/arch/powerpc/include/asm/kvm_book3s.h b/arch/powerpc/include/asm/kvm_book3s.h index 09f8e9ba69bc..38f1b879f569 100644 --- a/arch/powerpc/include/asm/kvm_book3s.h +++ b/arch/powerpc/include/asm/kvm_book3s.h @@ -188,6 +188,13 @@ extern int kvmppc_book3s_hcall_implemented(struct kvm *kvm, unsigned long hc); extern int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned long ea, unsigned long dsisr); +extern unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, + gva_t eaddr, void *to, void *from, + unsigned long n); +extern long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *to, unsigned long n); +extern long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *from, unsigned long n); extern int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, u64 *pte_ret_p); @@ -196,8 +203,11 @@ extern int kvmppc_mmu_radix_translate_table(struct kvm_vcpu *vcpu, gva_t eaddr, int table_index, u64 *pte_ret_p); extern int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, bool data, bool iswrite); +extern void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, + unsigned int pshift, unsigned int lpid); extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, - unsigned int shift, struct kvm_memory_slot *memslot, + unsigned int shift, + const struct kvm_memory_slot *memslot, unsigned int lpid); extern bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, unsigned long gpa, @@ -215,16 +225,14 @@ extern int kvmppc_radix_init(void); extern void kvmppc_radix_exit(void); extern int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); -extern void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, - unsigned long gpa, unsigned int shift, - struct kvm_memory_slot *memslot, - unsigned int lpid); extern int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn); extern long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long *map); +extern void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot); extern int kvmhv_get_rmmu_info(struct kvm *kvm, struct kvm_ppc_rmmu_info *info); /* XXX remove this export when load_last_inst() is generic */ @@ -242,7 +250,7 @@ extern kvm_pfn_t kvmppc_gpa_to_pfn(struct kvm_vcpu *vcpu, gpa_t gpa, bool writing, bool *writable); extern void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, unsigned long *rmap, long pte_index, int realmode); -extern void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot, +extern void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, unsigned long gfn, unsigned long psize); extern void kvmppc_invalidate_hpte(struct kvm *kvm, __be64 *hptep, unsigned long pte_index); @@ -298,6 +306,7 @@ long kvmhv_nested_init(void); void kvmhv_nested_exit(void); void kvmhv_vm_nested_init(struct kvm *kvm); long kvmhv_set_partition_table(struct kvm_vcpu *vcpu); +long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu); void kvmhv_set_ptbl_entry(unsigned int lpid, u64 dw0, u64 dw1); void kvmhv_release_all_nested(struct kvm *kvm); long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu); @@ -307,7 +316,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, struct kvm_vcpu *vcpu, void kvmhv_save_hv_regs(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, struct hv_guest_state *hr); -long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu); +long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu); void kvmppc_giveup_fac(struct kvm_vcpu *vcpu, ulong fac); diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index 6d298145d564..21b1ed5df888 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -55,6 +55,7 @@ struct kvm_nested_guest { cpumask_t need_tlb_flush; cpumask_t cpu_in_guest; short prev_cpu[NR_CPUS]; + u8 radix; /* is this nested guest radix */ }; /* @@ -150,6 +151,18 @@ static inline bool kvm_is_radix(struct kvm *kvm) return kvm->arch.radix; } +static inline bool kvmhv_vcpu_is_radix(struct kvm_vcpu *vcpu) +{ + bool radix; + + if (vcpu->arch.nested) + radix = vcpu->arch.nested->radix; + else + radix = kvm_is_radix(vcpu->kvm); + + return radix; +} + #define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ #endif @@ -624,8 +637,11 @@ extern int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, unsigned long *rmapp, struct rmap_nested **n_rmap); extern void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp, struct rmap_nested **n_rmap); +extern void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp, + unsigned long clr, unsigned long set, + unsigned long hpa, unsigned long nbytes); extern void kvmhv_remove_nest_rmap_range(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, unsigned long gpa, unsigned long hpa, unsigned long nbytes); diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index fac6f631ed29..0f98f00da2ea 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -72,7 +72,7 @@ extern int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end); extern int kvm_test_age_hva(struct kvm *kvm, unsigned long hva); -extern void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); +extern int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte); #define HPTEG_CACHE_NUM (1 << 15) #define HPTEG_HASH_BITS_PTE 13 @@ -793,6 +793,7 @@ struct kvm_vcpu_arch { /* For support of nested guests */ struct kvm_nested_guest *nested; u32 nested_vcpu_id; + gpa_t nested_io_gpr; #endif #ifdef CONFIG_KVM_BOOK3S_HV_EXIT_TIMING @@ -827,6 +828,8 @@ struct kvm_vcpu_arch { #define KVM_MMIO_REG_FQPR 0x00c0 #define KVM_MMIO_REG_VSX 0x0100 #define KVM_MMIO_REG_VMX 0x0180 +#define KVM_MMIO_REG_NESTED_GPR 0xffc0 + #define __KVM_HAVE_ARCH_WQP #define __KVM_HAVE_CREATE_DEVICE diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index 9b89b1918dfc..eb0d79f0ca45 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -224,7 +224,8 @@ extern int kvmppc_core_prepare_memory_region(struct kvm *kvm, extern void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new); + const struct kvm_memory_slot *new, + enum kvm_mr_change change); extern int kvm_vm_ioctl_get_smmu_info(struct kvm *kvm, struct kvm_ppc_smmu_info *info); extern void kvmppc_core_flush_memslot(struct kvm *kvm, @@ -294,7 +295,8 @@ struct kvmppc_ops { void (*commit_memory_region)(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new); + const struct kvm_memory_slot *new, + enum kvm_mr_change change); int (*unmap_hva_range)(struct kvm *kvm, unsigned long start, unsigned long end); int (*age_hva)(struct kvm *kvm, unsigned long start, unsigned long end); @@ -326,6 +328,10 @@ struct kvmppc_ops { unsigned long flags); void (*giveup_ext)(struct kvm_vcpu *vcpu, ulong msr); int (*enable_nested)(struct kvm *kvm); + int (*load_from_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size); + int (*store_to_eaddr)(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size); }; extern struct kvmppc_ops *kvmppc_hv_ops; diff --git a/arch/powerpc/include/asm/perf_event.h b/arch/powerpc/include/asm/perf_event.h index 8bf1b6351716..16a49819da9a 100644 --- a/arch/powerpc/include/asm/perf_event.h +++ b/arch/powerpc/include/asm/perf_event.h @@ -26,6 +26,8 @@ #include <asm/ptrace.h> #include <asm/reg.h> +#define perf_arch_bpf_user_pt_regs(regs) ®s->user_regs + /* * Overload regs->result to specify whether we should use the MSR (result * is zero) or the SIAR (result is non zero). diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index a658091a19f9..3712152206f3 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -1,7 +1,6 @@ # UAPI Header export list include include/uapi/asm-generic/Kbuild.asm -generic-y += bpf_perf_event.h generic-y += param.h generic-y += poll.h generic-y += resource.h diff --git a/arch/powerpc/include/uapi/asm/bpf_perf_event.h b/arch/powerpc/include/uapi/asm/bpf_perf_event.h new file mode 100644 index 000000000000..b551b741653d --- /dev/null +++ b/arch/powerpc/include/uapi/asm/bpf_perf_event.h @@ -0,0 +1,9 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__ +#define _UAPI__ASM_BPF_PERF_EVENT_H__ + +#include <asm/ptrace.h> + +typedef struct user_pt_regs bpf_user_pt_regs_t; + +#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 89d32bb79d5e..db2691ff4c0b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -995,7 +995,16 @@ EXC_COMMON_BEGIN(h_data_storage_common) bl save_nvgprs RECONCILE_IRQ_STATE(r10, r11) addi r3,r1,STACK_FRAME_OVERHEAD +BEGIN_MMU_FTR_SECTION + ld r4,PACA_EXGEN+EX_DAR(r13) + lwz r5,PACA_EXGEN+EX_DSISR(r13) + std r4,_DAR(r1) + std r5,_DSISR(r1) + li r5,SIGSEGV + bl bad_page_fault +MMU_FTR_SECTION_ELSE bl unknown_exception +ALT_MMU_FTR_SECTION_END_IFSET(MMU_FTR_TYPE_RADIX) b ret_from_except diff --git a/arch/powerpc/kernel/legacy_serial.c b/arch/powerpc/kernel/legacy_serial.c index 33b34a58fc62..5b9dce17f0c9 100644 --- a/arch/powerpc/kernel/legacy_serial.c +++ b/arch/powerpc/kernel/legacy_serial.c @@ -372,6 +372,8 @@ void __init find_legacy_serial_ports(void) /* Now find out if one of these is out firmware console */ path = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (path == NULL) + path = of_get_property(of_chosen, "stdout-path", NULL); if (path != NULL) { stdout = of_find_node_by_path(path); if (stdout) @@ -595,8 +597,10 @@ static int __init check_legacy_serial_console(void) /* We are getting a weird phandle from OF ... */ /* ... So use the full path instead */ name = of_get_property(of_chosen, "linux,stdout-path", NULL); + if (name == NULL) + name = of_get_property(of_chosen, "stdout-path", NULL); if (name == NULL) { - DBG(" no linux,stdout-path !\n"); + DBG(" no stdout-path !\n"); return -ENODEV; } prom_stdout = of_find_node_by_path(name); diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c index c77e95e9b384..0d20c7ad40fa 100644 --- a/arch/powerpc/kernel/machine_kexec_file_64.c +++ b/arch/powerpc/kernel/machine_kexec_file_64.c @@ -24,7 +24,6 @@ #include <linux/slab.h> #include <linux/kexec.h> -#include <linux/memblock.h> #include <linux/of_fdt.h> #include <linux/libfdt.h> #include <asm/ima.h> @@ -47,59 +46,6 @@ int arch_kexec_kernel_image_probe(struct kimage *image, void *buf, } /** - * arch_kexec_walk_mem - call func(data) for each unreserved memory block - * @kbuf: Context info for the search. Also passed to @func. - * @func: Function to call for each memory block. - * - * This function is used by kexec_add_buffer and kexec_locate_mem_hole - * to find unreserved memory to load kexec segments into. - * - * Return: The memory walk will stop when func returns a non-zero value - * and that value will be returned. If all free regions are visited without - * func returning non-zero, then zero will be returned. - */ -int arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(struct resource *, void *)) -{ - int ret = 0; - u64 i; - phys_addr_t mstart, mend; - struct resource res = { }; - - if (kbuf->top_down) { - for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0, - &mstart, &mend, NULL) { - /* - * In memblock, end points to the first byte after the - * range while in kexec, end points to the last byte - * in the range. - */ - res.start = mstart; - res.end = mend - 1; - ret = func(&res, kbuf); - if (ret) - break; - } - } else { - for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend, - NULL) { - /* - * In memblock, end points to the first byte after the - * range while in kexec, end points to the last byte - * in the range. - */ - res.start = mstart; - res.end = mend - 1; - ret = func(&res, kbuf); - if (ret) - break; - } - } - - return ret; -} - -/** * setup_purgatory - initialize the purgatory's global variables * @image: kexec image. * @slave_code: Slave code for the purgatory. diff --git a/arch/powerpc/kernel/msi.c b/arch/powerpc/kernel/msi.c index dab616a33b8d..f2197654be07 100644 --- a/arch/powerpc/kernel/msi.c +++ b/arch/powerpc/kernel/msi.c @@ -34,5 +34,10 @@ void arch_teardown_msi_irqs(struct pci_dev *dev) { struct pci_controller *phb = pci_bus_to_host(dev->bus); - phb->controller_ops.teardown_msi_irqs(dev); + /* + * We can be called even when arch_setup_msi_irqs() returns -ENOSYS, + * so check the pointer again. + */ + if (phb->controller_ops.teardown_msi_irqs) + phb->controller_ops.teardown_msi_irqs(dev); } diff --git a/arch/powerpc/kernel/ptrace.c b/arch/powerpc/kernel/ptrace.c index afb819f4ca68..714c3480c52d 100644 --- a/arch/powerpc/kernel/ptrace.c +++ b/arch/powerpc/kernel/ptrace.c @@ -3266,12 +3266,17 @@ long do_syscall_trace_enter(struct pt_regs *regs) user_exit(); if (test_thread_flag(TIF_SYSCALL_EMU)) { - ptrace_report_syscall(regs); /* + * A nonzero return code from tracehook_report_syscall_entry() + * tells us to prevent the syscall execution, but we are not + * going to execute it anyway. + * * Returning -1 will skip the syscall execution. We want to * avoid clobbering any register also, thus, not 'gotoing' * skip label. */ + if (tracehook_report_syscall_entry(regs)) + ; return -1; } diff --git a/arch/powerpc/kvm/book3s.c b/arch/powerpc/kvm/book3s.c index fd9893bc7aa1..bd1a677dd9e4 100644 --- a/arch/powerpc/kvm/book3s.c +++ b/arch/powerpc/kvm/book3s.c @@ -830,9 +830,10 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { - kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new); + kvm->arch.kvm_ops->commit_memory_region(kvm, mem, old, new, change); } int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end) @@ -850,9 +851,10 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return kvm->arch.kvm_ops->test_age_hva(kvm, hva); } -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { kvm->arch.kvm_ops->set_spte_hva(kvm, hva, pte); + return 0; } void kvmppc_mmu_destroy(struct kvm_vcpu *vcpu) diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index c615617e78ac..6f2d2fb4e098 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -743,12 +743,15 @@ void kvmppc_rmap_reset(struct kvm *kvm) srcu_idx = srcu_read_lock(&kvm->srcu); slots = kvm_memslots(kvm); kvm_for_each_memslot(memslot, slots) { + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); /* * This assumes it is acceptable to lose reference and * change bits across a reset. */ memset(memslot->arch.rmap, 0, memslot->npages * sizeof(*memslot->arch.rmap)); + spin_unlock(&kvm->mmu_lock); } srcu_read_unlock(&kvm->srcu, srcu_idx); } @@ -896,11 +899,12 @@ void kvmppc_core_flush_memslot_hv(struct kvm *kvm, gfn = memslot->base_gfn; rmapp = memslot->arch.rmap; + if (kvm_is_radix(kvm)) { + kvmppc_radix_flush_memslot(kvm, memslot); + return; + } + for (n = memslot->npages; n; --n, ++gfn) { - if (kvm_is_radix(kvm)) { - kvm_unmap_radix(kvm, memslot, gfn); - continue; - } /* * Testing the present bit without locking is OK because * the memslot has been marked invalid already, and hence diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index d68162ee159b..fb88167a402a 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -29,6 +29,103 @@ */ static int p9_supported_radix_bits[4] = { 5, 9, 9, 13 }; +unsigned long __kvmhv_copy_tofrom_guest_radix(int lpid, int pid, + gva_t eaddr, void *to, void *from, + unsigned long n) +{ + unsigned long quadrant, ret = n; + int old_pid, old_lpid; + bool is_load = !!to; + + /* Can't access quadrants 1 or 2 in non-HV mode, call the HV to do it */ + if (kvmhv_on_pseries()) + return plpar_hcall_norets(H_COPY_TOFROM_GUEST, lpid, pid, eaddr, + __pa(to), __pa(from), n); + + quadrant = 1; + if (!pid) + quadrant = 2; + if (is_load) + from = (void *) (eaddr | (quadrant << 62)); + else + to = (void *) (eaddr | (quadrant << 62)); + + preempt_disable(); + + /* switch the lpid first to avoid running host with unallocated pid */ + old_lpid = mfspr(SPRN_LPID); + if (old_lpid != lpid) + mtspr(SPRN_LPID, lpid); + if (quadrant == 1) { + old_pid = mfspr(SPRN_PID); + if (old_pid != pid) + mtspr(SPRN_PID, pid); + } + isync(); + + pagefault_disable(); + if (is_load) + ret = raw_copy_from_user(to, from, n); + else + ret = raw_copy_to_user(to, from, n); + pagefault_enable(); + + /* switch the pid first to avoid running host with unallocated pid */ + if (quadrant == 1 && pid != old_pid) + mtspr(SPRN_PID, old_pid); + if (lpid != old_lpid) + mtspr(SPRN_LPID, old_lpid); + isync(); + + preempt_enable(); + + return ret; +} +EXPORT_SYMBOL_GPL(__kvmhv_copy_tofrom_guest_radix); + +static long kvmhv_copy_tofrom_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, + void *to, void *from, unsigned long n) +{ + int lpid = vcpu->kvm->arch.lpid; + int pid = vcpu->arch.pid; + + /* This would cause a data segment intr so don't allow the access */ + if (eaddr & (0x3FFUL << 52)) + return -EINVAL; + + /* Should we be using the nested lpid */ + if (vcpu->arch.nested) + lpid = vcpu->arch.nested->shadow_lpid; + + /* If accessing quadrant 3 then pid is expected to be 0 */ + if (((eaddr >> 62) & 0x3) == 0x3) + pid = 0; + + eaddr &= ~(0xFFFUL << 52); + + return __kvmhv_copy_tofrom_guest_radix(lpid, pid, eaddr, to, from, n); +} + +long kvmhv_copy_from_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *to, + unsigned long n) +{ + long ret; + + ret = kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, to, NULL, n); + if (ret > 0) + memset(to + (n - ret), 0, ret); + + return ret; +} +EXPORT_SYMBOL_GPL(kvmhv_copy_from_guest_radix); + +long kvmhv_copy_to_guest_radix(struct kvm_vcpu *vcpu, gva_t eaddr, void *from, + unsigned long n) +{ + return kvmhv_copy_tofrom_guest_radix(vcpu, eaddr, NULL, from, n); +} +EXPORT_SYMBOL_GPL(kvmhv_copy_to_guest_radix); + int kvmppc_mmu_walk_radix_tree(struct kvm_vcpu *vcpu, gva_t eaddr, struct kvmppc_pte *gpte, u64 root, u64 *pte_ret_p) @@ -197,8 +294,8 @@ int kvmppc_mmu_radix_xlate(struct kvm_vcpu *vcpu, gva_t eaddr, return 0; } -static void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, - unsigned int pshift, unsigned int lpid) +void kvmppc_radix_tlbie_page(struct kvm *kvm, unsigned long addr, + unsigned int pshift, unsigned int lpid) { unsigned long psize = PAGE_SIZE; int psi; @@ -284,7 +381,8 @@ static void kvmppc_pmd_free(pmd_t *pmdp) /* Called with kvm->mmu_lock held */ void kvmppc_unmap_pte(struct kvm *kvm, pte_t *pte, unsigned long gpa, - unsigned int shift, struct kvm_memory_slot *memslot, + unsigned int shift, + const struct kvm_memory_slot *memslot, unsigned int lpid) { @@ -683,6 +781,7 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, pte_t pte, *ptep; unsigned int shift, level; int ret; + bool large_enable; /* used to check for invalidations in progress */ mmu_seq = kvm->mmu_notifier_seq; @@ -732,12 +831,15 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, pte = *ptep; local_irq_enable(); + /* If we're logging dirty pages, always map single pages */ + large_enable = !(memslot->flags & KVM_MEM_LOG_DIRTY_PAGES); + /* Get pte level from shift/size */ - if (shift == PUD_SHIFT && + if (large_enable && shift == PUD_SHIFT && (gpa & (PUD_SIZE - PAGE_SIZE)) == (hva & (PUD_SIZE - PAGE_SIZE))) { level = 2; - } else if (shift == PMD_SHIFT && + } else if (large_enable && shift == PMD_SHIFT && (gpa & (PMD_SIZE - PAGE_SIZE)) == (hva & (PMD_SIZE - PAGE_SIZE))) { level = 1; @@ -857,7 +959,7 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, return ret; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -872,7 +974,7 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return 0; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -880,18 +982,24 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gpa = gfn << PAGE_SHIFT; unsigned int shift; int ref = 0; + unsigned long old, *rmapp; ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { - kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, - gpa, shift); + old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, + gpa, shift); /* XXX need to flush tlb here? */ + /* Also clear bit in ptes in shadow pgtable for nested guests */ + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; + kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_ACCESSED, 0, + old & PTE_RPN_MASK, + 1UL << shift); ref = 1; } return ref; } -/* Called with kvm->lock held */ +/* Called with kvm->mmu_lock held */ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, unsigned long gfn) { @@ -915,15 +1023,23 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, pte_t *ptep; unsigned int shift; int ret = 0; + unsigned long old, *rmapp; ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { ret = 1; if (shift) ret = 1 << (shift - PAGE_SHIFT); - kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, - gpa, shift); + spin_lock(&kvm->mmu_lock); + old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, + gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid); + /* Also clear bit in ptes in shadow pgtable for nested guests */ + rmapp = &memslot->arch.rmap[gfn - memslot->base_gfn]; + kvmhv_update_nest_rmap_rc_list(kvm, rmapp, _PAGE_DIRTY, 0, + old & PTE_RPN_MASK, + 1UL << shift); + spin_unlock(&kvm->mmu_lock); } return ret; } @@ -953,6 +1069,26 @@ long kvmppc_hv_get_dirty_log_radix(struct kvm *kvm, return 0; } +void kvmppc_radix_flush_memslot(struct kvm *kvm, + const struct kvm_memory_slot *memslot) +{ + unsigned long n; + pte_t *ptep; + unsigned long gpa; + unsigned int shift; + + gpa = memslot->base_gfn << PAGE_SHIFT; + spin_lock(&kvm->mmu_lock); + for (n = memslot->npages; n; --n) { + ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + if (ptep && pte_present(*ptep)) + kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, + kvm->arch.lpid); + gpa += PAGE_SIZE; + } + spin_unlock(&kvm->mmu_lock); +} + static void add_rmmu_ap_encoding(struct kvm_ppc_rmmu_info *info, int psize, int *indexp) { diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index a56f8413758a..5a066fc299e1 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -985,6 +985,10 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) kvmppc_set_gpr(vcpu, 3, 0); vcpu->arch.hcall_needed = 0; return -EINTR; + } else if (ret == H_TOO_HARD) { + kvmppc_set_gpr(vcpu, 3, 0); + vcpu->arch.hcall_needed = 0; + return RESUME_HOST; } break; case H_TLB_INVALIDATE: @@ -992,7 +996,11 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) if (nesting_enabled(vcpu->kvm)) ret = kvmhv_do_nested_tlbie(vcpu); break; - + case H_COPY_TOFROM_GUEST: + ret = H_FUNCTION; + if (nesting_enabled(vcpu->kvm)) + ret = kvmhv_copy_tofrom_guest_nested(vcpu); + break; default: return RESUME_HOST; } @@ -1336,7 +1344,7 @@ static int kvmppc_handle_exit_hv(struct kvm_run *run, struct kvm_vcpu *vcpu, return r; } -static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) +static int kvmppc_handle_nested_exit(struct kvm_run *run, struct kvm_vcpu *vcpu) { int r; int srcu_idx; @@ -1394,7 +1402,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) */ case BOOK3S_INTERRUPT_H_DATA_STORAGE: srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(vcpu); + r = kvmhv_nested_page_fault(run, vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; case BOOK3S_INTERRUPT_H_INST_STORAGE: @@ -1404,7 +1412,7 @@ static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu) if (vcpu->arch.shregs.msr & HSRR1_HISI_WRITE) vcpu->arch.fault_dsisr |= DSISR_ISSTORE; srcu_idx = srcu_read_lock(&vcpu->kvm->srcu); - r = kvmhv_nested_page_fault(vcpu); + r = kvmhv_nested_page_fault(run, vcpu); srcu_read_unlock(&vcpu->kvm->srcu, srcu_idx); break; @@ -4059,7 +4067,7 @@ int kvmhv_run_single_vcpu(struct kvm_run *kvm_run, if (!nested) r = kvmppc_handle_exit_hv(kvm_run, vcpu, current); else - r = kvmppc_handle_nested_exit(vcpu); + r = kvmppc_handle_nested_exit(kvm_run, vcpu); } vcpu->arch.ret = r; @@ -4371,7 +4379,8 @@ static int kvmppc_core_prepare_memory_region_hv(struct kvm *kvm, static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { unsigned long npages = mem->memory_size >> PAGE_SHIFT; @@ -4383,6 +4392,23 @@ static void kvmppc_core_commit_memory_region_hv(struct kvm *kvm, */ if (npages) atomic64_inc(&kvm->arch.mmio_update); + + /* + * For change == KVM_MR_MOVE or KVM_MR_DELETE, higher levels + * have already called kvm_arch_flush_shadow_memslot() to + * flush shadow mappings. For KVM_MR_CREATE we have no + * previous mappings. So the only case to handle is + * KVM_MR_FLAGS_ONLY when the KVM_MEM_LOG_DIRTY_PAGES bit + * has been changed. + * For radix guests, we flush on setting KVM_MEM_LOG_DIRTY_PAGES + * to get rid of any THP PTEs in the partition-scoped page tables + * so we can track dirtiness at the page level; we flush when + * clearing KVM_MEM_LOG_DIRTY_PAGES so that we can go back to + * using THP PTEs. + */ + if (change == KVM_MR_FLAGS_ONLY && kvm_is_radix(kvm) && + ((new->flags ^ old->flags) & KVM_MEM_LOG_DIRTY_PAGES)) + kvmppc_radix_flush_memslot(kvm, old); } /* @@ -4532,12 +4558,15 @@ int kvmppc_switch_mmu_to_hpt(struct kvm *kvm) { if (nesting_enabled(kvm)) kvmhv_release_all_nested(kvm); + kvmppc_rmap_reset(kvm); + kvm->arch.process_table = 0; + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); + kvm->arch.radix = 0; + spin_unlock(&kvm->mmu_lock); kvmppc_free_radix(kvm); kvmppc_update_lpcr(kvm, LPCR_VPM1, LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); - kvmppc_rmap_reset(kvm); - kvm->arch.radix = 0; - kvm->arch.process_table = 0; return 0; } @@ -4549,12 +4578,14 @@ int kvmppc_switch_mmu_to_radix(struct kvm *kvm) err = kvmppc_init_vm_radix(kvm); if (err) return err; - + kvmppc_rmap_reset(kvm); + /* Mutual exclusion with kvm_unmap_hva_range etc. */ + spin_lock(&kvm->mmu_lock); + kvm->arch.radix = 1; + spin_unlock(&kvm->mmu_lock); kvmppc_free_hpt(&kvm->arch.hpt); kvmppc_update_lpcr(kvm, LPCR_UPRT | LPCR_GTSE | LPCR_HR, LPCR_VPM1 | LPCR_UPRT | LPCR_GTSE | LPCR_HR); - kvmppc_rmap_reset(kvm); - kvm->arch.radix = 1; return 0; } @@ -5214,6 +5245,44 @@ static int kvmhv_enable_nested(struct kvm *kvm) return 0; } +static int kvmhv_load_from_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size) +{ + int rc = -EINVAL; + + if (kvmhv_vcpu_is_radix(vcpu)) { + rc = kvmhv_copy_from_guest_radix(vcpu, *eaddr, ptr, size); + + if (rc > 0) + rc = -EINVAL; + } + + /* For now quadrants are the only way to access nested guest memory */ + if (rc && vcpu->arch.nested) + rc = -EAGAIN; + + return rc; +} + +static int kvmhv_store_to_eaddr(struct kvm_vcpu *vcpu, ulong *eaddr, void *ptr, + int size) +{ + int rc = -EINVAL; + + if (kvmhv_vcpu_is_radix(vcpu)) { + rc = kvmhv_copy_to_guest_radix(vcpu, *eaddr, ptr, size); + + if (rc > 0) + rc = -EINVAL; + } + + /* For now quadrants are the only way to access nested guest memory */ + if (rc && vcpu->arch.nested) + rc = -EAGAIN; + + return rc; +} + static struct kvmppc_ops kvm_ops_hv = { .get_sregs = kvm_arch_vcpu_ioctl_get_sregs_hv, .set_sregs = kvm_arch_vcpu_ioctl_set_sregs_hv, @@ -5254,6 +5323,8 @@ static struct kvmppc_ops kvm_ops_hv = { .get_rmmu_info = kvmhv_get_rmmu_info, .set_smt_mode = kvmhv_set_smt_mode, .enable_nested = kvmhv_enable_nested, + .load_from_eaddr = kvmhv_load_from_eaddr, + .store_to_eaddr = kvmhv_store_to_eaddr, }; static int kvm_init_subcore_bitmap(void) diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index 401d2ecbebc5..735e0ac6f5b2 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -195,6 +195,26 @@ void kvmhv_restore_hv_return_state(struct kvm_vcpu *vcpu, vcpu->arch.ppr = hr->ppr; } +static void kvmhv_nested_mmio_needed(struct kvm_vcpu *vcpu, u64 regs_ptr) +{ + /* No need to reflect the page fault to L1, we've handled it */ + vcpu->arch.trap = 0; + + /* + * Since the L2 gprs have already been written back into L1 memory when + * we complete the mmio, store the L1 memory location of the L2 gpr + * being loaded into by the mmio so that the loaded value can be + * written there in kvmppc_complete_mmio_load() + */ + if (((vcpu->arch.io_gpr & KVM_MMIO_REG_EXT_MASK) == KVM_MMIO_REG_GPR) + && (vcpu->mmio_is_write == 0)) { + vcpu->arch.nested_io_gpr = (gpa_t) regs_ptr + + offsetof(struct pt_regs, + gpr[vcpu->arch.io_gpr]); + vcpu->arch.io_gpr = KVM_MMIO_REG_NESTED_GPR; + } +} + long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) { long int err, r; @@ -316,6 +336,11 @@ long kvmhv_enter_nested_guest(struct kvm_vcpu *vcpu) if (r == -EINTR) return H_INTERRUPT; + if (vcpu->mmio_needed) { + kvmhv_nested_mmio_needed(vcpu, regs_ptr); + return H_TOO_HARD; + } + return vcpu->arch.trap; } @@ -437,6 +462,81 @@ long kvmhv_set_partition_table(struct kvm_vcpu *vcpu) } /* + * Handle the H_COPY_TOFROM_GUEST hcall. + * r4 = L1 lpid of nested guest + * r5 = pid + * r6 = eaddr to access + * r7 = to buffer (L1 gpa) + * r8 = from buffer (L1 gpa) + * r9 = n bytes to copy + */ +long kvmhv_copy_tofrom_guest_nested(struct kvm_vcpu *vcpu) +{ + struct kvm_nested_guest *gp; + int l1_lpid = kvmppc_get_gpr(vcpu, 4); + int pid = kvmppc_get_gpr(vcpu, 5); + gva_t eaddr = kvmppc_get_gpr(vcpu, 6); + gpa_t gp_to = (gpa_t) kvmppc_get_gpr(vcpu, 7); + gpa_t gp_from = (gpa_t) kvmppc_get_gpr(vcpu, 8); + void *buf; + unsigned long n = kvmppc_get_gpr(vcpu, 9); + bool is_load = !!gp_to; + long rc; + + if (gp_to && gp_from) /* One must be NULL to determine the direction */ + return H_PARAMETER; + + if (eaddr & (0xFFFUL << 52)) + return H_PARAMETER; + + buf = kzalloc(n, GFP_KERNEL); + if (!buf) + return H_NO_MEM; + + gp = kvmhv_get_nested(vcpu->kvm, l1_lpid, false); + if (!gp) { + rc = H_PARAMETER; + goto out_free; + } + + mutex_lock(&gp->tlb_lock); + + if (is_load) { + /* Load from the nested guest into our buffer */ + rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid, + eaddr, buf, NULL, n); + if (rc) + goto not_found; + + /* Write what was loaded into our buffer back to the L1 guest */ + rc = kvm_vcpu_write_guest(vcpu, gp_to, buf, n); + if (rc) + goto not_found; + } else { + /* Load the data to be stored from the L1 guest into our buf */ + rc = kvm_vcpu_read_guest(vcpu, gp_from, buf, n); + if (rc) + goto not_found; + + /* Store from our buffer into the nested guest */ + rc = __kvmhv_copy_tofrom_guest_radix(gp->shadow_lpid, pid, + eaddr, NULL, buf, n); + if (rc) + goto not_found; + } + +out_unlock: + mutex_unlock(&gp->tlb_lock); + kvmhv_put_nested(gp); +out_free: + kfree(buf); + return rc; +not_found: + rc = H_NOT_FOUND; + goto out_unlock; +} + +/* * Reload the partition table entry for a guest. * Caller must hold gp->tlb_lock. */ @@ -480,6 +580,7 @@ struct kvm_nested_guest *kvmhv_alloc_nested(struct kvm *kvm, unsigned int lpid) if (shadow_lpid < 0) goto out_free2; gp->shadow_lpid = shadow_lpid; + gp->radix = 1; memset(gp->prev_cpu, -1, sizeof(gp->prev_cpu)); @@ -687,6 +788,57 @@ void kvmhv_insert_nest_rmap(struct kvm *kvm, unsigned long *rmapp, *n_rmap = NULL; } +static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap, + unsigned long clr, unsigned long set, + unsigned long hpa, unsigned long mask) +{ + struct kvm_nested_guest *gp; + unsigned long gpa; + unsigned int shift, lpid; + pte_t *ptep; + + gpa = n_rmap & RMAP_NESTED_GPA_MASK; + lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT; + gp = kvmhv_find_nested(kvm, lpid); + if (!gp) + return; + + /* Find the pte */ + ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + /* + * If the pte is present and the pfn is still the same, update the pte. + * If the pfn has changed then this is a stale rmap entry, the nested + * gpa actually points somewhere else now, and there is nothing to do. + * XXX A future optimisation would be to remove the rmap entry here. + */ + if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) { + __radix_pte_update(ptep, clr, set); + kvmppc_radix_tlbie_page(kvm, gpa, shift, lpid); + } +} + +/* + * For a given list of rmap entries, update the rc bits in all ptes in shadow + * page tables for nested guests which are referenced by the rmap list. + */ +void kvmhv_update_nest_rmap_rc_list(struct kvm *kvm, unsigned long *rmapp, + unsigned long clr, unsigned long set, + unsigned long hpa, unsigned long nbytes) +{ + struct llist_node *entry = ((struct llist_head *) rmapp)->first; + struct rmap_nested *cursor; + unsigned long rmap, mask; + + if ((clr | set) & ~(_PAGE_DIRTY | _PAGE_ACCESSED)) + return; + + mask = PTE_RPN_MASK & ~(nbytes - 1); + hpa &= mask; + + for_each_nest_rmap_safe(cursor, entry, &rmap) + kvmhv_update_nest_rmap_rc(kvm, rmap, clr, set, hpa, mask); +} + static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap, unsigned long hpa, unsigned long mask) { @@ -723,7 +875,7 @@ static void kvmhv_remove_nest_rmap_list(struct kvm *kvm, unsigned long *rmapp, /* called with kvm->mmu_lock held */ void kvmhv_remove_nest_rmap_range(struct kvm *kvm, - struct kvm_memory_slot *memslot, + const struct kvm_memory_slot *memslot, unsigned long gpa, unsigned long hpa, unsigned long nbytes) { @@ -1049,7 +1201,7 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu, struct kvm *kvm = vcpu->kvm; bool writing = !!(dsisr & DSISR_ISSTORE); u64 pgflags; - bool ret; + long ret; /* Are the rc bits set in the L1 partition scoped pte? */ pgflags = _PAGE_ACCESSED; @@ -1062,16 +1214,22 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu, /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */ ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing, gpte.raddr, kvm->arch.lpid); - spin_unlock(&kvm->mmu_lock); - if (!ret) - return -EINVAL; + if (!ret) { + ret = -EINVAL; + goto out_unlock; + } /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */ ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa, gp->shadow_lpid); if (!ret) - return -EINVAL; - return 0; + ret = -EINVAL; + else + ret = 0; + +out_unlock: + spin_unlock(&kvm->mmu_lock); + return ret; } static inline int kvmppc_radix_level_to_shift(int level) @@ -1099,7 +1257,8 @@ static inline int kvmppc_radix_shift_to_level(int shift) } /* called with gp->tlb_lock held */ -static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, +static long int __kvmhv_nested_page_fault(struct kvm_run *run, + struct kvm_vcpu *vcpu, struct kvm_nested_guest *gp) { struct kvm *kvm = vcpu->kvm; @@ -1180,9 +1339,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, kvmppc_core_queue_data_storage(vcpu, ea, dsisr); return RESUME_GUEST; } - /* passthrough of emulated MMIO case... */ - pr_err("emulated MMIO passthrough?\n"); - return -EINVAL; + + /* passthrough of emulated MMIO case */ + return kvmppc_hv_emulate_mmio(run, vcpu, gpa, ea, writing); } if (memslot->flags & KVM_MEM_READONLY) { if (writing) { @@ -1220,6 +1379,8 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, return ret; shift = kvmppc_radix_level_to_shift(level); } + /* Align gfn to the start of the page */ + gfn = (gpa & ~((1UL << shift) - 1)) >> PAGE_SHIFT; /* 3. Compute the pte we need to insert for nest_gpa -> host r_addr */ @@ -1227,6 +1388,9 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, perm |= gpte.may_read ? 0UL : _PAGE_READ; perm |= gpte.may_write ? 0UL : _PAGE_WRITE; perm |= gpte.may_execute ? 0UL : _PAGE_EXEC; + /* Only set accessed/dirty (rc) bits if set in host and l1 guest ptes */ + perm |= (gpte.rc & _PAGE_ACCESSED) ? 0UL : _PAGE_ACCESSED; + perm |= ((gpte.rc & _PAGE_DIRTY) && writing) ? 0UL : _PAGE_DIRTY; pte = __pte(pte_val(pte) & ~perm); /* What size pte can we insert? */ @@ -1264,13 +1428,13 @@ static long int __kvmhv_nested_page_fault(struct kvm_vcpu *vcpu, return RESUME_GUEST; } -long int kvmhv_nested_page_fault(struct kvm_vcpu *vcpu) +long int kvmhv_nested_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu) { struct kvm_nested_guest *gp = vcpu->arch.nested; long int ret; mutex_lock(&gp->tlb_lock); - ret = __kvmhv_nested_page_fault(vcpu, gp); + ret = __kvmhv_nested_page_fault(run, vcpu, gp); mutex_unlock(&gp->tlb_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index a67cf1cdeda4..3b3791ed74a6 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -107,7 +107,7 @@ void kvmppc_add_revmap_chain(struct kvm *kvm, struct revmap_entry *rev, EXPORT_SYMBOL_GPL(kvmppc_add_revmap_chain); /* Update the dirty bitmap of a memslot */ -void kvmppc_update_dirty_map(struct kvm_memory_slot *memslot, +void kvmppc_update_dirty_map(const struct kvm_memory_slot *memslot, unsigned long gfn, unsigned long psize) { unsigned long npages; diff --git a/arch/powerpc/kvm/book3s_pr.c b/arch/powerpc/kvm/book3s_pr.c index 4efd65d9e828..811a3c2fb0e9 100644 --- a/arch/powerpc/kvm/book3s_pr.c +++ b/arch/powerpc/kvm/book3s_pr.c @@ -587,6 +587,7 @@ void kvmppc_set_pvr_pr(struct kvm_vcpu *vcpu, u32 pvr) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: + case PVR_POWER9: vcpu->arch.hflags |= BOOK3S_HFLAG_MULTI_PGSIZE | BOOK3S_HFLAG_NEW_TLBIE; break; @@ -1913,7 +1914,8 @@ static int kvmppc_core_prepare_memory_region_pr(struct kvm *kvm, static void kvmppc_core_commit_memory_region_pr(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { return; } diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index b0b2bfc2ff51..f27ee57ab46e 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -1015,17 +1015,7 @@ static int xics_debug_show(struct seq_file *m, void *private) return 0; } -static int xics_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xics_debug_show, inode->i_private); -} - -static const struct file_operations xics_debug_fops = { - .open = xics_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xics_debug); static void xics_debugfs_init(struct kvmppc_xics *xics) { diff --git a/arch/powerpc/kvm/book3s_xive.c b/arch/powerpc/kvm/book3s_xive.c index ad4a370703d3..f78d002f0fe0 100644 --- a/arch/powerpc/kvm/book3s_xive.c +++ b/arch/powerpc/kvm/book3s_xive.c @@ -1968,17 +1968,7 @@ static int xive_debug_show(struct seq_file *m, void *private) return 0; } -static int xive_debug_open(struct inode *inode, struct file *file) -{ - return single_open(file, xive_debug_show, inode->i_private); -} - -static const struct file_operations xive_debug_fops = { - .open = xive_debug_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; +DEFINE_SHOW_ATTRIBUTE(xive_debug); static void xive_debugfs_init(struct kvmppc_xive *xive) { diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index a9ca016da670..dbec4128bb51 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -1833,7 +1833,8 @@ int kvmppc_core_prepare_memory_region(struct kvm *kvm, void kvmppc_core_commit_memory_region(struct kvm *kvm, const struct kvm_userspace_memory_region *mem, const struct kvm_memory_slot *old, - const struct kvm_memory_slot *new) + const struct kvm_memory_slot *new, + enum kvm_mr_change change) { } diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index 8f2985e46f6f..c3f312b2bcb3 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -757,10 +757,11 @@ int kvm_test_age_hva(struct kvm *kvm, unsigned long hva) return 0; } -void kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) +int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte) { /* The page will get remapped properly on its next fault */ kvm_unmap_hva(kvm, hva); + return 0; } /*****************************************/ diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2869a299c4ed..b90a7d154180 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -331,10 +331,17 @@ int kvmppc_st(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; - int r; + int r = -EINVAL; vcpu->stat.st++; + if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->store_to_eaddr) + r = vcpu->kvm->arch.kvm_ops->store_to_eaddr(vcpu, eaddr, ptr, + size); + + if ((!r) || (r == -EAGAIN)) + return r; + r = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, XLATE_WRITE, &pte); if (r < 0) @@ -367,10 +374,17 @@ int kvmppc_ld(struct kvm_vcpu *vcpu, ulong *eaddr, int size, void *ptr, { ulong mp_pa = vcpu->arch.magic_page_pa & KVM_PAM & PAGE_MASK; struct kvmppc_pte pte; - int rc; + int rc = -EINVAL; vcpu->stat.ld++; + if (vcpu->kvm->arch.kvm_ops && vcpu->kvm->arch.kvm_ops->load_from_eaddr) + rc = vcpu->kvm->arch.kvm_ops->load_from_eaddr(vcpu, eaddr, ptr, + size); + + if ((!rc) || (rc == -EAGAIN)) + return rc; + rc = kvmppc_xlate(vcpu, *eaddr, data ? XLATE_DATA : XLATE_INST, XLATE_READ, &pte); if (rc) @@ -518,7 +532,6 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_PPC_UNSET_IRQ: case KVM_CAP_PPC_IRQ_LEVEL: case KVM_CAP_ENABLE_CAP: - case KVM_CAP_ENABLE_CAP_VM: case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: @@ -543,8 +556,11 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: case KVM_CAP_SPAPR_TCE_64: - /* fallthrough */ + r = 1; + break; case KVM_CAP_SPAPR_TCE_VFIO: + r = !!cpu_has_feature(CPU_FTR_HVMODE); + break; case KVM_CAP_PPC_RTAS: case KVM_CAP_PPC_FIXUP_HCALL: case KVM_CAP_PPC_ENABLE_HCALL: @@ -696,7 +712,7 @@ void kvm_arch_commit_memory_region(struct kvm *kvm, const struct kvm_memory_slot *new, enum kvm_mr_change change) { - kvmppc_core_commit_memory_region(kvm, mem, old, new); + kvmppc_core_commit_memory_region(kvm, mem, old, new, change); } void kvm_arch_flush_shadow_memslot(struct kvm *kvm, @@ -1192,6 +1208,14 @@ static void kvmppc_complete_mmio_load(struct kvm_vcpu *vcpu, kvmppc_set_vmx_byte(vcpu, gpr); break; #endif +#ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE + case KVM_MMIO_REG_NESTED_GPR: + if (kvmppc_need_byteswap(vcpu)) + gpr = swab64(gpr); + kvm_vcpu_write_guest(vcpu, vcpu->arch.nested_io_gpr, &gpr, + sizeof(gpr)); + break; +#endif default: BUG(); } @@ -2084,8 +2108,8 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_event, } -static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, - struct kvm_enable_cap *cap) +int kvm_vm_ioctl_enable_cap(struct kvm *kvm, + struct kvm_enable_cap *cap) { int r; @@ -2273,15 +2297,6 @@ long kvm_arch_vm_ioctl(struct file *filp, break; } - case KVM_ENABLE_CAP: - { - struct kvm_enable_cap cap; - r = -EFAULT; - if (copy_from_user(&cap, argp, sizeof(cap))) - goto out; - r = kvm_vm_ioctl_enable_cap(kvm, &cap); - break; - } #ifdef CONFIG_SPAPR_TCE_IOMMU case KVM_CREATE_SPAPR_TCE_64: { struct kvm_create_spapr_tce_64 create_tce_64; diff --git a/arch/powerpc/mm/dump_linuxpagetables.c b/arch/powerpc/mm/dump_linuxpagetables.c index 2b74f8adf4d0..6aa41669ac1a 100644 --- a/arch/powerpc/mm/dump_linuxpagetables.c +++ b/arch/powerpc/mm/dump_linuxpagetables.c @@ -19,6 +19,7 @@ #include <linux/hugetlb.h> #include <linux/io.h> #include <linux/mm.h> +#include <linux/highmem.h> #include <linux/sched.h> #include <linux/seq_file.h> #include <asm/fixmap.h> diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 1697e903bbf2..2e6fb1d758c3 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -636,6 +636,7 @@ void bad_page_fault(struct pt_regs *regs, unsigned long address, int sig) switch (TRAP(regs)) { case 0x300: case 0x380: + case 0xe00: printk(KERN_ALERT "Unable to handle kernel paging request for " "data at address 0x%08lx\n", regs->dar); break; diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 7a9886f98b0c..a5091c034747 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -188,15 +188,20 @@ int __meminit vmemmap_populate(unsigned long start, unsigned long end, int node, pr_debug("vmemmap_populate %lx..%lx, node %d\n", start, end, node); for (; start < end; start += page_size) { - void *p; + void *p = NULL; int rc; if (vmemmap_populated(start, page_size)) continue; + /* + * Allocate from the altmap first if we have one. This may + * fail due to alignment issues when using 16MB hugepages, so + * fall back to system memory if the altmap allocation fail. + */ if (altmap) p = altmap_alloc_block_buf(page_size, altmap); - else + if (!p) p = vmemmap_alloc_block_buf(page_size, node); if (!p) return -ENOMEM; @@ -255,8 +260,15 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, { unsigned long page_size = 1 << mmu_psize_defs[mmu_vmemmap_psize].shift; unsigned long page_order = get_order(page_size); + unsigned long alt_start = ~0, alt_end = ~0; + unsigned long base_pfn; start = _ALIGN_DOWN(start, page_size); + if (altmap) { + alt_start = altmap->base_pfn; + alt_end = altmap->base_pfn + altmap->reserve + + altmap->free + altmap->alloc + altmap->align; + } pr_debug("vmemmap_free %lx...%lx\n", start, end); @@ -280,8 +292,9 @@ void __ref vmemmap_free(unsigned long start, unsigned long end, page = pfn_to_page(addr >> PAGE_SHIFT); section_base = pfn_to_page(vmemmap_section_start(start)); nr_pages = 1 << page_order; + base_pfn = PHYS_PFN(addr); - if (altmap) { + if (base_pfn >= alt_start && base_pfn < alt_end) { vmem_altmap_free(altmap, nr_pages); } else if (PageReserved(page)) { /* allocated from bootmem */ diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 17482f5de3e2..9393e231cbc2 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -891,6 +891,55 @@ cond_branch: return 0; } +/* Fix the branch target addresses for subprog calls */ +static int bpf_jit_fixup_subprog_calls(struct bpf_prog *fp, u32 *image, + struct codegen_context *ctx, u32 *addrs) +{ + const struct bpf_insn *insn = fp->insnsi; + bool func_addr_fixed; + u64 func_addr; + u32 tmp_idx; + int i, ret; + + for (i = 0; i < fp->len; i++) { + /* + * During the extra pass, only the branch target addresses for + * the subprog calls need to be fixed. All other instructions + * can left untouched. + * + * The JITed image length does not change because we already + * ensure that the JITed instruction sequence for these calls + * are of fixed length by padding them with NOPs. + */ + if (insn[i].code == (BPF_JMP | BPF_CALL) && + insn[i].src_reg == BPF_PSEUDO_CALL) { + ret = bpf_jit_get_func_addr(fp, &insn[i], true, + &func_addr, + &func_addr_fixed); + if (ret < 0) + return ret; + + /* + * Save ctx->idx as this would currently point to the + * end of the JITed image and set it to the offset of + * the instruction sequence corresponding to the + * subprog call temporarily. + */ + tmp_idx = ctx->idx; + ctx->idx = addrs[i] / 4; + bpf_jit_emit_func_call_rel(image, ctx, func_addr); + + /* + * Restore ctx->idx here. This is safe as the length + * of the JITed sequence remains unchanged. + */ + ctx->idx = tmp_idx; + } + } + + return 0; +} + struct powerpc64_jit_data { struct bpf_binary_header *header; u32 *addrs; @@ -989,6 +1038,22 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); + if (extra_pass) { + /* + * Do not touch the prologue and epilogue as they will remain + * unchanged. Only fix the branch target address for subprog + * calls in the body. + * + * This does not change the offsets and lengths of the subprog + * call instruction sequences and hence, the size of the JITed + * image as well. + */ + bpf_jit_fixup_subprog_calls(fp, code_base, &cgctx, addrs); + + /* There is no need to perform the usual passes. */ + goto skip_codegen_passes; + } + /* Code generation passes 1-2 */ for (pass = 1; pass < 3; pass++) { /* Now build the prologue, body code & epilogue for real. */ @@ -1002,6 +1067,7 @@ skip_init_ctx: proglen - (cgctx.idx * 4), cgctx.seen); } +skip_codegen_passes: if (bpf_jit_enable > 1) /* * Note that we output the base address of the code_base diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 2e4bd32154b5..472b784f01eb 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -140,8 +140,7 @@ config IBMEBUS Bus device driver for GX bus based adapters. config PAPR_SCM - depends on PPC_PSERIES && MEMORY_HOTPLUG - select LIBNVDIMM + depends on PPC_PSERIES && MEMORY_HOTPLUG && LIBNVDIMM tristate "Support for the PAPR Storage Class Memory interface" help Enable access to hypervisor provided storage class memory. diff --git a/arch/powerpc/platforms/pseries/papr_scm.c b/arch/powerpc/platforms/pseries/papr_scm.c index ee9372b65ca5..7d6457ab5d34 100644 --- a/arch/powerpc/platforms/pseries/papr_scm.c +++ b/arch/powerpc/platforms/pseries/papr_scm.c @@ -55,7 +55,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p) do { rc = plpar_hcall(H_SCM_BIND_MEM, ret, p->drc_index, 0, p->blocks, BIND_ANY_ADDR, token); - token = be64_to_cpu(ret[0]); + token = ret[0]; cond_resched(); } while (rc == H_BUSY); @@ -64,7 +64,7 @@ static int drc_pmem_bind(struct papr_scm_priv *p) return -ENXIO; } - p->bound_addr = be64_to_cpu(ret[1]); + p->bound_addr = ret[1]; dev_dbg(&p->pdev->dev, "bound drc %x to %pR\n", p->drc_index, &p->res); @@ -82,7 +82,7 @@ static int drc_pmem_unbind(struct papr_scm_priv *p) do { rc = plpar_hcall(H_SCM_UNBIND_MEM, ret, p->drc_index, p->bound_addr, p->blocks, token); - token = be64_to_cpu(ret); + token = ret[0]; cond_resched(); } while (rc == H_BUSY); @@ -223,6 +223,9 @@ static int papr_scm_nvdimm_init(struct papr_scm_priv *p) goto err; } + if (nvdimm_bus_check_dimm_count(p->bus, 1)) + goto err; + /* now add the region */ memset(&mapping, 0, sizeof(mapping)); @@ -257,9 +260,12 @@ err: nvdimm_bus_unregister(p->bus); static int papr_scm_probe(struct platform_device *pdev) { - uint32_t drc_index, metadata_size, unit_cap[2]; struct device_node *dn = pdev->dev.of_node; + u32 drc_index, metadata_size; + u64 blocks, block_size; struct papr_scm_priv *p; + const char *uuid_str; + u64 uuid[2]; int rc; /* check we have all the required DT properties */ @@ -268,8 +274,18 @@ static int papr_scm_probe(struct platform_device *pdev) return -ENODEV; } - if (of_property_read_u32_array(dn, "ibm,unit-capacity", unit_cap, 2)) { - dev_err(&pdev->dev, "%pOF: missing unit-capacity!\n", dn); + if (of_property_read_u64(dn, "ibm,block-size", &block_size)) { + dev_err(&pdev->dev, "%pOF: missing block-size!\n", dn); + return -ENODEV; + } + + if (of_property_read_u64(dn, "ibm,number-of-blocks", &blocks)) { + dev_err(&pdev->dev, "%pOF: missing number-of-blocks!\n", dn); + return -ENODEV; + } + + if (of_property_read_string(dn, "ibm,unit-guid", &uuid_str)) { + dev_err(&pdev->dev, "%pOF: missing unit-guid!\n", dn); return -ENODEV; } @@ -282,8 +298,13 @@ static int papr_scm_probe(struct platform_device *pdev) p->dn = dn; p->drc_index = drc_index; - p->block_size = unit_cap[0]; - p->blocks = unit_cap[1]; + p->block_size = block_size; + p->blocks = blocks; + + /* We just need to ensure that set cookies are unique across */ + uuid_parse(uuid_str, (uuid_t *) uuid); + p->nd_set.cookie1 = uuid[0]; + p->nd_set.cookie2 = uuid[1]; /* might be zero */ p->metadata_size = metadata_size; @@ -296,7 +317,7 @@ static int papr_scm_probe(struct platform_device *pdev) /* setup the resource for the newly bound range */ p->res.start = p->bound_addr; - p->res.end = p->bound_addr + p->blocks * p->block_size; + p->res.end = p->bound_addr + p->blocks * p->block_size - 1; p->res.name = pdev->name; p->res.flags = IORESOURCE_MEM; |