diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 12:01:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-07-24 12:01:20 -0700 |
commit | 5fecc9d8f59e765c2a48379dd7c6f5cf88c7d75a (patch) | |
tree | d1fc25d9650d3ac24591bba6f5e2e7a1afc54796 /arch/powerpc | |
parent | 3c4cfadef6a1665d9cd02a543782d03d3e6740c6 (diff) | |
parent | 1a577b72475d161b6677c05abe57301362023bb2 (diff) |
Merge tag 'kvm-3.6-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm
Pull KVM updates from Avi Kivity:
"Highlights include
- full big real mode emulation on pre-Westmere Intel hosts (can be
disabled with emulate_invalid_guest_state=0)
- relatively small ppc and s390 updates
- PCID/INVPCID support in guests
- EOI avoidance; 3.6 guests should perform better on 3.6 hosts on
interrupt intensive workloads)
- Lockless write faults during live migration
- EPT accessed/dirty bits support for new Intel processors"
Fix up conflicts in:
- Documentation/virtual/kvm/api.txt:
Stupid subchapter numbering, added next to each other.
- arch/powerpc/kvm/booke_interrupts.S:
PPC asm changes clashing with the KVM fixes
- arch/s390/include/asm/sigp.h, arch/s390/kvm/sigp.c:
Duplicated commits through the kvm tree and the s390 tree, with
subsequent edits in the KVM tree.
* tag 'kvm-3.6-1' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (93 commits)
KVM: fix race with level interrupts
x86, hyper: fix build with !CONFIG_KVM_GUEST
Revert "apic: fix kvm build on UP without IOAPIC"
KVM guest: switch to apic_set_eoi_write, apic_write
apic: add apic_set_eoi_write for PV use
KVM: VMX: Implement PCID/INVPCID for guests with EPT
KVM: Add x86_hyper_kvm to complete detect_hypervisor_platform check
KVM: PPC: Critical interrupt emulation support
KVM: PPC: e500mc: Fix tlbilx emulation for 64-bit guests
KVM: PPC64: booke: Set interrupt computation mode for 64-bit host
KVM: PPC: bookehv: Add ESR flag to Data Storage Interrupt
KVM: PPC: bookehv64: Add support for std/ld emulation.
booke: Added crit/mc exception handler for e500v2
booke/bookehv: Add host crit-watchdog exception support
KVM: MMU: document mmu-lock and fast page fault
KVM: MMU: fix kvm_mmu_pagetable_walk tracepoint
KVM: MMU: trace fast page fault
KVM: MMU: fast path of handling guest page fault
KVM: MMU: introduce SPTE_MMU_WRITEABLE bit
KVM: MMU: fold tlb flush judgement into mmu_spte_update
...
Diffstat (limited to 'arch/powerpc')
23 files changed, 365 insertions, 121 deletions
diff --git a/arch/powerpc/include/asm/epapr_hcalls.h b/arch/powerpc/include/asm/epapr_hcalls.h index 976835d8f22e..bf2c06c33871 100644 --- a/arch/powerpc/include/asm/epapr_hcalls.h +++ b/arch/powerpc/include/asm/epapr_hcalls.h @@ -153,6 +153,8 @@ #define EV_HCALL_CLOBBERS2 EV_HCALL_CLOBBERS3, "r5" #define EV_HCALL_CLOBBERS1 EV_HCALL_CLOBBERS2, "r4" +extern bool epapr_paravirt_enabled; +extern u32 epapr_hypercall_start[]; /* * We use "uintptr_t" to define a register because it's guaranteed to be a diff --git a/arch/powerpc/include/asm/hw_irq.h b/arch/powerpc/include/asm/hw_irq.h index 0554ab062bdc..e45c4947a772 100644 --- a/arch/powerpc/include/asm/hw_irq.h +++ b/arch/powerpc/include/asm/hw_irq.h @@ -34,6 +34,8 @@ extern void __replay_interrupt(unsigned int vector); extern void timer_interrupt(struct pt_regs *); extern void performance_monitor_exception(struct pt_regs *regs); +extern void WatchdogException(struct pt_regs *regs); +extern void unknown_exception(struct pt_regs *regs); #ifdef CONFIG_PPC64 #include <asm/paca.h> diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index b0c08b142770..0dd1d86d3e31 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -36,11 +36,8 @@ static inline void svcpu_put(struct kvmppc_book3s_shadow_vcpu *svcpu) #define SPAPR_TCE_SHIFT 12 #ifdef CONFIG_KVM_BOOK3S_64_HV -/* For now use fixed-size 16MB page table */ -#define HPT_ORDER 24 -#define HPT_NPTEG (1ul << (HPT_ORDER - 7)) /* 128B per pteg */ -#define HPT_NPTE (HPT_NPTEG << 3) /* 8 PTEs per PTEG */ -#define HPT_HASH_MASK (HPT_NPTEG - 1) +#define KVM_DEFAULT_HPT_ORDER 24 /* 16MB HPT by default */ +extern int kvm_hpt_order; /* order of preallocated HPTs */ #endif #define VRMA_VSID 0x1ffffffUL /* 1TB VSID reserved for VRMA */ diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index d848cdc49715..50ea12fd7bf5 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -237,6 +237,10 @@ struct kvm_arch { unsigned long vrma_slb_v; int rma_setup_done; int using_mmu_notifiers; + u32 hpt_order; + atomic_t vcpus_running; + unsigned long hpt_npte; + unsigned long hpt_mask; spinlock_t slot_phys_lock; unsigned long *slot_phys[KVM_MEM_SLOTS_NUM]; int slot_npages[KVM_MEM_SLOTS_NUM]; @@ -414,7 +418,9 @@ struct kvm_vcpu_arch { ulong mcsrr1; ulong mcsr; u32 dec; +#ifdef CONFIG_BOOKE u32 decar; +#endif u32 tbl; u32 tbu; u32 tcr; diff --git a/arch/powerpc/include/asm/kvm_ppc.h b/arch/powerpc/include/asm/kvm_ppc.h index f68c22fa2fce..0124937a23b9 100644 --- a/arch/powerpc/include/asm/kvm_ppc.h +++ b/arch/powerpc/include/asm/kvm_ppc.h @@ -119,7 +119,8 @@ extern void kvmppc_core_destroy_mmu(struct kvm_vcpu *vcpu); extern int kvmppc_kvm_pv(struct kvm_vcpu *vcpu); extern void kvmppc_map_magic(struct kvm_vcpu *vcpu); -extern long kvmppc_alloc_hpt(struct kvm *kvm); +extern long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp); +extern long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp); extern void kvmppc_free_hpt(struct kvm *kvm); extern long kvmppc_prepare_vrma(struct kvm *kvm, struct kvm_userspace_memory_region *mem); diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile index 83afacd3ba7b..bb282dd81612 100644 --- a/arch/powerpc/kernel/Makefile +++ b/arch/powerpc/kernel/Makefile @@ -128,6 +128,7 @@ ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),) obj-y += ppc_save_regs.o endif +obj-$(CONFIG_EPAPR_PARAVIRT) += epapr_paravirt.o epapr_hcalls.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvm_emul.o # Disable GCOV in odd or sensitive code diff --git a/arch/powerpc/kernel/epapr_hcalls.S b/arch/powerpc/kernel/epapr_hcalls.S new file mode 100644 index 000000000000..697b390ebfd8 --- /dev/null +++ b/arch/powerpc/kernel/epapr_hcalls.S @@ -0,0 +1,25 @@ +/* + * Copyright (C) 2012 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/threads.h> +#include <asm/reg.h> +#include <asm/page.h> +#include <asm/cputable.h> +#include <asm/thread_info.h> +#include <asm/ppc_asm.h> +#include <asm/asm-offsets.h> + +/* Hypercall entry point. Will be patched with device tree instructions. */ +.global epapr_hypercall_start +epapr_hypercall_start: + li r3, -1 + nop + nop + nop + blr diff --git a/arch/powerpc/kernel/epapr_paravirt.c b/arch/powerpc/kernel/epapr_paravirt.c new file mode 100644 index 000000000000..028aeae370b6 --- /dev/null +++ b/arch/powerpc/kernel/epapr_paravirt.c @@ -0,0 +1,52 @@ +/* + * ePAPR para-virtualization support. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + * + * Copyright (C) 2012 Freescale Semiconductor, Inc. + */ + +#include <linux/of.h> +#include <asm/epapr_hcalls.h> +#include <asm/cacheflush.h> +#include <asm/code-patching.h> + +bool epapr_paravirt_enabled; + +static int __init epapr_paravirt_init(void) +{ + struct device_node *hyper_node; + const u32 *insts; + int len, i; + + hyper_node = of_find_node_by_path("/hypervisor"); + if (!hyper_node) + return -ENODEV; + + insts = of_get_property(hyper_node, "hcall-instructions", &len); + if (!insts) + return -ENODEV; + + if (len % 4 || len > (4 * 4)) + return -ENODEV; + + for (i = 0; i < (len / 4); i++) + patch_instruction(epapr_hypercall_start + i, insts[i]); + + epapr_paravirt_enabled = true; + + return 0; +} + +early_initcall(epapr_paravirt_init); diff --git a/arch/powerpc/kernel/kvm.c b/arch/powerpc/kernel/kvm.c index 02c167db6ba0..867db1de8949 100644 --- a/arch/powerpc/kernel/kvm.c +++ b/arch/powerpc/kernel/kvm.c @@ -31,6 +31,7 @@ #include <asm/cacheflush.h> #include <asm/disassemble.h> #include <asm/ppc-opcode.h> +#include <asm/epapr_hcalls.h> #define KVM_MAGIC_PAGE (-4096L) #define magic_var(x) KVM_MAGIC_PAGE + offsetof(struct kvm_vcpu_arch_shared, x) @@ -726,7 +727,7 @@ unsigned long kvm_hypercall(unsigned long *in, unsigned long register r11 asm("r11") = nr; unsigned long register r12 asm("r12"); - asm volatile("bl kvm_hypercall_start" + asm volatile("bl epapr_hypercall_start" : "=r"(r0), "=r"(r3), "=r"(r4), "=r"(r5), "=r"(r6), "=r"(r7), "=r"(r8), "=r"(r9), "=r"(r10), "=r"(r11), "=r"(r12) @@ -747,29 +748,6 @@ unsigned long kvm_hypercall(unsigned long *in, } EXPORT_SYMBOL_GPL(kvm_hypercall); -static int kvm_para_setup(void) -{ - extern u32 kvm_hypercall_start; - struct device_node *hyper_node; - u32 *insts; - int len, i; - - hyper_node = of_find_node_by_path("/hypervisor"); - if (!hyper_node) - return -1; - - insts = (u32*)of_get_property(hyper_node, "hcall-instructions", &len); - if (len % 4) - return -1; - if (len > (4 * 4)) - return -1; - - for (i = 0; i < (len / 4); i++) - kvm_patch_ins(&(&kvm_hypercall_start)[i], insts[i]); - - return 0; -} - static __init void kvm_free_tmp(void) { unsigned long start, end; @@ -791,7 +769,7 @@ static int __init kvm_guest_init(void) if (!kvm_para_available()) goto free_tmp; - if (kvm_para_setup()) + if (!epapr_paravirt_enabled) goto free_tmp; if (kvm_para_has_feature(KVM_FEATURE_MAGIC_PAGE)) diff --git a/arch/powerpc/kernel/kvm_emul.S b/arch/powerpc/kernel/kvm_emul.S index e291cf3cf954..e100ff324a85 100644 --- a/arch/powerpc/kernel/kvm_emul.S +++ b/arch/powerpc/kernel/kvm_emul.S @@ -24,16 +24,6 @@ #include <asm/page.h> #include <asm/asm-offsets.h> -/* Hypercall entry point. Will be patched with device tree instructions. */ - -.global kvm_hypercall_start -kvm_hypercall_start: - li r3, -1 - nop - nop - nop - blr - #define KVM_MAGIC_PAGE (-4096) #ifdef CONFIG_64BIT @@ -132,7 +122,7 @@ kvm_emulate_mtmsrd_len: .long (kvm_emulate_mtmsrd_end - kvm_emulate_mtmsrd) / 4 -#define MSR_SAFE_BITS (MSR_EE | MSR_CE | MSR_ME | MSR_RI) +#define MSR_SAFE_BITS (MSR_EE | MSR_RI) #define MSR_CRITICAL_BITS ~MSR_SAFE_BITS .global kvm_emulate_mtmsr diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 80a577517584..d03eb6f7b058 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -37,56 +37,121 @@ /* POWER7 has 10-bit LPIDs, PPC970 has 6-bit LPIDs */ #define MAX_LPID_970 63 -long kvmppc_alloc_hpt(struct kvm *kvm) +/* Power architecture requires HPT is at least 256kB */ +#define PPC_MIN_HPT_ORDER 18 + +long kvmppc_alloc_hpt(struct kvm *kvm, u32 *htab_orderp) { unsigned long hpt; - long lpid; struct revmap_entry *rev; struct kvmppc_linear_info *li; + long order = kvm_hpt_order; - /* Allocate guest's hashed page table */ - li = kvm_alloc_hpt(); - if (li) { - /* using preallocated memory */ - hpt = (ulong)li->base_virt; - kvm->arch.hpt_li = li; - } else { - /* using dynamic memory */ + if (htab_orderp) { + order = *htab_orderp; + if (order < PPC_MIN_HPT_ORDER) + order = PPC_MIN_HPT_ORDER; + } + + /* + * If the user wants a different size from default, + * try first to allocate it from the kernel page allocator. + */ + hpt = 0; + if (order != kvm_hpt_order) { hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| - __GFP_NOWARN, HPT_ORDER - PAGE_SHIFT); + __GFP_NOWARN, order - PAGE_SHIFT); + if (!hpt) + --order; } + /* Next try to allocate from the preallocated pool */ if (!hpt) { - pr_err("kvm_alloc_hpt: Couldn't alloc HPT\n"); - return -ENOMEM; + li = kvm_alloc_hpt(); + if (li) { + hpt = (ulong)li->base_virt; + kvm->arch.hpt_li = li; + order = kvm_hpt_order; + } } + + /* Lastly try successively smaller sizes from the page allocator */ + while (!hpt && order > PPC_MIN_HPT_ORDER) { + hpt = __get_free_pages(GFP_KERNEL|__GFP_ZERO|__GFP_REPEAT| + __GFP_NOWARN, order - PAGE_SHIFT); + if (!hpt) + --order; + } + + if (!hpt) + return -ENOMEM; + kvm->arch.hpt_virt = hpt; + kvm->arch.hpt_order = order; + /* HPTEs are 2**4 bytes long */ + kvm->arch.hpt_npte = 1ul << (order - 4); + /* 128 (2**7) bytes in each HPTEG */ + kvm->arch.hpt_mask = (1ul << (order - 7)) - 1; /* Allocate reverse map array */ - rev = vmalloc(sizeof(struct revmap_entry) * HPT_NPTE); + rev = vmalloc(sizeof(struct revmap_entry) * kvm->arch.hpt_npte); if (!rev) { pr_err("kvmppc_alloc_hpt: Couldn't alloc reverse map array\n"); goto out_freehpt; } kvm->arch.revmap = rev; + kvm->arch.sdr1 = __pa(hpt) | (order - 18); - lpid = kvmppc_alloc_lpid(); - if (lpid < 0) - goto out_freeboth; + pr_info("KVM guest htab at %lx (order %ld), LPID %x\n", + hpt, order, kvm->arch.lpid); - kvm->arch.sdr1 = __pa(hpt) | (HPT_ORDER - 18); - kvm->arch.lpid = lpid; - - pr_info("KVM guest htab at %lx, LPID %lx\n", hpt, lpid); + if (htab_orderp) + *htab_orderp = order; return 0; - out_freeboth: - vfree(rev); out_freehpt: - free_pages(hpt, HPT_ORDER - PAGE_SHIFT); + if (kvm->arch.hpt_li) + kvm_release_hpt(kvm->arch.hpt_li); + else + free_pages(hpt, order - PAGE_SHIFT); return -ENOMEM; } +long kvmppc_alloc_reset_hpt(struct kvm *kvm, u32 *htab_orderp) +{ + long err = -EBUSY; + long order; + + mutex_lock(&kvm->lock); + if (kvm->arch.rma_setup_done) { + kvm->arch.rma_setup_done = 0; + /* order rma_setup_done vs. vcpus_running */ + smp_mb(); + if (atomic_read(&kvm->arch.vcpus_running)) { + kvm->arch.rma_setup_done = 1; + goto out; + } + } + if (kvm->arch.hpt_virt) { + order = kvm->arch.hpt_order; + /* Set the entire HPT to 0, i.e. invalid HPTEs */ + memset((void *)kvm->arch.hpt_virt, 0, 1ul << order); + /* + * Set the whole last_vcpu array to an invalid vcpu number. + * This ensures that each vcpu will flush its TLB on next entry. + */ + memset(kvm->arch.last_vcpu, 0xff, sizeof(kvm->arch.last_vcpu)); + *htab_orderp = order; + err = 0; + } else { + err = kvmppc_alloc_hpt(kvm, htab_orderp); + order = *htab_orderp; + } + out: + mutex_unlock(&kvm->lock); + return err; +} + void kvmppc_free_hpt(struct kvm *kvm) { kvmppc_free_lpid(kvm->arch.lpid); @@ -94,7 +159,8 @@ void kvmppc_free_hpt(struct kvm *kvm) if (kvm->arch.hpt_li) kvm_release_hpt(kvm->arch.hpt_li); else - free_pages(kvm->arch.hpt_virt, HPT_ORDER - PAGE_SHIFT); + free_pages(kvm->arch.hpt_virt, + kvm->arch.hpt_order - PAGE_SHIFT); } /* Bits in first HPTE dword for pagesize 4k, 64k or 16M */ @@ -119,6 +185,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, unsigned long psize; unsigned long hp0, hp1; long ret; + struct kvm *kvm = vcpu->kvm; psize = 1ul << porder; npages = memslot->npages >> (porder - PAGE_SHIFT); @@ -127,8 +194,8 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, if (npages > 1ul << (40 - porder)) npages = 1ul << (40 - porder); /* Can't use more than 1 HPTE per HPTEG */ - if (npages > HPT_NPTEG) - npages = HPT_NPTEG; + if (npages > kvm->arch.hpt_mask + 1) + npages = kvm->arch.hpt_mask + 1; hp0 = HPTE_V_1TB_SEG | (VRMA_VSID << (40 - 16)) | HPTE_V_BOLTED | hpte0_pgsize_encoding(psize); @@ -138,7 +205,7 @@ void kvmppc_map_vrma(struct kvm_vcpu *vcpu, struct kvm_memory_slot *memslot, for (i = 0; i < npages; ++i) { addr = i << porder; /* can't use hpt_hash since va > 64 bits */ - hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & HPT_HASH_MASK; + hash = (i ^ (VRMA_VSID ^ (VRMA_VSID << 25))) & kvm->arch.hpt_mask; /* * We assume that the hash table is empty and no * vcpus are using it at this stage. Since we create diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 3abe1b86e583..83e929e66f9d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -56,7 +56,7 @@ /* #define EXIT_DEBUG_INT */ static void kvmppc_end_cede(struct kvm_vcpu *vcpu); -static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu); +static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu); void kvmppc_core_vcpu_load(struct kvm_vcpu *vcpu, int cpu) { @@ -1104,11 +1104,15 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) return -EINTR; } - /* On the first time here, set up VRMA or RMA */ + atomic_inc(&vcpu->kvm->arch.vcpus_running); + /* Order vcpus_running vs. rma_setup_done, see kvmppc_alloc_reset_hpt */ + smp_mb(); + + /* On the first time here, set up HTAB and VRMA or RMA */ if (!vcpu->kvm->arch.rma_setup_done) { - r = kvmppc_hv_setup_rma(vcpu); + r = kvmppc_hv_setup_htab_rma(vcpu); if (r) - return r; + goto out; } flush_fp_to_thread(current); @@ -1126,6 +1130,9 @@ int kvmppc_vcpu_run(struct kvm_run *run, struct kvm_vcpu *vcpu) kvmppc_core_prepare_to_enter(vcpu); } } while (r == RESUME_GUEST); + + out: + atomic_dec(&vcpu->kvm->arch.vcpus_running); return r; } @@ -1341,7 +1348,7 @@ void kvmppc_core_commit_memory_region(struct kvm *kvm, { } -static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) +static int kvmppc_hv_setup_htab_rma(struct kvm_vcpu *vcpu) { int err = 0; struct kvm *kvm = vcpu->kvm; @@ -1360,6 +1367,15 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) if (kvm->arch.rma_setup_done) goto out; /* another vcpu beat us to it */ + /* Allocate hashed page table (if not done already) and reset it */ + if (!kvm->arch.hpt_virt) { + err = kvmppc_alloc_hpt(kvm, NULL); + if (err) { + pr_err("KVM: Couldn't alloc HPT\n"); + goto out; + } + } + /* Look up the memslot for guest physical address 0 */ memslot = gfn_to_memslot(kvm, 0); @@ -1471,13 +1487,14 @@ static int kvmppc_hv_setup_rma(struct kvm_vcpu *vcpu) int kvmppc_core_init_vm(struct kvm *kvm) { - long r; - unsigned long lpcr; + unsigned long lpcr, lpid; - /* Allocate hashed page table */ - r = kvmppc_alloc_hpt(kvm); - if (r) - return r; + /* Allocate the guest's logical partition ID */ + + lpid = kvmppc_alloc_lpid(); + if (lpid < 0) + return -ENOMEM; + kvm->arch.lpid = lpid; INIT_LIST_HEAD(&kvm->arch.spapr_tce_tables); @@ -1487,7 +1504,6 @@ int kvmppc_core_init_vm(struct kvm *kvm) if (cpu_has_feature(CPU_FTR_ARCH_201)) { /* PPC970; HID4 is effectively the LPCR */ - unsigned long lpid = kvm->arch.lpid; kvm->arch.host_lpid = 0; kvm->arch.host_lpcr = lpcr = mfspr(SPRN_HID4); lpcr &= ~((3 << HID4_LPID1_SH) | (0xful << HID4_LPID5_SH)); diff --git a/arch/powerpc/kvm/book3s_hv_builtin.c b/arch/powerpc/kvm/book3s_hv_builtin.c index e1b60f56f2a1..fb4eac290fef 100644 --- a/arch/powerpc/kvm/book3s_hv_builtin.c +++ b/arch/powerpc/kvm/book3s_hv_builtin.c @@ -25,6 +25,9 @@ static void __init kvm_linear_init_one(ulong size, int count, int type); static struct kvmppc_linear_info *kvm_alloc_linear(int type); static void kvm_release_linear(struct kvmppc_linear_info *ri); +int kvm_hpt_order = KVM_DEFAULT_HPT_ORDER; +EXPORT_SYMBOL_GPL(kvm_hpt_order); + /*************** RMA *************/ /* @@ -209,7 +212,7 @@ static void kvm_release_linear(struct kvmppc_linear_info *ri) void __init kvm_linear_init(void) { /* HPT */ - kvm_linear_init_one(1 << HPT_ORDER, kvm_hpt_count, KVM_LINEAR_HPT); + kvm_linear_init_one(1 << kvm_hpt_order, kvm_hpt_count, KVM_LINEAR_HPT); /* RMA */ /* Only do this on PPC970 in HV mode */ diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index cec4daddbf31..5c70d19494f9 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -237,7 +237,7 @@ long kvmppc_h_enter(struct kvm_vcpu *vcpu, unsigned long flags, /* Find and lock the HPTEG slot to use */ do_insert: - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; if (likely((flags & H_EXACT) == 0)) { pte_index &= ~7UL; @@ -352,7 +352,7 @@ long kvmppc_h_remove(struct kvm_vcpu *vcpu, unsigned long flags, unsigned long v, r, rb; struct revmap_entry *rev; - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); while (!try_lock_hpte(hpte, HPTE_V_HVLOCK)) @@ -419,7 +419,8 @@ long kvmppc_h_bulk_remove(struct kvm_vcpu *vcpu) i = 4; break; } - if (req != 1 || flags == 3 || pte_index >= HPT_NPTE) { + if (req != 1 || flags == 3 || + pte_index >= kvm->arch.hpt_npte) { /* parameter error */ args[j] = ((0xa0 | flags) << 56) + pte_index; ret = H_PARAMETER; @@ -521,7 +522,7 @@ long kvmppc_h_protect(struct kvm_vcpu *vcpu, unsigned long flags, struct revmap_entry *rev; unsigned long v, r, rb, mask, bits; - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; hpte = (unsigned long *)(kvm->arch.hpt_virt + (pte_index << 4)); @@ -583,7 +584,7 @@ long kvmppc_h_read(struct kvm_vcpu *vcpu, unsigned long flags, int i, n = 1; struct revmap_entry *rev = NULL; - if (pte_index >= HPT_NPTE) + if (pte_index >= kvm->arch.hpt_npte) return H_PARAMETER; if (flags & H_READ_4) { pte_index &= ~3; @@ -678,7 +679,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, somask = (1UL << 28) - 1; vsid = (slb_v & ~SLB_VSID_B) >> SLB_VSID_SHIFT; } - hash = (vsid ^ ((eaddr & somask) >> pshift)) & HPT_HASH_MASK; + hash = (vsid ^ ((eaddr & somask) >> pshift)) & kvm->arch.hpt_mask; avpn = slb_v & ~(somask >> 16); /* also includes B */ avpn |= (eaddr & somask) >> 16; @@ -723,7 +724,7 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, if (val & HPTE_V_SECONDARY) break; val |= HPTE_V_SECONDARY; - hash = hash ^ HPT_HASH_MASK; + hash = hash ^ kvm->arch.hpt_mask; } return -1; } diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 72f13f4a06e0..d25a097c852b 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -612,6 +612,12 @@ static void kvmppc_fill_pt_regs(struct pt_regs *regs) regs->link = lr; } +/* + * For interrupts needed to be handled by host interrupt handlers, + * corresponding host handler are called from here in similar way + * (but not exact) as they are called from low level handler + * (such as from arch/powerpc/kernel/head_fsl_booke.S). + */ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, unsigned int exit_nr) { @@ -639,6 +645,17 @@ static void kvmppc_restart_interrupt(struct kvm_vcpu *vcpu, kvmppc_fill_pt_regs(®s); performance_monitor_exception(®s); break; + case BOOKE_INTERRUPT_WATCHDOG: + kvmppc_fill_pt_regs(®s); +#ifdef CONFIG_BOOKE_WDT + WatchdogException(®s); +#else + unknown_exception(®s); +#endif + break; + case BOOKE_INTERRUPT_CRITICAL: + unknown_exception(®s); + break; } } @@ -683,6 +700,10 @@ int kvmppc_handle_exit(struct kvm_run *run, struct kvm_vcpu *vcpu, r = RESUME_GUEST; break; + case BOOKE_INTERRUPT_WATCHDOG: + r = RESUME_GUEST; + break; + case BOOKE_INTERRUPT_DOORBELL: kvmppc_account_exit(vcpu, DBELL_EXITS); r = RESUME_GUEST; @@ -1267,6 +1288,11 @@ void kvmppc_decrementer_func(unsigned long data) { struct kvm_vcpu *vcpu = (struct kvm_vcpu *)data; + if (vcpu->arch.tcr & TCR_ARE) { + vcpu->arch.dec = vcpu->arch.decar; + kvmppc_emulate_dec(vcpu); + } + kvmppc_set_tsr_bits(vcpu, TSR_DIS); } diff --git a/arch/powerpc/kvm/booke_emulate.c b/arch/powerpc/kvm/booke_emulate.c index 6c76397f2af4..12834bb608ab 100644 --- a/arch/powerpc/kvm/booke_emulate.c +++ b/arch/powerpc/kvm/booke_emulate.c @@ -24,6 +24,7 @@ #include "booke.h" #define OP_19_XOP_RFI 50 +#define OP_19_XOP_RFCI 51 #define OP_31_XOP_MFMSR 83 #define OP_31_XOP_WRTEE 131 @@ -36,6 +37,12 @@ static void kvmppc_emul_rfi(struct kvm_vcpu *vcpu) kvmppc_set_msr(vcpu, vcpu->arch.shared->srr1); } +static void kvmppc_emul_rfci(struct kvm_vcpu *vcpu) +{ + vcpu->arch.pc = vcpu->arch.csrr0; + kvmppc_set_msr(vcpu, vcpu->arch.csrr1); +} + int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, unsigned int inst, int *advance) { @@ -52,6 +59,12 @@ int kvmppc_booke_emulate_op(struct kvm_run *run, struct kvm_vcpu *vcpu, *advance = 0; break; + case OP_19_XOP_RFCI: + kvmppc_emul_rfci(vcpu); + kvmppc_set_exit_type(vcpu, EMULATED_RFCI_EXITS); + *advance = 0; + break; + default: emulated = EMULATE_FAIL; break; @@ -113,6 +126,12 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) case SPRN_ESR: vcpu->arch.shared->esr = spr_val; break; + case SPRN_CSRR0: + vcpu->arch.csrr0 = spr_val; + break; + case SPRN_CSRR1: + vcpu->arch.csrr1 = spr_val; + break; case SPRN_DBCR0: vcpu->arch.dbcr0 = spr_val; break; @@ -129,6 +148,9 @@ int kvmppc_booke_emulate_mtspr(struct kvm_vcpu *vcpu, int sprn, ulong spr_val) kvmppc_set_tcr(vcpu, spr_val); break; + case SPRN_DECAR: + vcpu->arch.decar = spr_val; + break; /* * Note: SPRG4-7 are user-readable. * These values are loaded into the real SPRGs when resuming the @@ -229,6 +251,12 @@ int kvmppc_booke_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) case SPRN_ESR: *spr_val = vcpu->arch.shared->esr; break; + case SPRN_CSRR0: + *spr_val = vcpu->arch.csrr0; + break; + case SPRN_CSRR1: + *spr_val = vcpu->arch.csrr1; + break; case SPRN_DBCR0: *spr_val = vcpu->arch.dbcr0; break; diff --git a/arch/powerpc/kvm/booke_interrupts.S b/arch/powerpc/kvm/booke_interrupts.S index 8fd4b2a0911b..bb46b32f9813 100644 --- a/arch/powerpc/kvm/booke_interrupts.S +++ b/arch/powerpc/kvm/booke_interrupts.S @@ -52,16 +52,21 @@ (1<<BOOKE_INTERRUPT_PROGRAM) | \ (1<<BOOKE_INTERRUPT_DTLB_MISS)) -.macro KVM_HANDLER ivor_nr +.macro KVM_HANDLER ivor_nr scratch srr0 _GLOBAL(kvmppc_handler_\ivor_nr) /* Get pointer to vcpu and record exit number. */ - mtspr SPRN_SPRG_WSCRATCH0, r4 + mtspr \scratch , r4 mfspr r4, SPRN_SPRG_RVCPU + stw r3, VCPU_GPR(R3)(r4) stw r5, VCPU_GPR(R5)(r4) stw r6, VCPU_GPR(R6)(r4) + mfspr r3, \scratch mfctr r5 - lis r6, kvmppc_resume_host@h + stw r3, VCPU_GPR(R4)(r4) stw r5, VCPU_CTR(r4) + mfspr r3, \srr0 + lis r6, kvmppc_resume_host@h + stw r3, VCPU_PC(r4) li r5, \ivor_nr ori r6, r6, kvmppc_resume_host@l mtctr r6 @@ -69,37 +74,35 @@ _GLOBAL(kvmppc_handler_\ivor_nr) .endm _GLOBAL(kvmppc_handlers_start) -KVM_HANDLER BOOKE_INTERRUPT_CRITICAL -KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK -KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE -KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE -KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL -KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT -KVM_HANDLER BOOKE_INTERRUPT_PROGRAM -KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL -KVM_HANDLER BOOKE_INTERRUPT_SYSCALL -KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL -KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER -KVM_HANDLER BOOKE_INTERRUPT_FIT -KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG -KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS -KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS -KVM_HANDLER BOOKE_INTERRUPT_DEBUG -KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL -KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA -KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND +KVM_HANDLER BOOKE_INTERRUPT_CRITICAL SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 +KVM_HANDLER BOOKE_INTERRUPT_MACHINE_CHECK SPRN_SPRG_RSCRATCH_MC SPRN_MCSRR0 +KVM_HANDLER BOOKE_INTERRUPT_DATA_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_INST_STORAGE SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_EXTERNAL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_ALIGNMENT SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_PROGRAM SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_FP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_SYSCALL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_AP_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_DECREMENTER SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_FIT SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_WATCHDOG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 +KVM_HANDLER BOOKE_INTERRUPT_DTLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_ITLB_MISS SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_DEBUG SPRN_SPRG_RSCRATCH_CRIT SPRN_CSRR0 +KVM_HANDLER BOOKE_INTERRUPT_SPE_UNAVAIL SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_DATA SPRN_SPRG_RSCRATCH0 SPRN_SRR0 +KVM_HANDLER BOOKE_INTERRUPT_SPE_FP_ROUND SPRN_SPRG_RSCRATCH0 SPRN_SRR0 _GLOBAL(kvmppc_handler_len) .long kvmppc_handler_1 - kvmppc_handler_0 - /* Registers: * SPRG_SCRATCH0: guest r4 * r4: vcpu pointer * r5: KVM exit number */ _GLOBAL(kvmppc_resume_host) - stw r3, VCPU_GPR(R3)(r4) mfcr r3 stw r3, VCPU_CR(r4) stw r7, VCPU_GPR(R7)(r4) @@ -180,10 +183,6 @@ _GLOBAL(kvmppc_resume_host) stw r3, VCPU_LR(r4) mfxer r3 stw r3, VCPU_XER(r4) - mfspr r3, SPRN_SPRG_RSCRATCH0 - stw r3, VCPU_GPR(R4)(r4) - mfspr r3, SPRN_SRR0 - stw r3, VCPU_PC(r4) /* Restore host stack pointer and PID before IVPR, since the host * exception handlers use them. */ diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 1685dc43bcf2..d28c2d43ac1b 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -262,7 +262,7 @@ kvm_lvl_handler BOOKE_INTERRUPT_CRITICAL, \ kvm_lvl_handler BOOKE_INTERRUPT_MACHINE_CHECK, \ SPRN_SPRG_RSCRATCH_MC, SPRN_MCSRR0, SPRN_MCSRR1, 0 kvm_handler BOOKE_INTERRUPT_DATA_STORAGE, \ - SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR) + SPRN_SRR0, SPRN_SRR1, (NEED_EMU | NEED_DEAR | NEED_ESR) kvm_handler BOOKE_INTERRUPT_INST_STORAGE, SPRN_SRR0, SPRN_SRR1, NEED_ESR kvm_handler BOOKE_INTERRUPT_EXTERNAL, SPRN_SRR0, SPRN_SRR1, 0 kvm_handler BOOKE_INTERRUPT_ALIGNMENT, \ diff --git a/arch/powerpc/kvm/e500_emulate.c b/arch/powerpc/kvm/e500_emulate.c index 8b99e076dc81..e04b0ef55ce0 100644 --- a/arch/powerpc/kvm/e500_emulate.c +++ b/arch/powerpc/kvm/e500_emulate.c @@ -269,6 +269,9 @@ int kvmppc_core_emulate_mfspr(struct kvm_vcpu *vcpu, int sprn, ulong *spr_val) *spr_val = vcpu->arch.shared->mas7_3 >> 32; break; #endif + case SPRN_DECAR: + *spr_val = vcpu->arch.decar; + break; case SPRN_TLB0CFG: *spr_val = vcpu->arch.tlbcfg[0]; break; diff --git a/arch/powerpc/kvm/e500mc.c b/arch/powerpc/kvm/e500mc.c index fe6c1de6b701..1f89d26e65fb 100644 --- a/arch/powerpc/kvm/e500mc.c +++ b/arch/powerpc/kvm/e500mc.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2010 Freescale Semiconductor, Inc. All rights reserved. + * Copyright (C) 2010,2012 Freescale Semiconductor, Inc. All rights reserved. * * Author: Varun Sethi, <varun.sethi@freescale.com> * @@ -57,7 +57,8 @@ void kvmppc_e500_tlbil_one(struct kvmppc_vcpu_e500 *vcpu_e500, struct kvm_book3e_206_tlb_entry *gtlbe) { unsigned int tid, ts; - u32 val, eaddr, lpid; + gva_t eaddr; + u32 val, lpid; unsigned long flags; ts = get_tlb_ts(gtlbe); @@ -183,6 +184,9 @@ int kvmppc_core_vcpu_setup(struct kvm_vcpu *vcpu) vcpu->arch.shadow_epcr = SPRN_EPCR_DSIGS | SPRN_EPCR_DGTMI | \ SPRN_EPCR_DUVD; +#ifdef CONFIG_64BIT + vcpu->arch.shadow_epcr |= SPRN_EPCR_ICM; +#endif vcpu->arch.shadow_msrp = MSRP_UCLEP | MSRP_DEP | MSRP_PMMP; vcpu->arch.eplc = EPC_EGS | (vcpu->kvm->arch.lpid << EPC_ELPID_SHIFT); vcpu->arch.epsc = vcpu->arch.eplc; diff --git a/arch/powerpc/kvm/emulate.c b/arch/powerpc/kvm/emulate.c index f90e86dea7a2..ee04abaefe23 100644 --- a/arch/powerpc/kvm/emulate.c +++ b/arch/powerpc/kvm/emulate.c @@ -59,11 +59,13 @@ #define OP_31_XOP_STHBRX 918 #define OP_LWZ 32 +#define OP_LD 58 #define OP_LWZU 33 #define OP_LBZ 34 #define OP_LBZU 35 #define OP_STW 36 #define OP_STWU 37 +#define OP_STD 62 #define OP_STB 38 #define OP_STBU 39 #define OP_LHZ 40 @@ -392,6 +394,12 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); break; + /* TBD: Add support for other 64 bit load variants like ldu, ldux, ldx etc. */ + case OP_LD: + rt = get_rt(inst); + emulated = kvmppc_handle_load(run, vcpu, rt, 8, 1); + break; + case OP_LWZU: emulated = kvmppc_handle_load(run, vcpu, rt, 4, 1); kvmppc_set_gpr(vcpu, ra, vcpu->arch.vaddr_accessed); @@ -412,6 +420,14 @@ int kvmppc_emulate_instruction(struct kvm_run *run, struct kvm_vcpu *vcpu) 4, 1); break; + /* TBD: Add support for other 64 bit store variants like stdu, stdux, stdx etc. */ + case OP_STD: + rs = get_rs(inst); + emulated = kvmppc_handle_store(run, vcpu, + kvmppc_get_gpr(vcpu, rs), + 8, 1); + break; + case OP_STWU: emulated = kvmppc_handle_store(run, vcpu, kvmppc_get_gpr(vcpu, rs), diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 1493c8de947b..87f4dc886076 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -246,6 +246,7 @@ int kvm_dev_ioctl_check_extension(long ext) #endif #ifdef CONFIG_PPC_BOOK3S_64 case KVM_CAP_SPAPR_TCE: + case KVM_CAP_PPC_ALLOC_HTAB: r = 1; break; #endif /* CONFIG_PPC_BOOK3S_64 */ @@ -802,6 +803,23 @@ long kvm_arch_vm_ioctl(struct file *filp, r = -EFAULT; break; } + + case KVM_PPC_ALLOCATE_HTAB: { + struct kvm *kvm = filp->private_data; + u32 htab_order; + + r = -EFAULT; + if (get_user(htab_order, (u32 __user *)argp)) + break; + r = kvmppc_alloc_reset_hpt(kvm, &htab_order); + if (r) + break; + r = -EFAULT; + if (put_user(htab_order, (u32 __user *)argp)) + break; + r = 0; + break; + } #endif /* CONFIG_KVM_BOOK3S_64_HV */ #ifdef CONFIG_PPC_BOOK3S_64 diff --git a/arch/powerpc/platforms/Kconfig b/arch/powerpc/platforms/Kconfig index a35ca44ade66..e7a896acd982 100644 --- a/arch/powerpc/platforms/Kconfig +++ b/arch/powerpc/platforms/Kconfig @@ -25,6 +25,7 @@ source "arch/powerpc/platforms/wsp/Kconfig" config KVM_GUEST bool "KVM Guest support" default n + select EPAPR_PARAVIRT ---help--- This option enables various optimizations for running under the KVM hypervisor. Overhead for the kernel when not running inside KVM should @@ -32,6 +33,14 @@ config KVM_GUEST In case of doubt, say Y +config EPAPR_PARAVIRT + bool "ePAPR para-virtualization support" + default n + help + Enables ePAPR para-virtualization support for guests. + + In case of doubt, say Y + config PPC_NATIVE bool depends on 6xx || PPC64 |