diff options
Diffstat (limited to 'arch/powerpc/kvm')
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_hv.c | 13 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_mmu_radix.c | 71 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_64_vio_hv.c | 66 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv.c | 15 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_nested.c | 39 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rm_mmu.c | 60 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_hv_rmhandlers.S | 23 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive_native.c | 6 | ||||
-rw-r--r-- | arch/powerpc/kvm/book3s_xive_template.c | 3 | ||||
-rw-r--r-- | arch/powerpc/kvm/emulate_loadstore.c | 2 |
10 files changed, 161 insertions, 137 deletions
diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 2b35f9bcf892..18aed9775a3c 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -281,11 +281,10 @@ static long kvmppc_virtmode_do_h_enter(struct kvm *kvm, unsigned long flags, { long ret; - /* Protect linux PTE lookup from page table destruction */ - rcu_read_lock_sched(); /* this disables preemption too */ + preempt_disable(); ret = kvmppc_do_h_enter(kvm, flags, pte_index, pteh, ptel, kvm->mm->pgd, false, pte_idx_ret); - rcu_read_unlock_sched(); + preempt_enable(); if (ret == H_TOO_HARD) { /* this can't happen */ pr_err("KVM: Oops, kvmppc_h_enter returned too hard!\n"); @@ -602,12 +601,12 @@ int kvmppc_book3s_hv_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, * Read the PTE from the process' radix tree and use that * so we get the shift and attribute bits. */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + spin_lock(&kvm->mmu_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); pte = __pte(0); if (ptep) - pte = *ptep; - local_irq_enable(); + pte = READ_ONCE(*ptep); + spin_unlock(&kvm->mmu_lock); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. diff --git a/arch/powerpc/kvm/book3s_64_mmu_radix.c b/arch/powerpc/kvm/book3s_64_mmu_radix.c index d605ed0bb2e7..02219e28b1e4 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_radix.c +++ b/arch/powerpc/kvm/book3s_64_mmu_radix.c @@ -739,7 +739,7 @@ int kvmppc_create_pte(struct kvm *kvm, pgd_t *pgtable, pte_t pte, return ret; } -bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, +bool kvmppc_hv_handle_set_rc(struct kvm *kvm, bool nested, bool writing, unsigned long gpa, unsigned int lpid) { unsigned long pgflags; @@ -754,12 +754,12 @@ bool kvmppc_hv_handle_set_rc(struct kvm *kvm, pgd_t *pgtable, bool writing, pgflags = _PAGE_ACCESSED; if (writing) pgflags |= _PAGE_DIRTY; - /* - * We are walking the secondary (partition-scoped) page table here. - * We can do this without disabling irq because the Linux MM - * subsystem doesn't do THP splits and collapses on this tree. - */ - ptep = __find_linux_pte(pgtable, gpa, NULL, &shift); + + if (nested) + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); + else + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); + if (ptep && pte_present(*ptep) && (!writing || pte_write(*ptep))) { kvmppc_radix_update_pte(kvm, ptep, 0, pgflags, gpa, shift); return true; @@ -817,12 +817,12 @@ int kvmppc_book3s_instantiate_page(struct kvm_vcpu *vcpu, * Read the PTE from the process' radix tree and use that * so we get the shift and attribute bits. */ - local_irq_disable(); - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + spin_lock(&kvm->mmu_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); pte = __pte(0); if (ptep) - pte = *ptep; - local_irq_enable(); + pte = READ_ONCE(*ptep); + spin_unlock(&kvm->mmu_lock); /* * If the PTE disappeared temporarily due to a THP * collapse, just return and let the guest try again. @@ -953,8 +953,8 @@ int kvmppc_book3s_radix_page_fault(struct kvm_run *run, struct kvm_vcpu *vcpu, /* Failed to set the reference/change bits */ if (dsisr & DSISR_SET_RC) { spin_lock(&kvm->mmu_lock); - if (kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, - writing, gpa, kvm->arch.lpid)) + if (kvmppc_hv_handle_set_rc(kvm, false, writing, + gpa, kvm->arch.lpid)) dsisr &= ~DSISR_SET_RC; spin_unlock(&kvm->mmu_lock); @@ -985,11 +985,11 @@ int kvm_unmap_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, return 0; } - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); - return 0; + return 0; } /* Called with kvm->mmu_lock held */ @@ -1005,7 +1005,7 @@ int kvm_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) { old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_ACCESSED, 0, gpa, shift); @@ -1032,7 +1032,7 @@ int kvm_test_age_radix(struct kvm *kvm, struct kvm_memory_slot *memslot, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ref; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep) && pte_young(*ptep)) ref = 1; return ref; @@ -1044,7 +1044,7 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, { unsigned long gfn = memslot->base_gfn + pagenum; unsigned long gpa = gfn << PAGE_SHIFT; - pte_t *ptep; + pte_t *ptep, pte; unsigned int shift; int ret = 0; unsigned long old, *rmapp; @@ -1052,12 +1052,35 @@ static int kvm_radix_test_clear_dirty(struct kvm *kvm, if (kvm->arch.secure_guest & KVMPPC_SECURE_INIT_DONE) return ret; - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); - if (ptep && pte_present(*ptep) && pte_dirty(*ptep)) { - ret = 1; - if (shift) - ret = 1 << (shift - PAGE_SHIFT); + /* + * For performance reasons we don't hold kvm->mmu_lock while walking the + * partition scoped table. + */ + ptep = find_kvm_secondary_pte_unlocked(kvm, gpa, &shift); + if (!ptep) + return 0; + + pte = READ_ONCE(*ptep); + if (pte_present(pte) && pte_dirty(pte)) { spin_lock(&kvm->mmu_lock); + /* + * Recheck the pte again + */ + if (pte_val(pte) != pte_val(*ptep)) { + /* + * We have KVM_MEM_LOG_DIRTY_PAGES enabled. Hence we can + * only find PAGE_SIZE pte entries here. We can continue + * to use the pte addr returned by above page table + * walk. + */ + if (!pte_present(*ptep) || !pte_dirty(*ptep)) { + spin_unlock(&kvm->mmu_lock); + return 0; + } + } + + ret = 1; + VM_BUG_ON(shift); old = kvmppc_radix_update_pte(kvm, ptep, _PAGE_DIRTY, 0, gpa, shift); kvmppc_radix_tlbie_page(kvm, gpa, shift, kvm->arch.lpid); @@ -1113,7 +1136,7 @@ void kvmppc_radix_flush_memslot(struct kvm *kvm, gpa = memslot->base_gfn << PAGE_SHIFT; spin_lock(&kvm->mmu_lock); for (n = memslot->npages; n; --n) { - ptep = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + ptep = find_kvm_secondary_pte(kvm, gpa, &shift); if (ptep && pte_present(*ptep)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, memslot, kvm->arch.lpid); diff --git a/arch/powerpc/kvm/book3s_64_vio_hv.c b/arch/powerpc/kvm/book3s_64_vio_hv.c index 6fcaf1fa8e02..ac6ac192b8bb 100644 --- a/arch/powerpc/kvm/book3s_64_vio_hv.c +++ b/arch/powerpc/kvm/book3s_64_vio_hv.c @@ -74,8 +74,8 @@ struct kvmppc_spapr_tce_table *kvmppc_find_table(struct kvm *kvm, EXPORT_SYMBOL_GPL(kvmppc_find_table); #ifdef CONFIG_KVM_BOOK3S_HV_POSSIBLE -static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, - unsigned long *ua, unsigned long **prmap) +static long kvmppc_rm_tce_to_ua(struct kvm *kvm, + unsigned long tce, unsigned long *ua) { unsigned long gfn = tce >> PAGE_SHIFT; struct kvm_memory_slot *memslot; @@ -87,9 +87,6 @@ static long kvmppc_rm_tce_to_ua(struct kvm *kvm, unsigned long tce, *ua = __gfn_to_hva_memslot(memslot, gfn) | (tce & ~(PAGE_MASK | TCE_PCI_READ | TCE_PCI_WRITE)); - if (prmap) - *prmap = &memslot->arch.rmap[gfn - memslot->base_gfn]; - return 0; } @@ -116,7 +113,7 @@ static long kvmppc_rm_tce_validate(struct kvmppc_spapr_tce_table *stt, if (iommu_tce_check_gpa(stt->page_shift, gpa)) return H_PARAMETER; - if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(stt->kvm, tce, &ua)) return H_TOO_HARD; list_for_each_entry_lockless(stit, &stt->iommu_tables, next) { @@ -208,7 +205,7 @@ static long kvmppc_rm_ioba_validate(struct kvmppc_spapr_tce_table *stt, idx = (ioba >> stt->page_shift) - stt->offset; sttpage = idx / TCES_PER_PAGE; - sttpages = _ALIGN_UP(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / + sttpages = ALIGN(idx % TCES_PER_PAGE + npages, TCES_PER_PAGE) / TCES_PER_PAGE; for (i = sttpage; i < sttpage + sttpages; ++i) if (!stt->pages[i]) @@ -411,7 +408,7 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return ret; dir = iommu_tce_direction(tce); - if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) + if ((dir != DMA_NONE) && kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) return H_PARAMETER; entry = ioba >> stt->page_shift; @@ -437,8 +434,8 @@ long kvmppc_rm_h_put_tce(struct kvm_vcpu *vcpu, unsigned long liobn, return H_SUCCESS; } -static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, - unsigned long ua, unsigned long *phpa) +static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long ua, unsigned long *phpa) { pte_t *ptep, pte; unsigned shift = 0; @@ -452,10 +449,17 @@ static long kvmppc_rm_ua_to_hpa(struct kvm_vcpu *vcpu, * to exit which will agains result in the below page table walk * to finish. */ - ptep = __find_linux_pte(vcpu->arch.pgdir, ua, NULL, &shift); - if (!ptep || !pte_present(*ptep)) + /* an rmap lock won't make it safe. because that just ensure hash + * page table entries are removed with rmap lock held. After that + * mmu notifier returns and we go ahead and removing ptes from Qemu page table. + */ + ptep = find_kvm_host_pte(vcpu->kvm, mmu_seq, ua, &shift); + if (!ptep) + return -ENXIO; + + pte = READ_ONCE(*ptep); + if (!pte_present(pte)) return -ENXIO; - pte = *ptep; if (!shift) shift = PAGE_SHIFT; @@ -477,10 +481,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long liobn, unsigned long ioba, unsigned long tce_list, unsigned long npages) { + struct kvm *kvm = vcpu->kvm; struct kvmppc_spapr_tce_table *stt; long i, ret = H_SUCCESS; unsigned long tces, entry, ua = 0; - unsigned long *rmap = NULL; + unsigned long mmu_seq; bool prereg = false; struct kvmppc_spapr_tce_iommu_table *stit; @@ -488,6 +493,12 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, if (kvm_is_radix(vcpu->kvm)) return H_TOO_HARD; + /* + * used to check for invalidations in progress + */ + mmu_seq = kvm->mmu_notifier_seq; + smp_rmb(); + stt = kvmppc_find_table(vcpu->kvm, liobn); if (!stt) return H_TOO_HARD; @@ -515,7 +526,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, */ struct mm_iommu_table_group_mem_t *mem; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, NULL)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; mem = mm_iommu_lookup_rm(vcpu->kvm->mm, ua, IOMMU_PAGE_SIZE_4K); @@ -531,23 +542,11 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, * We do not require memory to be preregistered in this case * so lock rmap and do __find_linux_pte_or_hugepte(). */ - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua, &rmap)) - return H_TOO_HARD; - - rmap = (void *) vmalloc_to_phys(rmap); - if (WARN_ON_ONCE_RM(!rmap)) + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce_list, &ua)) return H_TOO_HARD; - /* - * Synchronize with the MMU notifier callbacks in - * book3s_64_mmu_hv.c (kvm_unmap_hva_range_hv etc.). - * While we have the rmap lock, code running on other CPUs - * cannot finish unmapping the host real page that backs - * this guest real page, so we are OK to access the host - * real page. - */ - lock_rmap(rmap); - if (kvmppc_rm_ua_to_hpa(vcpu, ua, &tces)) { + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + if (kvmppc_rm_ua_to_hpa(vcpu, mmu_seq, ua, &tces)) { ret = H_TOO_HARD; goto unlock_exit; } @@ -565,7 +564,7 @@ long kvmppc_rm_h_put_tce_indirect(struct kvm_vcpu *vcpu, unsigned long tce = be64_to_cpu(((u64 *)tces)[i]); ua = 0; - if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua, NULL)) { + if (kvmppc_rm_tce_to_ua(vcpu->kvm, tce, &ua)) { ret = H_PARAMETER; goto invalidate_exit; } @@ -590,9 +589,8 @@ invalidate_exit: iommu_tce_kill_rm(stit->tbl, entry, npages); unlock_exit: - if (rmap) - unlock_rmap(rmap); - + if (!prereg) + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 7f59c47a5b9d..a07e12ed9f5a 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -342,9 +342,6 @@ static void kvmppc_set_pvr_hv(struct kvm_vcpu *vcpu, u32 pvr) vcpu->arch.pvr = pvr; } -/* Dummy value used in computing PCR value below */ -#define PCR_ARCH_300 (PCR_ARCH_207 << 1) - static int kvmppc_set_arch_compat(struct kvm_vcpu *vcpu, u32 arch_compat) { unsigned long host_pcr_bit = 0, guest_pcr_bit = 0; @@ -3390,8 +3387,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, int trap; unsigned long host_hfscr = mfspr(SPRN_HFSCR); unsigned long host_ciabr = mfspr(SPRN_CIABR); - unsigned long host_dawr = mfspr(SPRN_DAWR); - unsigned long host_dawrx = mfspr(SPRN_DAWRX); + unsigned long host_dawr = mfspr(SPRN_DAWR0); + unsigned long host_dawrx = mfspr(SPRN_DAWRX0); unsigned long host_psscr = mfspr(SPRN_PSSCR); unsigned long host_pidr = mfspr(SPRN_PID); @@ -3420,8 +3417,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, mtspr(SPRN_SPURR, vcpu->arch.spurr); if (dawr_enabled()) { - mtspr(SPRN_DAWR, vcpu->arch.dawr); - mtspr(SPRN_DAWRX, vcpu->arch.dawrx); + mtspr(SPRN_DAWR0, vcpu->arch.dawr); + mtspr(SPRN_DAWRX0, vcpu->arch.dawrx); } mtspr(SPRN_CIABR, vcpu->arch.ciabr); mtspr(SPRN_IC, vcpu->arch.ic); @@ -3473,8 +3470,8 @@ static int kvmhv_load_hv_regs_and_go(struct kvm_vcpu *vcpu, u64 time_limit, (local_paca->kvm_hstate.fake_suspend << PSSCR_FAKE_SUSPEND_LG)); mtspr(SPRN_HFSCR, host_hfscr); mtspr(SPRN_CIABR, host_ciabr); - mtspr(SPRN_DAWR, host_dawr); - mtspr(SPRN_DAWRX, host_dawrx); + mtspr(SPRN_DAWR0, host_dawr); + mtspr(SPRN_DAWRX0, host_dawrx); mtspr(SPRN_PID, host_pidr); /* diff --git a/arch/powerpc/kvm/book3s_hv_nested.c b/arch/powerpc/kvm/book3s_hv_nested.c index dc97e5be76f6..66c38ee37fd5 100644 --- a/arch/powerpc/kvm/book3s_hv_nested.c +++ b/arch/powerpc/kvm/book3s_hv_nested.c @@ -750,6 +750,23 @@ static struct kvm_nested_guest *kvmhv_find_nested(struct kvm *kvm, int lpid) return kvm->arch.nested_guests[lpid]; } +pte_t *find_kvm_nested_guest_pte(struct kvm *kvm, unsigned long lpid, + unsigned long ea, unsigned *hshift) +{ + struct kvm_nested_guest *gp; + pte_t *pte; + + gp = kvmhv_find_nested(kvm, lpid); + if (!gp) + return NULL; + + VM_WARN(!spin_is_locked(&kvm->mmu_lock), + "%s called with kvm mmu_lock not held \n", __func__); + pte = __find_linux_pte(gp->shadow_pgtable, ea, NULL, hshift); + + return pte; +} + static inline bool kvmhv_n_rmap_is_equal(u64 rmap_1, u64 rmap_2) { return !((rmap_1 ^ rmap_2) & (RMAP_NESTED_LPID_MASK | @@ -792,19 +809,15 @@ static void kvmhv_update_nest_rmap_rc(struct kvm *kvm, u64 n_rmap, unsigned long clr, unsigned long set, unsigned long hpa, unsigned long mask) { - struct kvm_nested_guest *gp; unsigned long gpa; unsigned int shift, lpid; pte_t *ptep; gpa = n_rmap & RMAP_NESTED_GPA_MASK; lpid = (n_rmap & RMAP_NESTED_LPID_MASK) >> RMAP_NESTED_LPID_SHIFT; - gp = kvmhv_find_nested(kvm, lpid); - if (!gp) - return; /* Find the pte */ - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* * If the pte is present and the pfn is still the same, update the pte. * If the pfn has changed then this is a stale rmap entry, the nested @@ -854,7 +867,7 @@ static void kvmhv_remove_nest_rmap(struct kvm *kvm, u64 n_rmap, return; /* Find and invalidate the pte */ - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, lpid, gpa, &shift); /* Don't spuriously invalidate ptes if the pfn has changed */ if (ptep && pte_present(*ptep) && ((pte_val(*ptep) & mask) == hpa)) kvmppc_unmap_pte(kvm, ptep, gpa, shift, NULL, gp->shadow_lpid); @@ -921,7 +934,7 @@ static bool kvmhv_invalidate_shadow_pte(struct kvm_vcpu *vcpu, int shift; spin_lock(&kvm->mmu_lock); - ptep = __find_linux_pte(gp->shadow_pgtable, gpa, NULL, &shift); + ptep = find_kvm_nested_guest_pte(kvm, gp->l1_lpid, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (ptep && pte_present(*ptep)) { @@ -1169,7 +1182,7 @@ static int kvmhv_translate_addr_nested(struct kvm_vcpu *vcpu, } else if (vcpu->arch.trap == BOOK3S_INTERRUPT_H_INST_STORAGE) { /* Can we execute? */ if (!gpte_p->may_execute) { - flags |= SRR1_ISI_N_OR_G; + flags |= SRR1_ISI_N_G_OR_CIP; goto forward_to_l1; } } else { @@ -1212,16 +1225,16 @@ static long kvmhv_handle_nested_set_rc(struct kvm_vcpu *vcpu, spin_lock(&kvm->mmu_lock); /* Set the rc bit in the pte of our (L0) pgtable for the L1 guest */ - ret = kvmppc_hv_handle_set_rc(kvm, kvm->arch.pgtable, writing, - gpte.raddr, kvm->arch.lpid); + ret = kvmppc_hv_handle_set_rc(kvm, false, writing, + gpte.raddr, kvm->arch.lpid); if (!ret) { ret = -EINVAL; goto out_unlock; } /* Set the rc bit in the pte of the shadow_pgtable for the nest guest */ - ret = kvmppc_hv_handle_set_rc(kvm, gp->shadow_pgtable, writing, n_gpa, - gp->shadow_lpid); + ret = kvmppc_hv_handle_set_rc(kvm, true, writing, + n_gpa, gp->shadow_lpid); if (!ret) ret = -EINVAL; else @@ -1362,7 +1375,7 @@ static long int __kvmhv_nested_page_fault(struct kvm_run *run, /* See if can find translation in our partition scoped tables for L1 */ pte = __pte(0); spin_lock(&kvm->mmu_lock); - pte_p = __find_linux_pte(kvm->arch.pgtable, gpa, NULL, &shift); + pte_p = find_kvm_secondary_pte(kvm, gpa, &shift); if (!shift) shift = PAGE_SHIFT; if (pte_p) diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 220305454c23..88da2764c1bb 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -210,7 +210,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pte_t *ptep; unsigned int writing; unsigned long mmu_seq; - unsigned long rcbits, irq_flags = 0; + unsigned long rcbits; if (kvm_is_radix(kvm)) return H_FUNCTION; @@ -248,17 +248,9 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, /* Translate to host virtual address */ hva = __gfn_to_hva_memslot(memslot, gfn); - /* - * If we had a page table table change after lookup, we would - * retry via mmu_notifier_retry. - */ - if (!realmode) - local_irq_save(irq_flags); - /* - * If called in real mode we have MSR_EE = 0. Otherwise - * we disable irq above. - */ - ptep = __find_linux_pte(pgdir, hva, NULL, &hpage_shift); + + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &hpage_shift); if (ptep) { pte_t pte; unsigned int host_pte_size; @@ -272,8 +264,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, * to <= host page size, if host is using hugepage */ if (host_pte_size < psize) { - if (!realmode) - local_irq_restore(flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return H_PARAMETER; } pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -287,8 +278,7 @@ long kvmppc_do_h_enter(struct kvm *kvm, unsigned long flags, pa |= gpa & ~PAGE_MASK; } } - if (!realmode) - local_irq_restore(irq_flags); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); ptel &= HPTE_R_KEY | HPTE_R_PP0 | (psize-1); ptel |= pa; @@ -888,8 +878,8 @@ long kvmppc_h_clear_mod(struct kvm_vcpu *vcpu, unsigned long flags, return ret; } -static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, - int writing, unsigned long *hpa, +static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long mmu_seq, + unsigned long gpa, int writing, unsigned long *hpa, struct kvm_memory_slot **memslot_p) { struct kvm *kvm = vcpu->kvm; @@ -908,7 +898,7 @@ static int kvmppc_get_hpa(struct kvm_vcpu *vcpu, unsigned long gpa, hva = __gfn_to_hva_memslot(memslot, gfn); /* Try to find the host pte for that virtual address */ - ptep = __find_linux_pte(vcpu->arch.pgdir, hva, NULL, &shift); + ptep = find_kvm_host_pte(kvm, mmu_seq, hva, &shift); if (!ptep) return H_TOO_HARD; pte = kvmppc_read_update_linux_pte(ptep, writing); @@ -943,16 +933,11 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &pa, &memslot); - if (ret != H_SUCCESS) - return ret; + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if (mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &pa, &memslot); + if (ret != H_SUCCESS) goto out_unlock; - } /* Zero the page */ for (i = 0; i < SZ_4K; i += L1_CACHE_BYTES, pa += L1_CACHE_BYTES) @@ -960,7 +945,7 @@ static long kvmppc_do_h_page_init_zero(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -976,19 +961,14 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, mmu_seq = kvm->mmu_notifier_seq; smp_rmb(); - ret = kvmppc_get_hpa(vcpu, dest, 1, &dest_pa, &dest_memslot); - if (ret != H_SUCCESS) - return ret; - ret = kvmppc_get_hpa(vcpu, src, 0, &src_pa, NULL); + arch_spin_lock(&kvm->mmu_lock.rlock.raw_lock); + ret = kvmppc_get_hpa(vcpu, mmu_seq, dest, 1, &dest_pa, &dest_memslot); if (ret != H_SUCCESS) - return ret; + goto out_unlock; - /* Check if we've been invalidated */ - raw_spin_lock(&kvm->mmu_lock.rlock); - if (mmu_notifier_retry(kvm, mmu_seq)) { - ret = H_TOO_HARD; + ret = kvmppc_get_hpa(vcpu, mmu_seq, src, 0, &src_pa, NULL); + if (ret != H_SUCCESS) goto out_unlock; - } /* Copy the page */ memcpy((void *)dest_pa, (void *)src_pa, SZ_4K); @@ -996,7 +976,7 @@ static long kvmppc_do_h_page_init_copy(struct kvm_vcpu *vcpu, kvmppc_update_dirty_map(dest_memslot, dest >> PAGE_SHIFT, PAGE_SIZE); out_unlock: - raw_spin_unlock(&kvm->mmu_lock.rlock); + arch_spin_unlock(&kvm->mmu_lock.rlock.raw_lock); return ret; } @@ -1260,7 +1240,7 @@ long kvmppc_hpte_hv_fault(struct kvm_vcpu *vcpu, unsigned long addr, status &= ~DSISR_NOHPTE; /* DSISR_NOHPTE == SRR1_ISI_NOPT */ if (!data) { if (gr & (HPTE_R_N | HPTE_R_G)) - return status | SRR1_ISI_N_OR_G; + return status | SRR1_ISI_N_G_OR_CIP; if (!hpte_read_permission(pp, slb_v & key)) return status | SRR1_ISI_PROT; } else if (status & DSISR_ISSTORE) { diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 780a499c7114..71943892c81c 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -707,8 +707,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) BEGIN_FTR_SECTION mfspr r5, SPRN_CIABR - mfspr r6, SPRN_DAWR - mfspr r7, SPRN_DAWRX + mfspr r6, SPRN_DAWR0 + mfspr r7, SPRN_DAWRX0 mfspr r8, SPRN_IAMR std r5, STACK_SLOT_CIABR(r1) std r6, STACK_SLOT_DAWR(r1) @@ -803,8 +803,8 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) beq 1f ld r5, VCPU_DAWR(r4) ld r6, VCPU_DAWRX(r4) - mtspr SPRN_DAWR, r5 - mtspr SPRN_DAWRX, r6 + mtspr SPRN_DAWR0, r5 + mtspr SPRN_DAWRX0, r6 1: ld r7, VCPU_CIABR(r4) ld r8, VCPU_TAR(r4) @@ -1766,8 +1766,8 @@ BEGIN_FTR_SECTION * If the DAWR doesn't work, it's ok to write these here as * this value should always be zero */ - mtspr SPRN_DAWR, r6 - mtspr SPRN_DAWRX, r7 + mtspr SPRN_DAWR0, r6 + mtspr SPRN_DAWRX0, r7 END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) BEGIN_FTR_SECTION ld r5, STACK_SLOT_TID(r1) @@ -2577,8 +2577,8 @@ END_FTR_SECTION_IFSET(CPU_FTR_ARCH_207S) mfmsr r6 andi. r6, r6, MSR_DR /* in real mode? */ bne 4f - mtspr SPRN_DAWR, r4 - mtspr SPRN_DAWRX, r5 + mtspr SPRN_DAWR0, r4 + mtspr SPRN_DAWRX0, r5 4: li r3, 0 blr @@ -2907,6 +2907,11 @@ kvm_cede_exit: beq 4f li r0, 0 stb r0, VCPU_CEDED(r9) + /* + * The escalation interrupts are special as we don't EOI them. + * There is no need to use the load-after-store ordering offset + * to set PQ to 10 as we won't use StoreEOI. + */ li r6, XIVE_ESB_SET_PQ_10 b 5f 4: li r0, 1 @@ -3329,7 +3334,7 @@ END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_300) mtspr SPRN_AMR, r0 mtspr SPRN_IAMR, r0 mtspr SPRN_CIABR, r0 - mtspr SPRN_DAWRX, r0 + mtspr SPRN_DAWRX0, r0 BEGIN_MMU_FTR_SECTION b 4f diff --git a/arch/powerpc/kvm/book3s_xive_native.c b/arch/powerpc/kvm/book3s_xive_native.c index 6ef0151ff70a..bdea91df1497 100644 --- a/arch/powerpc/kvm/book3s_xive_native.c +++ b/arch/powerpc/kvm/book3s_xive_native.c @@ -31,6 +31,12 @@ static u8 xive_vm_esb_load(struct xive_irq_data *xd, u32 offset) { u64 val; + /* + * The KVM XIVE native device does not use the XIVE_ESB_SET_PQ_10 + * load operation, so there is no need to enforce load-after-store + * ordering. + */ + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/book3s_xive_template.c b/arch/powerpc/kvm/book3s_xive_template.c index a8a900ace1e6..4ad3c0279458 100644 --- a/arch/powerpc/kvm/book3s_xive_template.c +++ b/arch/powerpc/kvm/book3s_xive_template.c @@ -58,6 +58,9 @@ static u8 GLUE(X_PFX,esb_load)(struct xive_irq_data *xd, u32 offset) { u64 val; + if (offset == XIVE_ESB_SET_PQ_10 && xd->flags & XIVE_IRQ_FLAG_STORE_EOI) + offset |= XIVE_ESB_LD_ST_MO; + if (xd->flags & XIVE_IRQ_FLAG_SHIFT_BUG) offset |= offset << 4; diff --git a/arch/powerpc/kvm/emulate_loadstore.c b/arch/powerpc/kvm/emulate_loadstore.c index 1139bc56e004..135d0e686622 100644 --- a/arch/powerpc/kvm/emulate_loadstore.c +++ b/arch/powerpc/kvm/emulate_loadstore.c @@ -95,7 +95,7 @@ int kvmppc_emulate_loadstore(struct kvm_vcpu *vcpu) emulated = EMULATE_FAIL; vcpu->arch.regs.msr = vcpu->arch.shared->msr; - if (analyse_instr(&op, &vcpu->arch.regs, inst) == 0) { + if (analyse_instr(&op, &vcpu->arch.regs, ppc_inst(inst)) == 0) { int type = op.type & INSTR_TYPE_MASK; int size = GETSIZE(op.type); |