summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--arch/x86/kvm/mmu/mmu.c13
-rw-r--r--arch/x86/kvm/mmu/tdp_iter.h7
-rw-r--r--arch/x86/kvm/mmu/tdp_mmu.c20
3 files changed, 26 insertions, 14 deletions
diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c
index c6e68ed68618..fe026e5be187 100644
--- a/arch/x86/kvm/mmu/mmu.c
+++ b/arch/x86/kvm/mmu/mmu.c
@@ -6360,6 +6360,13 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
rcu_idx = srcu_read_lock(&kvm->srcu);
write_lock(&kvm->mmu_lock);
+ /*
+ * Zapping TDP MMU shadow pages, including the remote TLB flush, must
+ * be done under RCU protection, because the pages are freed via RCU
+ * callback.
+ */
+ rcu_read_lock();
+
ratio = READ_ONCE(nx_huge_pages_recovery_ratio);
to_zap = ratio ? DIV_ROUND_UP(nx_lpage_splits, ratio) : 0;
for ( ; to_zap; --to_zap) {
@@ -6384,12 +6391,18 @@ static void kvm_recover_nx_lpages(struct kvm *kvm)
if (need_resched() || rwlock_needbreak(&kvm->mmu_lock)) {
kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+ rcu_read_unlock();
+
cond_resched_rwlock_write(&kvm->mmu_lock);
flush = false;
+
+ rcu_read_lock();
}
}
kvm_mmu_remote_flush_or_zap(kvm, &invalid_list, flush);
+ rcu_read_unlock();
+
write_unlock(&kvm->mmu_lock);
srcu_read_unlock(&kvm->srcu, rcu_idx);
}
diff --git a/arch/x86/kvm/mmu/tdp_iter.h b/arch/x86/kvm/mmu/tdp_iter.h
index e2a7e267a77d..b1eaf6ec0e0b 100644
--- a/arch/x86/kvm/mmu/tdp_iter.h
+++ b/arch/x86/kvm/mmu/tdp_iter.h
@@ -9,10 +9,9 @@
/*
* TDP MMU SPTEs are RCU protected to allow paging structures (non-leaf SPTEs)
- * to be zapped while holding mmu_lock for read. Holding RCU isn't required for
- * correctness if mmu_lock is held for write, but plumbing "struct kvm" down to
- * the lower depths of the TDP MMU just to make lockdep happy is a nightmare, so
- * all accesses to SPTEs are done under RCU protection.
+ * to be zapped while holding mmu_lock for read, and to allow TLB flushes to be
+ * batched without having to collect the list of zapped SPs. Flows that can
+ * remove SPs must service pending TLB flushes prior to dropping RCU protection.
*/
static inline u64 kvm_tdp_mmu_read_spte(tdp_ptep_t sptep)
{
diff --git a/arch/x86/kvm/mmu/tdp_mmu.c b/arch/x86/kvm/mmu/tdp_mmu.c
index 3a866fcb5ea9..5038de0c872d 100644
--- a/arch/x86/kvm/mmu/tdp_mmu.c
+++ b/arch/x86/kvm/mmu/tdp_mmu.c
@@ -391,9 +391,6 @@ static void handle_removed_pt(struct kvm *kvm, tdp_ptep_t pt, bool shared)
shared);
}
- kvm_flush_remote_tlbs_with_address(kvm, base_gfn,
- KVM_PAGES_PER_HPAGE(level + 1));
-
call_rcu(&sp->rcu_head, tdp_mmu_free_sp_rcu_callback);
}
@@ -817,19 +814,13 @@ bool kvm_tdp_mmu_zap_sp(struct kvm *kvm, struct kvm_mmu_page *sp)
if (WARN_ON_ONCE(!sp->ptep))
return false;
- rcu_read_lock();
-
old_spte = kvm_tdp_mmu_read_spte(sp->ptep);
- if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte))) {
- rcu_read_unlock();
+ if (WARN_ON_ONCE(!is_shadow_present_pte(old_spte)))
return false;
- }
__tdp_mmu_set_spte(kvm, kvm_mmu_page_as_id(sp), sp->ptep, old_spte, 0,
sp->gfn, sp->role.level + 1, true, true);
- rcu_read_unlock();
-
return true;
}
@@ -870,6 +861,11 @@ static bool tdp_mmu_zap_leafs(struct kvm *kvm, struct kvm_mmu_page *root,
}
rcu_read_unlock();
+
+ /*
+ * Because this flow zaps _only_ leaf SPTEs, the caller doesn't need
+ * to provide RCU protection as no 'struct kvm_mmu_page' will be freed.
+ */
return flush;
}
@@ -1036,6 +1032,10 @@ static int tdp_mmu_map_handle_target_level(struct kvm_vcpu *vcpu,
ret = RET_PF_SPURIOUS;
else if (tdp_mmu_set_spte_atomic(vcpu->kvm, iter, new_spte))
return RET_PF_RETRY;
+ else if (is_shadow_present_pte(iter->old_spte) &&
+ !is_last_spte(iter->old_spte, iter->level))
+ kvm_flush_remote_tlbs_with_address(vcpu->kvm, sp->gfn,
+ KVM_PAGES_PER_HPAGE(iter->level + 1));
/*
* If the page fault was caused by a write but the page is write