From 86bdf3ebcfe1ded055282536fecce13001874740 Mon Sep 17 00:00:00 2001 From: Gavin Shan Date: Thu, 10 Nov 2022 18:49:10 +0800 Subject: KVM: Support dirty ring in conjunction with bitmap ARM64 needs to dirty memory outside of a VCPU context when VGIC/ITS is enabled. It's conflicting with that ring-based dirty page tracking always requires a running VCPU context. Introduce a new flavor of dirty ring that requires the use of both VCPU dirty rings and a dirty bitmap. The expectation is that for non-VCPU sources of dirty memory (such as the VGIC/ITS on arm64), KVM writes to the dirty bitmap. Userspace should scan the dirty bitmap before migrating the VM to the target. Use an additional capability to advertise this behavior. The newly added capability (KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP) can't be enabled before KVM_CAP_DIRTY_LOG_RING_ACQ_REL on ARM64. In this way, the newly added capability is treated as an extension of KVM_CAP_DIRTY_LOG_RING_ACQ_REL. Suggested-by: Marc Zyngier Suggested-by: Peter Xu Co-developed-by: Oliver Upton Signed-off-by: Oliver Upton Signed-off-by: Gavin Shan Acked-by: Peter Xu Signed-off-by: Marc Zyngier Link: https://lore.kernel.org/r/20221110104914.31280-4-gshan@redhat.com --- virt/kvm/Kconfig | 6 +++++ virt/kvm/dirty_ring.c | 14 ++++++++++++ virt/kvm/kvm_main.c | 61 +++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 72 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/Kconfig b/virt/kvm/Kconfig index 800f9470e36b..9fb1ff6f19e5 100644 --- a/virt/kvm/Kconfig +++ b/virt/kvm/Kconfig @@ -33,6 +33,12 @@ config HAVE_KVM_DIRTY_RING_ACQ_REL bool select HAVE_KVM_DIRTY_RING +# Allow enabling both the dirty bitmap and dirty ring. Only architectures +# that need to dirty memory outside of a vCPU context should select this. +config NEED_KVM_DIRTY_RING_WITH_BITMAP + bool + depends on HAVE_KVM_DIRTY_RING + config HAVE_KVM_EVENTFD bool select EVENTFD diff --git a/virt/kvm/dirty_ring.c b/virt/kvm/dirty_ring.c index fecbb7d75ad2..c1cd7dfe4a90 100644 --- a/virt/kvm/dirty_ring.c +++ b/virt/kvm/dirty_ring.c @@ -21,6 +21,20 @@ u32 kvm_dirty_ring_get_rsvd_entries(void) return KVM_DIRTY_RING_RSVD_ENTRIES + kvm_cpu_dirty_log_size(); } +bool kvm_use_dirty_bitmap(struct kvm *kvm) +{ + lockdep_assert_held(&kvm->slots_lock); + + return !kvm->dirty_ring_size || kvm->dirty_ring_with_bitmap; +} + +#ifndef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP +bool kvm_arch_allow_write_without_running_vcpu(struct kvm *kvm) +{ + return false; +} +#endif + static u32 kvm_dirty_ring_used(struct kvm_dirty_ring *ring) { return READ_ONCE(ring->dirty_index) - READ_ONCE(ring->reset_index); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 04b22d2f99d8..be40d1ce6e91 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1617,7 +1617,7 @@ static int kvm_prepare_memory_region(struct kvm *kvm, new->dirty_bitmap = NULL; else if (old && old->dirty_bitmap) new->dirty_bitmap = old->dirty_bitmap; - else if (!kvm->dirty_ring_size) { + else if (kvm_use_dirty_bitmap(kvm)) { r = kvm_alloc_dirty_bitmap(new); if (r) return r; @@ -2060,8 +2060,8 @@ int kvm_get_dirty_log(struct kvm *kvm, struct kvm_dirty_log *log, unsigned long n; unsigned long any = 0; - /* Dirty ring tracking is exclusive to dirty log tracking */ - if (kvm->dirty_ring_size) + /* Dirty ring tracking may be exclusive to dirty log tracking */ + if (!kvm_use_dirty_bitmap(kvm)) return -ENXIO; *memslot = NULL; @@ -2125,8 +2125,8 @@ static int kvm_get_dirty_log_protect(struct kvm *kvm, struct kvm_dirty_log *log) unsigned long *dirty_bitmap_buffer; bool flush; - /* Dirty ring tracking is exclusive to dirty log tracking */ - if (kvm->dirty_ring_size) + /* Dirty ring tracking may be exclusive to dirty log tracking */ + if (!kvm_use_dirty_bitmap(kvm)) return -ENXIO; as_id = log->slot >> 16; @@ -2237,8 +2237,8 @@ static int kvm_clear_dirty_log_protect(struct kvm *kvm, unsigned long *dirty_bitmap_buffer; bool flush; - /* Dirty ring tracking is exclusive to dirty log tracking */ - if (kvm->dirty_ring_size) + /* Dirty ring tracking may be exclusive to dirty log tracking */ + if (!kvm_use_dirty_bitmap(kvm)) return -ENXIO; as_id = log->slot >> 16; @@ -3305,7 +3305,10 @@ void mark_page_dirty_in_slot(struct kvm *kvm, struct kvm_vcpu *vcpu = kvm_get_running_vcpu(); #ifdef CONFIG_HAVE_KVM_DIRTY_RING - if (WARN_ON_ONCE(!vcpu) || WARN_ON_ONCE(vcpu->kvm != kvm)) + if (WARN_ON_ONCE(vcpu && vcpu->kvm != kvm)) + return; + + if (WARN_ON_ONCE(!kvm_arch_allow_write_without_running_vcpu(kvm) && !vcpu)) return; #endif @@ -3313,7 +3316,7 @@ void mark_page_dirty_in_slot(struct kvm *kvm, unsigned long rel_gfn = gfn - memslot->base_gfn; u32 slot = (memslot->as_id << 16) | memslot->id; - if (kvm->dirty_ring_size) + if (kvm->dirty_ring_size && vcpu) kvm_dirty_ring_push(vcpu, slot, rel_gfn); else set_bit_le(rel_gfn, memslot->dirty_bitmap); @@ -4482,6 +4485,9 @@ static long kvm_vm_ioctl_check_extension_generic(struct kvm *kvm, long arg) return KVM_DIRTY_RING_MAX_ENTRIES * sizeof(struct kvm_dirty_gfn); #else return 0; +#endif +#ifdef CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP + case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: #endif case KVM_CAP_BINARY_STATS_FD: case KVM_CAP_SYSTEM_EVENT_DATA: @@ -4558,6 +4564,20 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, return -EINVAL; } +static bool kvm_are_all_memslots_empty(struct kvm *kvm) +{ + int i; + + lockdep_assert_held(&kvm->slots_lock); + + for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) { + if (!kvm_memslots_empty(__kvm_memslots(kvm, i))) + return false; + } + + return true; +} + static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, struct kvm_enable_cap *cap) { @@ -4588,6 +4608,29 @@ static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, return -EINVAL; return kvm_vm_ioctl_enable_dirty_log_ring(kvm, cap->args[0]); + case KVM_CAP_DIRTY_LOG_RING_WITH_BITMAP: { + int r = -EINVAL; + + if (!IS_ENABLED(CONFIG_NEED_KVM_DIRTY_RING_WITH_BITMAP) || + !kvm->dirty_ring_size || cap->flags) + return r; + + mutex_lock(&kvm->slots_lock); + + /* + * For simplicity, allow enabling ring+bitmap if and only if + * there are no memslots, e.g. to ensure all memslots allocate + * a bitmap after the capability is enabled. + */ + if (kvm_are_all_memslots_empty(kvm)) { + kvm->dirty_ring_with_bitmap = true; + r = 0; + } + + mutex_unlock(&kvm->slots_lock); + + return r; + } default: return kvm_vm_ioctl_enable_cap(kvm, cap); } -- cgit v1.2.3-70-g09d2