From 26f457142d7ee2da20a5b701862230e4961423d9 Mon Sep 17 00:00:00 2001 From: Ricardo Koller Date: Wed, 26 Apr 2023 17:23:22 +0000 Subject: KVM: arm64: Export kvm_are_all_memslots_empty() Export kvm_are_all_memslots_empty(). This will be used by a future commit when checking before setting a capability. Signed-off-by: Ricardo Koller Reviewed-by: Shaoqin Huang Reviewed-by: Gavin Shan Link: https://lore.kernel.org/r/20230426172330.1439644-5-ricarkol@google.com Signed-off-by: Oliver Upton --- virt/kvm/kvm_main.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index cb5c13eee193..13aed654111a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -4598,7 +4598,7 @@ int __attribute__((weak)) kvm_vm_ioctl_enable_cap(struct kvm *kvm, return -EINVAL; } -static bool kvm_are_all_memslots_empty(struct kvm *kvm) +bool kvm_are_all_memslots_empty(struct kvm *kvm) { int i; @@ -4611,6 +4611,7 @@ static bool kvm_are_all_memslots_empty(struct kvm *kvm) return true; } +EXPORT_SYMBOL_GPL(kvm_are_all_memslots_empty); static int kvm_vm_ioctl_enable_cap_generic(struct kvm *kvm, struct kvm_enable_cap *cap) -- cgit v1.2.3-70-g09d2 From 76021e96d781e1fe8de02ebe52f3eb276716b6b0 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Sat, 11 Feb 2023 01:07:19 +0000 Subject: KVM: Protect vcpu->pid dereference via debugfs with RCU Wrap the vcpu->pid dereference in the debugfs hook vcpu_get_pid() with proper RCU read (un)lock. Unlike the code in kvm_vcpu_ioctl(), vcpu_get_pid() is not a simple access; the pid pointer is passed to pid_nr() and fully dereferenced if the pointer is non-NULL. Failure to acquire RCU could result in use-after-free of the old pid if a different task invokes KVM_RUN and puts the last reference to the old vcpu->pid between vcpu_get_pid() reading the pointer and dereferencing it in pid_nr(). Fixes: e36de87d34a7 ("KVM: debugfs: expose pid of vcpu threads") Link: https://lore.kernel.org/r/20230211010719.982919-1-seanjc@google.com Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 479802a892d4..6a658f30af91 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3870,7 +3870,10 @@ static int create_vcpu_fd(struct kvm_vcpu *vcpu) static int vcpu_get_pid(void *data, u64 *val) { struct kvm_vcpu *vcpu = data; - *val = pid_nr(rcu_access_pointer(vcpu->pid)); + + rcu_read_lock(); + *val = pid_nr(rcu_dereference(vcpu->pid)); + rcu_read_unlock(); return 0; } -- cgit v1.2.3-70-g09d2 From 70b0bc4c0a05cb68ffeeaba8c8340896b5ff6fd7 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 27 Mar 2023 19:54:57 +0200 Subject: KVM: Don't kfree(NULL) on kzalloc() failure in kvm_assign_ioeventfd_idx() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On kzalloc() failure, taking the `goto fail` path leads to kfree(NULL). Such no-op has no use. Move it out. Signed-off-by: Michal Luczaj Reviewed-by: Sean Christopherson Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20230327175457.735903-1-mhal@rbox.co Signed-off-by: Sean Christopherson --- virt/kvm/eventfd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index b0af834ffa95..7c42441425cf 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -889,9 +889,9 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, unlock_fail: mutex_unlock(&kvm->slots_lock); + kfree(p); fail: - kfree(p); eventfd_ctx_put(eventfd); return ret; -- cgit v1.2.3-70-g09d2 From 0a8a5f2c8c266e9d94fb45f76a26cff135d0051c Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Thu, 1 Jun 2023 18:15:17 -0700 Subject: KVM: x86: Use standard mmu_notifier invalidate hooks for APIC access page Now that KVM honors past and in-progress mmu_notifier invalidations when reloading the APIC-access page, use KVM's "standard" invalidation hooks to trigger a reload and delete the one-off usage of invalidate_range(). Aside from eliminating one-off code in KVM, dropping KVM's use of invalidate_range() will allow common mmu_notifier to redefine the API to be more strictly focused on invalidating secondary TLBs that share the primary MMU's page tables. Suggested-by: Jason Gunthorpe Cc: Alistair Popple Cc: Robin Murphy Reviewed-by: Alistair Popple Reviewed-by: Paolo Bonzini Link: https://lore.kernel.org/r/20230602011518.787006-3-seanjc@google.com Signed-off-by: Sean Christopherson --- arch/x86/kvm/mmu/mmu.c | 3 +++ arch/x86/kvm/x86.c | 14 -------------- include/linux/kvm_host.h | 3 --- virt/kvm/kvm_main.c | 18 ------------------ 4 files changed, 3 insertions(+), 35 deletions(-) (limited to 'virt') diff --git a/arch/x86/kvm/mmu/mmu.c b/arch/x86/kvm/mmu/mmu.c index c8961f45e3b1..01a11ce68e57 100644 --- a/arch/x86/kvm/mmu/mmu.c +++ b/arch/x86/kvm/mmu/mmu.c @@ -1600,6 +1600,9 @@ bool kvm_unmap_gfn_range(struct kvm *kvm, struct kvm_gfn_range *range) if (tdp_mmu_enabled) flush = kvm_tdp_mmu_unmap_gfn_range(kvm, range, flush); + if (range->slot->id == APIC_ACCESS_PAGE_PRIVATE_MEMSLOT) + kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); + return flush; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c0778ca39650..f962b7e3487e 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -10435,20 +10435,6 @@ static void vcpu_load_eoi_exitmap(struct kvm_vcpu *vcpu) vcpu, (u64 *)vcpu->arch.ioapic_handled_vectors); } -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end) -{ - unsigned long apic_address; - - /* - * The physical address of apic access page is stored in the VMCS. - * Update it when it becomes invalid. - */ - apic_address = gfn_to_hva(kvm, APIC_DEFAULT_PHYS_BASE >> PAGE_SHIFT); - if (start <= apic_address && apic_address < end) - kvm_make_all_cpus_request(kvm, KVM_REQ_APIC_PAGE_RELOAD); -} - void kvm_arch_guest_memory_reclaimed(struct kvm *kvm) { static_call_cond(kvm_x86_guest_memory_reclaimed)(kvm); diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0e571e973bc2..cb66f4100be7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -2237,9 +2237,6 @@ static inline long kvm_arch_vcpu_async_ioctl(struct file *filp, } #endif /* CONFIG_HAVE_KVM_VCPU_ASYNC_IOCTL */ -void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end); - void kvm_arch_guest_memory_reclaimed(struct kvm *kvm); #ifdef CONFIG_HAVE_KVM_VCPU_RUN_PID_CHANGE diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 479802a892d4..f3c7c3c90161 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -154,11 +154,6 @@ static unsigned long long kvm_active_vms; static DEFINE_PER_CPU(cpumask_var_t, cpu_kick_mask); -__weak void kvm_arch_mmu_notifier_invalidate_range(struct kvm *kvm, - unsigned long start, unsigned long end) -{ -} - __weak void kvm_arch_guest_memory_reclaimed(struct kvm *kvm) { } @@ -521,18 +516,6 @@ static inline struct kvm *mmu_notifier_to_kvm(struct mmu_notifier *mn) return container_of(mn, struct kvm, mmu_notifier); } -static void kvm_mmu_notifier_invalidate_range(struct mmu_notifier *mn, - struct mm_struct *mm, - unsigned long start, unsigned long end) -{ - struct kvm *kvm = mmu_notifier_to_kvm(mn); - int idx; - - idx = srcu_read_lock(&kvm->srcu); - kvm_arch_mmu_notifier_invalidate_range(kvm, start, end); - srcu_read_unlock(&kvm->srcu, idx); -} - typedef bool (*hva_handler_t)(struct kvm *kvm, struct kvm_gfn_range *range); typedef void (*on_lock_fn_t)(struct kvm *kvm, unsigned long start, @@ -892,7 +875,6 @@ static void kvm_mmu_notifier_release(struct mmu_notifier *mn, } static const struct mmu_notifier_ops kvm_mmu_notifier_ops = { - .invalidate_range = kvm_mmu_notifier_invalidate_range, .invalidate_range_start = kvm_mmu_notifier_invalidate_range_start, .invalidate_range_end = kvm_mmu_notifier_invalidate_range_end, .clear_flush_young = kvm_mmu_notifier_clear_flush_young, -- cgit v1.2.3-70-g09d2 From 5f643e460ab1298a32b7d0db104bfcab9d6165c0 Mon Sep 17 00:00:00 2001 From: Michal Luczaj Date: Mon, 5 Jun 2023 13:44:19 +0200 Subject: KVM: Clean up kvm_vm_ioctl_create_vcpu() Since c9d601548603 ("KVM: allow KVM_BUG/KVM_BUG_ON to handle 64-bit cond") 'cond' is internally converted to boolean, so caller's explicit conversion from void* is unnecessary. Remove the double bang. Signed-off-by: Michal Luczaj Reviewed-by: Yuan Yao base-commit: 76a17bf03a268bc342e08c05d8ddbe607d294eb4 Link: https://lore.kernel.org/r/20230605114852.288964-1-mhal@rbox.co Signed-off-by: Sean Christopherson --- virt/kvm/kvm_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 6a658f30af91..64dd940c549e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3975,7 +3975,7 @@ static int kvm_vm_ioctl_create_vcpu(struct kvm *kvm, u32 id) if (r < 0) goto kvm_put_xa_release; - if (KVM_BUG_ON(!!xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) { + if (KVM_BUG_ON(xa_store(&kvm->vcpu_array, vcpu->vcpu_idx, vcpu, 0), kvm)) { r = -EINVAL; goto kvm_put_xa_release; } -- cgit v1.2.3-70-g09d2 From 5ea5ca3c2b4bf4090232e18cfc515dcb52f914a6 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 7 Feb 2023 20:37:12 +0800 Subject: KVM: destruct kvm_io_device while unregistering it from kvm_io_bus Current usage of kvm_io_device requires users to destruct it with an extra call of kvm_iodevice_destructor after the device gets unregistered from kvm_io_bus. This is not necessary and can cause errors if a user forgot to make the extra call. Simplify the usage by combining kvm_iodevice_destructor into kvm_io_bus_unregister_dev. This reduces LOCs a bit for users and can avoid the leakage of destructing the device explicitly. Signed-off-by: Wei Wang Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20230207123713.3905-2-wei.w.wang@intel.com Signed-off-by: Sean Christopherson --- include/kvm/iodev.h | 6 ------ virt/kvm/coalesced_mmio.c | 9 ++------- virt/kvm/eventfd.c | 1 - virt/kvm/kvm_main.c | 23 +++++++++++++++-------- 4 files changed, 17 insertions(+), 22 deletions(-) (limited to 'virt') diff --git a/include/kvm/iodev.h b/include/kvm/iodev.h index d75fc4365746..56619e33251e 100644 --- a/include/kvm/iodev.h +++ b/include/kvm/iodev.h @@ -55,10 +55,4 @@ static inline int kvm_iodevice_write(struct kvm_vcpu *vcpu, : -EOPNOTSUPP; } -static inline void kvm_iodevice_destructor(struct kvm_io_device *dev) -{ - if (dev->ops->destructor) - dev->ops->destructor(dev); -} - #endif /* __KVM_IODEV_H__ */ diff --git a/virt/kvm/coalesced_mmio.c b/virt/kvm/coalesced_mmio.c index 5ef88f5a0864..1b90acb6e3fe 100644 --- a/virt/kvm/coalesced_mmio.c +++ b/virt/kvm/coalesced_mmio.c @@ -186,15 +186,10 @@ int kvm_vm_ioctl_unregister_coalesced_mmio(struct kvm *kvm, coalesced_mmio_in_range(dev, zone->addr, zone->size)) { r = kvm_io_bus_unregister_dev(kvm, zone->pio ? KVM_PIO_BUS : KVM_MMIO_BUS, &dev->dev); - - kvm_iodevice_destructor(&dev->dev); - /* * On failure, unregister destroys all devices on the - * bus _except_ the target device, i.e. coalesced_zones - * has been modified. Bail after destroying the target - * device, there's no need to restart the walk as there - * aren't any zones left. + * bus, including the target device. There's no need + * to restart the walk as there aren't any zones left. */ if (r) break; diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 7c42441425cf..4d47fffe03d9 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -931,7 +931,6 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, bus = kvm_get_bus(kvm, bus_idx); if (bus) bus->ioeventfd_count--; - ioeventfd_release(p); ret = 0; break; } diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 64dd940c549e..b8242607392a 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -5297,6 +5297,12 @@ static void hardware_disable_all(void) } #endif /* CONFIG_KVM_GENERIC_HARDWARE_ENABLING */ +static void kvm_iodevice_destructor(struct kvm_io_device *dev) +{ + if (dev->ops->destructor) + dev->ops->destructor(dev); +} + static void kvm_io_bus_destroy(struct kvm_io_bus *bus) { int i; @@ -5520,7 +5526,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_io_device *dev) { - int i, j; + int i; struct kvm_io_bus *new_bus, *bus; lockdep_assert_held(&kvm->slots_lock); @@ -5550,18 +5556,19 @@ int kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, rcu_assign_pointer(kvm->buses[bus_idx], new_bus); synchronize_srcu_expedited(&kvm->srcu); - /* Destroy the old bus _after_ installing the (null) bus. */ + /* + * If NULL bus is installed, destroy the old bus, including all the + * attached devices. Otherwise, destroy the caller's device only. + */ if (!new_bus) { pr_err("kvm: failed to shrink bus, removing it completely\n"); - for (j = 0; j < bus->dev_count; j++) { - if (j == i) - continue; - kvm_iodevice_destructor(bus->range[j].dev); - } + kvm_io_bus_destroy(bus); + return -ENOMEM; } + kvm_iodevice_destructor(dev); kfree(bus); - return new_bus ? 0 : -ENOMEM; + return 0; } struct kvm_io_device *kvm_io_bus_get_dev(struct kvm *kvm, enum kvm_bus bus_idx, -- cgit v1.2.3-70-g09d2 From cc77b95acf3c7d9a24204b0555fed2014f300fd5 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Tue, 7 Feb 2023 20:37:13 +0800 Subject: kvm/eventfd: use list_for_each_entry when deassign ioeventfd Simpify kvm_deassign_ioeventfd_idx to use list_for_each_entry as the loop just ends at the entry that's found and deleted. Note, coalesced_mmio_ops and ioeventfd_ops are the only instances of kvm_io_device_ops that implement a destructor, all other callers of kvm_io_bus_unregister_dev() are unaffected by this change. Suggested-by: Michal Luczaj Signed-off-by: Wei Wang Reviewed-by: Sean Christopherson Link: https://lore.kernel.org/r/20230207123713.3905-3-wei.w.wang@intel.com [sean: call out that only select users implement a destructor] Signed-off-by: Sean Christopherson --- virt/kvm/eventfd.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index 4d47fffe03d9..89912a17f5d5 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -901,7 +901,7 @@ static int kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, struct kvm_ioeventfd *args) { - struct _ioeventfd *p, *tmp; + struct _ioeventfd *p; struct eventfd_ctx *eventfd; struct kvm_io_bus *bus; int ret = -ENOENT; @@ -915,8 +915,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, mutex_lock(&kvm->slots_lock); - list_for_each_entry_safe(p, tmp, &kvm->ioeventfds, list) { - + list_for_each_entry(p, &kvm->ioeventfds, list) { if (p->bus_idx != bus_idx || p->eventfd != eventfd || p->addr != args->addr || -- cgit v1.2.3-70-g09d2