From 2602087ef49d8081ece54019aa862dbb12a04806 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 16 May 2017 09:31:58 +0200 Subject: KVM: arm/arm64: Allow GICv2 to supply a uaccess register function We are about to differentiate between writes from a VCPU and from userspace to the GIC's GICD_ISACTIVER and GICD_ICACTIVER registers due to different synchronization requirements. Expand the macro to define a register description for the GIC to take uaccess functions as well. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 22 +++++++++++----------- virt/kvm/arm/vgic/vgic-mmio.h | 4 +++- 2 files changed, 14 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 0a4283ed9aa7..95543a20513a 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -296,34 +296,34 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, - vgic_mmio_read_rao, vgic_mmio_write_wi, 1, + vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, - vgic_mmio_read_enable, vgic_mmio_write_senable, 1, + vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, + vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, - vgic_mmio_read_pending, vgic_mmio_write_spending, 1, + vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, + vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, - vgic_mmio_read_active, vgic_mmio_write_sactive, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, - vgic_mmio_read_active, vgic_mmio_write_cactive, 1, + vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, - vgic_mmio_read_priority, vgic_mmio_write_priority, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, - vgic_mmio_read_target, vgic_mmio_write_target, 8, + vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, - vgic_mmio_read_config, vgic_mmio_write_config, 2, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index ea4171acdef3..27924a3d8769 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -75,7 +75,7 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; * The _WITH_LENGTH version instantiates registers with a fixed length * and is mutually exclusive with the _PER_IRQ version. */ -#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \ { \ .reg_offset = off, \ .bits_per_irq = bpi, \ @@ -83,6 +83,8 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; .access_flags = acc, \ .read = rd, \ .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ } #define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ -- cgit v1.2.3-70-g09d2 From 3197191e5525ea7cf8b3fdd9afc75ab5779d21fd Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 16 May 2017 09:44:39 +0200 Subject: KVM: arm/arm64: Separate guest and uaccess writes to dist {sc}active Factor out the core register modifier functionality from the entry points from the register description table, and only call the prepare/finish functions from the guest path, not the uaccess path. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- virt/kvm/arm/vgic/vgic-mmio-v2.c | 6 +++-- virt/kvm/arm/vgic/vgic-mmio-v3.c | 8 ++++--- virt/kvm/arm/vgic/vgic-mmio.c | 50 ++++++++++++++++++++++++++++++++++------ virt/kvm/arm/vgic/vgic-mmio.h | 8 +++++++ 4 files changed, 60 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 95543a20513a..8bafe9af3205 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -311,10 +311,12 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, - vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, - vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 99da1a207c19..23c0d56436a7 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -456,11 +456,13 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { vgic_mmio_read_raz, vgic_mmio_write_wi, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, - vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, - vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, - VGIC_ACCESS_32bit), + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, + 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 1c17b2a2f105..64cbcb4c47d0 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -251,38 +251,74 @@ static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) kvm_arm_resume_guest(vcpu->kvm); } -void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; - vgic_change_active_prepare(vcpu, intid); for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); vgic_put_irq(vcpu->kvm, irq); } - vgic_change_active_finish(vcpu, intid); } -void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; vgic_change_active_prepare(vcpu, intid); + + __vgic_mmio_write_cactive(vcpu, addr, len, val); + + vgic_change_active_finish(vcpu, intid); +} + +void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_cactive(vcpu, addr, len, val); +} + +static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); vgic_put_irq(vcpu->kvm, irq); } +} + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + vgic_change_active_prepare(vcpu, intid); + + __vgic_mmio_write_sactive(vcpu, addr, len, val); + vgic_change_active_finish(vcpu, intid); } +void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_sactive(vcpu, addr, len, val); +} + unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len) { diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index 27924a3d8769..5693f6df45ec 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -167,6 +167,14 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); -- cgit v1.2.3-70-g09d2 From abd7229626b9339e378a8cfcdebe0c0943b06a7f Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sat, 6 May 2017 20:01:24 +0200 Subject: KVM: arm/arm64: Simplify active_change_prepare and plug race We don't need to stop a specific VCPU when changing the active state, because private IRQs can only be modified by a running VCPU for the VCPU itself and it is therefore already stopped. However, it is also possible for two VCPUs to be modifying the active state of SPIs at the same time, which can cause the thread being stuck in the loop that checks other VCPU threads for a potentially very long time, or to modify the active state of a running VCPU. Fix this by serializing all accesses to setting and clearing the active state of interrupts using the KVM mutex. Reported-by: Andrew Jones Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- arch/arm/include/asm/kvm_host.h | 2 -- arch/arm64/include/asm/kvm_host.h | 2 -- virt/kvm/arm/arm.c | 20 ++++---------------- virt/kvm/arm/vgic/vgic-mmio.c | 18 ++++++++++-------- virt/kvm/arm/vgic/vgic.c | 11 ++++++----- 5 files changed, 20 insertions(+), 33 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index f0e66577ce05..12274d46df70 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -233,8 +233,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5e19165c5fa8..32cbe8a3bb0d 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -333,8 +333,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); u64 __kvm_call_hyp(void *hypfn, ...); #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 3417e184c8e1..3c387fdc4a9e 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -539,27 +539,15 @@ void kvm_arm_halt_guest(struct kvm *kvm) kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT); } -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pause = true; - kvm_vcpu_kick(vcpu); -} - -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu) -{ - struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); - - vcpu->arch.pause = false; - swake_up(wq); -} - void kvm_arm_resume_guest(struct kvm *kvm) { int i; struct kvm_vcpu *vcpu; - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arm_resume_vcpu(vcpu); + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.pause = false; + swake_up(kvm_arch_vcpu_wq(vcpu)); + } } static void vcpu_sleep(struct kvm_vcpu *vcpu) diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 64cbcb4c47d0..c1e4bdd66131 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -231,23 +231,21 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, * be migrated while we don't hold the IRQ locks and we don't want to be * chasing moving targets. * - * For private interrupts, we only have to make sure the single and only VCPU - * that can potentially queue the IRQ is stopped. + * For private interrupts we don't have to do anything because userspace + * accesses to the VGIC state already require all VCPUs to be stopped, and + * only the VCPU itself can modify its private interrupts active state, which + * guarantees that the VCPU is not running. */ static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) { - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_halt_vcpu(vcpu); - else + if (intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_halt_guest(vcpu->kvm); } /* See vgic_change_active_prepare */ static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) { - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_resume_vcpu(vcpu); - else + if (intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_resume_guest(vcpu->kvm); } @@ -271,11 +269,13 @@ void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + mutex_lock(&vcpu->kvm->lock); vgic_change_active_prepare(vcpu, intid); __vgic_mmio_write_cactive(vcpu, addr, len, val); vgic_change_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); } void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, @@ -305,11 +305,13 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + mutex_lock(&vcpu->kvm->lock); vgic_change_active_prepare(vcpu, intid); __vgic_mmio_write_sactive(vcpu, addr, len, val); vgic_change_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); } void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 83b24d20ff8f..aea080a2c443 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -35,11 +35,12 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { /* * Locking order is always: - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock - * kvm->lpi_list_lock - * vgic_irq->irq_lock + * kvm->lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock + * kvm->lpi_list_lock + * vgic_irq->irq_lock * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. -- cgit v1.2.3-70-g09d2 From 0710f9a637ef209e53c8127cf588a46130fcdcc5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Sun, 4 Jun 2017 13:23:52 +0200 Subject: KVM: arm/arm64: Use uaccess functions for GICv3 {sc}active We recently rewrote the sactive and cactive handlers to take the kvm lock for guest accesses to these registers. However, when accessed from userspace this lock is already held. Unfortunately we forgot to change the private accessors for GICv3, because these are redistributor registers and not distributor registers. Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-mmio-v3.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 23c0d56436a7..e25567a93dd5 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -528,12 +528,14 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = { vgic_mmio_read_pending, vgic_mmio_write_cpending, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, - vgic_mmio_read_active, vgic_mmio_write_sactive, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, - vgic_mmio_read_active, vgic_mmio_write_cactive, 4, - VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, + 4, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, + 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, vgic_mmio_read_priority, vgic_mmio_write_priority, 32, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), -- cgit v1.2.3-70-g09d2 From 6a6d73be12fbe492d0678cd84d3b35e2bc9698e4 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:54 +0200 Subject: KVM: arm/arm64: properly use vcpu requests arm/arm64 already has one VCPU request used when setting pause, but it doesn't properly check requests in VCPU RUN. Check it and also make sure we set vcpu->mode at the appropriate time (before the check) and with the appropriate barriers. See Documentation/virtual/kvm/vcpu-requests.rst. Also make sure we don't leave any vcpu requests we don't intend to handle later set in the request bitmap. If we don't clear them, then kvm_request_pending() may return true when it shouldn't. Using VCPU requests properly fixes a small race where pause could get set just as a VCPU was entering guest mode. Signed-off-by: Andrew Jones Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/kvm/handle_exit.c | 1 + arch/arm64/kvm/handle_exit.c | 1 + virt/kvm/arm/arm.c | 14 ++++++++++++-- virt/kvm/arm/psci.c | 1 + 4 files changed, 15 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index 5fd7968cdae9..a2b4f7b82356 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -72,6 +72,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_wfx(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fa1b18e364fc..17d8a1677a0b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -89,6 +89,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 3c387fdc4a9e..138212605ad9 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -546,6 +546,7 @@ void kvm_arm_resume_guest(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.pause = false; + kvm_clear_request(KVM_REQ_VCPU_EXIT, vcpu); swake_up(kvm_arch_vcpu_wq(vcpu)); } } @@ -638,8 +639,18 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_INTR; } + /* + * Ensure we set mode to IN_GUEST_MODE after we disable + * interrupts and before the final VCPU requests check. + * See the comment in kvm_vcpu_exiting_guest_mode() and + * Documentation/virtual/kvm/vcpu-requests.rst + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || - vcpu->arch.power_off || vcpu->arch.pause) { + kvm_request_pending(vcpu) || + vcpu->arch.power_off || vcpu->arch.pause) { + vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); @@ -655,7 +666,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index a08d7a93aebb..f68be2cc6256 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -57,6 +57,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) * for KVM will preserve the register state. */ kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); return PSCI_RET_SUCCESS; } -- cgit v1.2.3-70-g09d2 From 0592c005622582fb4049a2f710d92c3a70f4c79b Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:55 +0200 Subject: KVM: arm/arm64: replace pause checks with vcpu request checks The current use of KVM_REQ_VCPU_EXIT for pause is fine. Even the requester clearing the request is OK, as this is the special case where the sole requesting thread and receiving VCPU are executing synchronously (see "Clearing Requests" in Documentation/virtual/kvm/vcpu-requests.rst) However, that's about to change, so let's ensure only the receiving VCPU clears the request. Additionally, by guaranteeing KVM_REQ_VCPU_EXIT is always set when pause is, we can avoid checking pause directly in VCPU RUN. Signed-off-by: Andrew Jones Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 138212605ad9..21a4db90073f 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -546,7 +546,6 @@ void kvm_arm_resume_guest(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) { vcpu->arch.pause = false; - kvm_clear_request(KVM_REQ_VCPU_EXIT, vcpu); swake_up(kvm_arch_vcpu_wq(vcpu)); } } @@ -557,6 +556,11 @@ static void vcpu_sleep(struct kvm_vcpu *vcpu) swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && (!vcpu->arch.pause))); + + if (vcpu->arch.pause) { + /* Awaken to handle a signal, request we sleep again later. */ + kvm_make_request(KVM_REQ_VCPU_EXIT, vcpu); + } } static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) @@ -564,6 +568,14 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) return vcpu->arch.target >= 0; } +static void check_vcpu_requests(struct kvm_vcpu *vcpu) +{ + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_VCPU_EXIT, vcpu)) + vcpu_sleep(vcpu); + } +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -609,7 +621,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) update_vttbr(vcpu->kvm); - if (vcpu->arch.power_off || vcpu->arch.pause) + check_vcpu_requests(vcpu); + + if (vcpu->arch.power_off) vcpu_sleep(vcpu); /* @@ -649,7 +663,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || kvm_request_pending(vcpu) || - vcpu->arch.power_off || vcpu->arch.pause) { + vcpu->arch.power_off) { vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); kvm_pmu_sync_hwstate(vcpu); -- cgit v1.2.3-70-g09d2 From cc9b43f99d5ff4df75d11e75c0bfaf08f1ff5a9a Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:56 +0200 Subject: KVM: arm/arm64: use vcpu requests for power_off System shutdown is currently using request-less VCPU kicks. This leaves open a tiny race window, as it doesn't ensure the state change to power_off is seen by a VCPU just about to enter guest mode. VCPU requests, OTOH, are guaranteed to be seen (see "Ensuring Requests Are Seen" of Documentation/virtual/kvm/vcpu-requests.rst) This patch applies the EXIT request used by pause to power_off, fixing the race. Signed-off-by: Andrew Jones Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/psci.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index f68be2cc6256..f189d0ad30d5 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -179,10 +179,9 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) * after this call is handled and before the VCPUs have been * re-initialized. */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + kvm_for_each_vcpu(i, tmp, vcpu->kvm) tmp->arch.power_off = true; - kvm_vcpu_kick(tmp); - } + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_VCPU_EXIT); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; -- cgit v1.2.3-70-g09d2 From 424c989b1a664a270727550506321af0a605c302 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:57 +0200 Subject: KVM: arm/arm64: optimize VCPU RUN We can make a small optimization by not checking the state of the power_off field on each run. This is done by treating power_off like pause, only checking it when we get the EXIT VCPU request. When a VCPU powers off another VCPU the EXIT request is already made, so we just need to make sure the request is also made on self power off. kvm_vcpu_kick() isn't necessary for these cases, as the VCPU would just be kicking itself, but we add it anyway as a self kick doesn't cost much, and it makes the code more future-proof. Signed-off-by: Andrew Jones Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 19 +++++++++++-------- virt/kvm/arm/psci.c | 2 ++ 2 files changed, 13 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 21a4db90073f..9379b1d75ad3 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_timer_vcpu_put(vcpu); } +static void vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_VCPU_EXIT, vcpu); + kvm_vcpu_kick(vcpu); +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu->arch.power_off = false; break; case KVM_MP_STATE_STOPPED: - vcpu->arch.power_off = true; + vcpu_power_off(vcpu); break; default: return -EINVAL; @@ -557,7 +564,7 @@ static void vcpu_sleep(struct kvm_vcpu *vcpu) swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && (!vcpu->arch.pause))); - if (vcpu->arch.pause) { + if (vcpu->arch.power_off || vcpu->arch.pause) { /* Awaken to handle a signal, request we sleep again later. */ kvm_make_request(KVM_REQ_VCPU_EXIT, vcpu); } @@ -623,9 +630,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) check_vcpu_requests(vcpu); - if (vcpu->arch.power_off) - vcpu_sleep(vcpu); - /* * Preparing the interrupts to be injected also * involves poking the GIC, which must be done in a @@ -662,8 +666,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) smp_store_mb(vcpu->mode, IN_GUEST_MODE); if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || - kvm_request_pending(vcpu) || - vcpu->arch.power_off) { + kvm_request_pending(vcpu)) { vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); kvm_pmu_sync_hwstate(vcpu); @@ -896,7 +899,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * Handle the "start in power-off" case. */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - vcpu->arch.power_off = true; + vcpu_power_off(vcpu); else vcpu->arch.power_off = false; diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index f189d0ad30d5..4a436685c552 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -65,6 +65,8 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_VCPU_EXIT, vcpu); + kvm_vcpu_kick(vcpu); } static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) -- cgit v1.2.3-70-g09d2 From 7b244e2be654d90d77800015d23395357dbc82ba Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:58 +0200 Subject: KVM: arm/arm64: change exit request to sleep request A request called EXIT is too generic. All requests are meant to cause exits, but different requests have different flags. Let's not make it difficult to decide if the EXIT request is correct for some case by just always providing unique requests for each case. This patch changes EXIT to SLEEP, because that's what the request is asking the VCPU to do. Signed-off-by: Andrew Jones Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 2 +- arch/arm64/include/asm/kvm_host.h | 2 +- virt/kvm/arm/arm.c | 12 ++++++------ virt/kvm/arm/psci.c | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index c556babe467c..fdd644c01c89 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -44,7 +44,7 @@ #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS #endif -#define KVM_REQ_VCPU_EXIT \ +#define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 0ff991c9c66e..9bd0d1040de9 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -41,7 +41,7 @@ #define KVM_VCPU_MAX_FEATURES 4 -#define KVM_REQ_VCPU_EXIT \ +#define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) int __attribute_const__ kvm_target_cpu(void); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 9379b1d75ad3..ddc833987dfb 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -371,7 +371,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static void vcpu_power_off(struct kvm_vcpu *vcpu) { vcpu->arch.power_off = true; - kvm_make_request(KVM_REQ_VCPU_EXIT, vcpu); + kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } @@ -543,7 +543,7 @@ void kvm_arm_halt_guest(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) vcpu->arch.pause = true; - kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT); + kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP); } void kvm_arm_resume_guest(struct kvm *kvm) @@ -557,7 +557,7 @@ void kvm_arm_resume_guest(struct kvm *kvm) } } -static void vcpu_sleep(struct kvm_vcpu *vcpu) +static void vcpu_req_sleep(struct kvm_vcpu *vcpu) { struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); @@ -566,7 +566,7 @@ static void vcpu_sleep(struct kvm_vcpu *vcpu) if (vcpu->arch.power_off || vcpu->arch.pause) { /* Awaken to handle a signal, request we sleep again later. */ - kvm_make_request(KVM_REQ_VCPU_EXIT, vcpu); + kvm_make_request(KVM_REQ_SLEEP, vcpu); } } @@ -578,8 +578,8 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) static void check_vcpu_requests(struct kvm_vcpu *vcpu) { if (kvm_request_pending(vcpu)) { - if (kvm_check_request(KVM_REQ_VCPU_EXIT, vcpu)) - vcpu_sleep(vcpu); + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) + vcpu_req_sleep(vcpu); } } diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index 4a436685c552..f1e363bab5e8 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -65,7 +65,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.power_off = true; - kvm_make_request(KVM_REQ_VCPU_EXIT, vcpu); + kvm_make_request(KVM_REQ_SLEEP, vcpu); kvm_vcpu_kick(vcpu); } @@ -183,7 +183,7 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) */ kvm_for_each_vcpu(i, tmp, vcpu->kvm) tmp->arch.power_off = true; - kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_VCPU_EXIT); + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; -- cgit v1.2.3-70-g09d2 From 325f9c649c8a4e447e4d3babacc7a60b75012d5d Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:43:59 +0200 Subject: KVM: arm/arm64: use vcpu requests for irq injection Don't use request-less VCPU kicks when injecting IRQs, as a VCPU kick meant to trigger the interrupt injection could be sent while the VCPU is outside guest mode, which means no IPI is sent, and after it has called kvm_vgic_flush_hwstate(), meaning it won't see the updated GIC state until its next exit some time later for some other reason. The receiving VCPU only needs to check this request in VCPU RUN to handle it. By checking it, if it's pending, a memory barrier will be issued that ensures all state is visible. See "Ensuring Requests Are Seen" of Documentation/virtual/kvm/vcpu-requests.rst Signed-off-by: Andrew Jones Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm/include/asm/kvm_host.h | 1 + arch/arm64/include/asm/kvm_host.h | 1 + virt/kvm/arm/arm.c | 7 +++++++ virt/kvm/arm/vgic/vgic.c | 9 +++++++-- 4 files changed, 16 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index fdd644c01c89..00ad56ee6455 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -46,6 +46,7 @@ #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 9bd0d1040de9..0c4fd1f46e10 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -43,6 +43,7 @@ #define KVM_REQ_SLEEP \ KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index ddc833987dfb..cac5c2f2ddba 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -580,6 +580,12 @@ static void check_vcpu_requests(struct kvm_vcpu *vcpu) if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) vcpu_req_sleep(vcpu); + + /* + * Clear IRQ_PENDING requests that were made to guarantee + * that a VCPU sees new virtual interrupts. + */ + kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); } } @@ -771,6 +777,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) * trigger a world-switch round on the running physical CPU to set the * virtual IRQ/FIQ fields in the HCR appropriately. */ + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); return 0; diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index aea080a2c443..c66feaca2a5d 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -286,8 +286,10 @@ retry: * won't see this one until it exits for some other * reason. */ - if (vcpu) + if (vcpu) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); + } return false; } @@ -333,6 +335,7 @@ retry: spin_unlock(&irq->irq_lock); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); return true; @@ -722,8 +725,10 @@ void vgic_kick_vcpus(struct kvm *kvm) * a good kick... */ kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) + if (kvm_vgic_vcpu_pending_irq(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); + } } } -- cgit v1.2.3-70-g09d2 From b7484931e4a8b06fe734ac2b5134f09204bad11a Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:44:00 +0200 Subject: KVM: arm/arm64: PMU: remove request-less vcpu kick Refactor PMU overflow handling in order to remove the request-less vcpu kick. Now, since kvm_vgic_inject_irq() uses vcpu requests, there should be no chance that a kick sent at just the wrong time (between the VCPU's call to kvm_pmu_flush_hwstate() and before it enters guest mode) results in a failure for the guest to see updated GIC state until its next exit some time later for some other reason. Signed-off-by: Andrew Jones Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/pmu.c | 40 +++++++++++++++++++--------------------- 1 file changed, 19 insertions(+), 21 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 4b43e7f3b158..2451607dc25e 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -203,6 +203,23 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) return reg; } +static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool overflow = !!kvm_pmu_overflow_status(vcpu); + + if (pmu->irq_level == overflow) + return; + + pmu->irq_level = overflow; + + if (likely(irqchip_in_kernel(vcpu->kvm))) { + int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + pmu->irq_num, overflow); + WARN_ON(ret); + } +} + /** * kvm_pmu_overflow_set - set PMU overflow interrupt * @vcpu: The vcpu pointer @@ -210,37 +227,18 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) */ void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) { - u64 reg; - if (val == 0) return; vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; - reg = kvm_pmu_overflow_status(vcpu); - if (reg != 0) - kvm_vcpu_kick(vcpu); + kvm_pmu_check_overflow(vcpu); } static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow; - if (!kvm_arm_pmu_v3_ready(vcpu)) return; - - overflow = !!kvm_pmu_overflow_status(vcpu); - if (pmu->irq_level == overflow) - return; - - pmu->irq_level = overflow; - - if (likely(irqchip_in_kernel(vcpu->kvm))) { - int ret; - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); - WARN_ON(ret); - } + kvm_pmu_check_overflow(vcpu); } bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) -- cgit v1.2.3-70-g09d2 From 1b6502e5bc5ef16179bcd812dfa43d8bbb5689d4 Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Sun, 4 Jun 2017 14:44:01 +0200 Subject: KVM: arm/arm64: timer: remove request-less vcpu kick The timer work is only scheduled for a VCPU when that VCPU is blocked. This means we only need to wake it up, not kick (IPI) it. While calling kvm_vcpu_kick() would just do the wake up, and not kick, anyway, let's change this to avoid request-less vcpu kicks, as they're generally not a good idea (see "Request-less VCPU Kicks" in Documentation/virtual/kvm/vcpu-requests.rst) Signed-off-by: Andrew Jones Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/arch_timer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5976609ef27c..7933b1f8f7b7 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -95,7 +95,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. */ - kvm_vcpu_kick(vcpu); + kvm_vcpu_wake_up(vcpu); } static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) -- cgit v1.2.3-70-g09d2 From a2befacf50940017e0de8461c4b924a929c4edc5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 13:41:02 +0200 Subject: KVM: arm64: Allow creating the PMU without the in-kernel GIC Since we got support for devices in userspace which allows reporting the PMU overflow output status to userspace, we should actually allow creating the PMU on systems without an in-kernel irqchip, which in turn requires us to slightly clarify error codes for the ABI and move things around for the initialization phase. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- Documentation/virtual/kvm/devices/vcpu.txt | 16 ++++++---- include/kvm/arm_pmu.h | 6 ++++ virt/kvm/arm/arm.c | 4 +++ virt/kvm/arm/pmu.c | 51 +++++++++++++++++++++++------- 4 files changed, 58 insertions(+), 19 deletions(-) (limited to 'virt') diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt index 02f50686c418..d7236a3e01dc 100644 --- a/Documentation/virtual/kvm/devices/vcpu.txt +++ b/Documentation/virtual/kvm/devices/vcpu.txt @@ -16,7 +16,9 @@ Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a Returns: -EBUSY: The PMU overflow interrupt is already set -ENXIO: The overflow interrupt not set when attempting to get it -ENODEV: PMUv3 not supported - -EINVAL: Invalid PMU overflow interrupt number supplied + -EINVAL: Invalid PMU overflow interrupt number supplied or + trying to set the IRQ number without using an in-kernel + irqchip. A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt @@ -25,11 +27,11 @@ all vcpus, while as an SPI it must be a separate number per vcpu. 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT Parameters: no additional parameter in kvm_device_attr.addr -Returns: -ENODEV: PMUv3 not supported - -ENXIO: PMUv3 not properly configured as required prior to calling this - attribute +Returns: -ENODEV: PMUv3 not supported or GIC not initialized + -ENXIO: PMUv3 not properly configured or in-kernel irqchip not + configured as required prior to calling this attribute -EBUSY: PMUv3 already initialized -Request the initialization of the PMUv3. This must be done after creating the -in-kernel irqchip. Creating a PMU with a userspace irqchip is currently not -supported. +Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel +virtual GIC implementation, this must be done after initializing the in-kernel +irqchip. diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1ab4633adf4f..f6e030617467 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -35,6 +35,7 @@ struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; bool ready; + bool created; bool irq_level; }; @@ -63,6 +64,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); #else struct kvm_pmu { }; @@ -112,6 +114,10 @@ static inline int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, { return -ENXIO; } +static inline int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) +{ + return 0; +} #endif #endif diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index cac5c2f2ddba..72816d3f23a7 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -527,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) } ret = kvm_timer_enable(vcpu); + if (ret) + return ret; + + ret = kvm_arm_pmu_v3_enable(vcpu); return ret; } diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 2451607dc25e..96a9cab39982 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -449,29 +449,50 @@ bool kvm_arm_support_pmu_v3(void) return (perf_num_counters() > 0); } -static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!kvm_arm_support_pmu_v3()) - return -ENODEV; + if (!vcpu->arch.pmu.created) + return 0; /* - * We currently require an in-kernel VGIC to use the PMU emulation, - * because we do not support forwarding PMU overflow interrupts to - * userspace yet. + * A valid interrupt configuration for the PMU is either to have a + * properly configured interrupt number and using an in-kernel + * irqchip, or to neither set an IRQ nor create an in-kernel irqchip. */ - if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm)) + if (kvm_arm_pmu_irq_initialized(vcpu) != irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + kvm_pmu_vcpu_reset(vcpu); + vcpu->arch.pmu.ready = true; + + return 0; +} + +static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_support_pmu_v3()) return -ENODEV; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || - !kvm_arm_pmu_irq_initialized(vcpu)) + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENXIO; - if (kvm_arm_pmu_v3_ready(vcpu)) + if (vcpu->arch.pmu.created) return -EBUSY; - kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; + if (irqchip_in_kernel(vcpu->kvm)) { + /* + * If using the PMU with an in-kernel virtual GIC + * implementation, we require the GIC to be already + * initialized when initializing the PMU. + */ + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + } + vcpu->arch.pmu.created = true; return 0; } @@ -510,6 +531,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; @@ -544,6 +568,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; -- cgit v1.2.3-70-g09d2 From 3cba4af31c61fc9420fdcf083f509a6c20a6d8e5 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 20:11:49 +0200 Subject: KVM: arm/arm64: Move irq_is_ppi() to header file We are about to need this define in the arch timer code as well so move it to a common location. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/pmu.c | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 97b8d3728b31..dde59fbc5f80 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -38,6 +38,8 @@ #define VGIC_MIN_LPI 8192 #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) + enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ VGIC_V3, /* New fancy GICv3 */ diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 96a9cab39982..574feff55a6c 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -496,8 +496,6 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) return 0; } -#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) - /* * For one VM the interrupt type must be same for each vcpu. * As a PPI, the interrupt number is the same for all vcpus, -- cgit v1.2.3-70-g09d2 From 85e69ad7f2cc6dd829987a70cf32785b1d8c8b27 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 20:14:06 +0200 Subject: KVM: arm/arm64: Move timer IRQ default init to arch_timer.c We currently initialize the arch timer IRQ numbers from the reset code, presumably because we once intended to model multiple CPU or SoC types from within the kernel and have hard-coded reset values in the reset code. As we are moving towards userspace being in charge of more fine-grained CPU emulation and stitching together the pieces needed to emulate a particular type of CPU, we should no longer have a tight coupling between resetting a VCPU and setting IRQ numbers. Therefore, move the logic to define and use the default IRQ numbers to the timer code and set the IRQ number immediately when creating the VCPU. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- arch/arm/kvm/reset.c | 16 +--------------- arch/arm64/kvm/reset.c | 16 +--------------- include/kvm/arm_arch_timer.h | 4 +--- virt/kvm/arm/arch_timer.c | 28 ++++++++++++++++------------ 4 files changed, 19 insertions(+), 45 deletions(-) (limited to 'virt') diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 1da8b2d14550..5ed0c3ee33d6 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level cortexa_ptimer_irq = { - { .irq = 30 }, - .level = 1, -}; - -static const struct kvm_irq_level cortexa_vtimer_irq = { - { .irq = 27 }, - .level = 1, -}; - /******************************************************************************* * Exported reset function @@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *reset_regs; - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &cortexa_vtimer_irq; - cpu_ptimer_irq = &cortexa_ptimer_irq; break; default: return -ENODEV; @@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_reset_coprocs(vcpu); /* Reset arch_timer context */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 561badf93de8..3256b9228e75 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = { COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), }; -static const struct kvm_irq_level default_ptimer_irq = { - .irq = 30, - .level = 1, -}; - -static const struct kvm_irq_level default_vtimer_irq = { - .irq = 27, - .level = 1, -}; - static bool cpu_has_32bit_el1(void) { u64 pfr0; @@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; const struct kvm_regs *cpu_reset; switch (vcpu->arch.target) { @@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) cpu_reset = &default_regs_reset; } - cpu_vtimer_irq = &default_vtimer_irq; - cpu_ptimer_irq = &default_ptimer_irq; break; } @@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_pmu_vcpu_reset(vcpu); /* Reset timer */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 295584f31a4e..f1c967a4f603 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -57,9 +57,7 @@ struct arch_timer_cpu { int kvm_timer_hyp_init(void); int kvm_timer_enable(struct kvm_vcpu *vcpu); -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *virt_irq, - const struct kvm_irq_level *phys_irq); +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 7933b1f8f7b7..72d5aa7d4c64 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -35,6 +35,16 @@ static struct timecounter *timecounter; static unsigned int host_vtimer_irq; static u32 host_vtimer_irq_flags; +static const struct kvm_irq_level default_ptimer_irq = { + .irq = 30, + .level = 1, +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { vcpu_vtimer(vcpu)->active_cleared_last = false; @@ -445,22 +455,11 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) kvm_timer_update_state(vcpu); } -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *virt_irq, - const struct kvm_irq_level *phys_irq) +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); - /* - * The vcpu timer irq number cannot be determined in - * kvm_timer_vcpu_init() because it is called much before - * kvm_vcpu_set_target(). To handle this, we determine - * vcpu timer irq number when the vcpu is reset. - */ - vtimer->irq.irq = virt_irq->irq; - ptimer->irq.irq = phys_irq->irq; - /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 * and to 0 for ARMv7. We provide an implementation that always @@ -496,6 +495,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); @@ -504,6 +505,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; + + vtimer->irq.irq = default_vtimer_irq.irq; + ptimer->irq.irq = default_ptimer_irq.irq; } static void kvm_timer_init_interrupt(void *info) -- cgit v1.2.3-70-g09d2 From 99a1db7a2c9b2ecb9a801cee3f6a7a71945a2fca Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 2 May 2017 20:19:15 +0200 Subject: KVM: arm/arm64: Allow setting the timer IRQ numbers from userspace First we define an ABI using the vcpu devices that lets userspace set the interrupt numbers for the various timers on both the 32-bit and 64-bit KVM/ARM implementations. Second, we add the definitions for the groups and attributes introduced by the above ABI. (We add the PMU define on the 32-bit side as well for symmetry and it may get used some day.) Third, we set up the arch-specific vcpu device operation handlers to call into the timer code for anything related to the KVM_ARM_VCPU_TIMER_CTRL group. Fourth, we implement support for getting and setting the timer interrupt numbers using the above defined ABI in the arch timer code. Fifth, we introduce error checking upon enabling the arch timer (which is called when first running a VCPU) to check that all VCPUs are configured to use the same PPI for the timer (as mandated by the architecture) and that the virtual and physical timers are not configured to use the same IRQ number. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- Documentation/virtual/kvm/devices/vcpu.txt | 25 +++++++ arch/arm/include/uapi/asm/kvm.h | 8 +++ arch/arm/kvm/guest.c | 9 +++ arch/arm64/include/uapi/asm/kvm.h | 3 + arch/arm64/kvm/guest.c | 9 +++ include/kvm/arm_arch_timer.h | 4 ++ virt/kvm/arm/arch_timer.c | 104 +++++++++++++++++++++++++++++ 7 files changed, 162 insertions(+) (limited to 'virt') diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt index d7236a3e01dc..2b5dab16c4f2 100644 --- a/Documentation/virtual/kvm/devices/vcpu.txt +++ b/Documentation/virtual/kvm/devices/vcpu.txt @@ -35,3 +35,28 @@ Returns: -ENODEV: PMUv3 not supported or GIC not initialized Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel virtual GIC implementation, this must be done after initializing the in-kernel irqchip. + + +2. GROUP: KVM_ARM_VCPU_TIMER_CTRL +Architectures: ARM,ARM64 + +2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER +2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER +Parameters: in kvm_device_attr.addr the address for the timer interrupt is a + pointer to an int +Returns: -EINVAL: Invalid timer interrupt number + -EBUSY: One or more VCPUs has already run + +A value describing the architected timer interrupt number when connected to an +in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the +attribute overrides the default values (see below). + +KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27) +KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30) + +Setting the same PPI for different timers will prevent the VCPUs from running. +Setting the interrupt number on a VCPU configures all VCPUs created at that +time to use the number provided for a given timer, overwriting any previously +configured values on other VCPUs. Userspace should configure the interrupt +numbers on at least one VCPU after creating all VCPUs and before running any +VCPUs. diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 5e3c673fa3f4..5db2d4c6a55f 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -203,6 +203,14 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 +/* Device Control API on vcpu fd */ +#define KVM_ARM_VCPU_PMU_V3_CTRL 0 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index acea05e9db4e..1e0784ebbfd6 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -308,6 +308,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, int ret; switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -322,6 +325,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, int ret; switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -336,6 +342,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, int ret; switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 70eea2ecc663..9f3ca24bbcc6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -232,6 +232,9 @@ struct kvm_arch_memory_slot { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index b37446a8ffdb..5c7f657dd207 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index f1c967a4f603..f0053f884b4a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -68,6 +68,10 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); + bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); void kvm_timer_schedule(struct kvm_vcpu *vcpu); void kvm_timer_unschedule(struct kvm_vcpu *vcpu); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 72d5aa7d4c64..e03da1abd11f 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include @@ -617,6 +618,28 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } +static bool timer_irqs_are_valid(struct kvm *kvm) +{ + struct kvm_vcpu *vcpu; + int vtimer_irq, ptimer_irq; + int i; + + vcpu = kvm_get_vcpu(kvm, 0); + vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + + if (vtimer_irq == ptimer_irq) + return false; + + kvm_for_each_vcpu(i, vcpu, kvm) { + if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || + vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + return false; + } + + return true; +} + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -636,6 +659,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm)) return -ENODEV; + if (!timer_irqs_are_valid(vcpu->kvm)) { + kvm_debug("incorrectly configured timer irqs\n"); + return -EINVAL; + } + /* * Find the physical IRQ number corresponding to the host_vtimer_irq */ @@ -685,3 +713,79 @@ void kvm_timer_init_vhe(void) val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); write_sysreg(val, cnthctl_el2); } + +static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; + vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + } +} + +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (get_user(irq, uaddr)) + return -EFAULT; + + if (!(irq_is_ppi(irq))) + return -EINVAL; + + if (vcpu->arch.timer_cpu.enabled) + return -EBUSY; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + break; + default: + return -ENXIO; + } + + return 0; +} + +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *timer; + int irq; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + timer = vcpu_vtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + timer = vcpu_ptimer(vcpu); + break; + default: + return -ENXIO; + } + + irq = timer->irq.irq; + return put_user(irq, uaddr); +} + +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + return 0; + } + + return -ENXIO; +} -- cgit v1.2.3-70-g09d2 From c6ccd30e0de384f506449474ca780ff680ad4217 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 4 May 2017 13:24:20 +0200 Subject: KVM: arm/arm64: Introduce an allocator for in-kernel irq lines Having multiple devices being able to signal the same interrupt line is very confusing and almost certainly guarantees a configuration error. Therefore, introduce a very simple allocator which allows a device to claim an interrupt line from the vgic for a given VM. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- include/kvm/arm_vgic.h | 5 +++++ virt/kvm/arm/vgic/vgic.c | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 38 insertions(+) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index dde59fbc5f80..5d5b34534ce9 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -121,6 +121,9 @@ struct vgic_irq { u8 source; /* GICv2 SGIs only */ u8 priority; enum vgic_irq_config config; /* Level or edge */ + + void *owner; /* Opaque pointer to reserve an interrupt + for in-kernel devices. */ }; struct vgic_register_region; @@ -340,4 +343,6 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); */ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index c66feaca2a5d..9628945234e4 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -434,6 +434,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) return 0; } +/** + * kvm_vgic_set_owner - Set the owner of an interrupt for a VM + * + * @vcpu: Pointer to the VCPU (used for PPIs) + * @intid: The virtual INTID identifying the interrupt (PPI or SPI) + * @owner: Opaque pointer to the owner + * + * Returns 0 if intid is not already used by another in-kernel device and the + * owner is set, otherwise returns an error code. + */ +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) +{ + struct vgic_irq *irq; + int ret = 0; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + /* SGIs and LPIs cannot be wired up to any device */ + if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + return -EINVAL; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + spin_lock(&irq->irq_lock); + if (irq->owner && irq->owner != owner) + ret = -EEXIST; + else + irq->owner = owner; + spin_unlock(&irq->irq_lock); + + return ret; +} + /** * vgic_prune_ap_list - Remove non-relevant interrupts from the list * -- cgit v1.2.3-70-g09d2 From abcb851daa617706e90ee7d39d4d9a74ac05f4b1 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 4 May 2017 13:32:53 +0200 Subject: KVM: arm/arm64: Check if irq lines to the GIC are already used We check if other in-kernel devices have already been connected to the GIC for a particular interrupt line when possible. For the PMU, we can do this whenever setting the PMU interrupt number from userspace. For the timers, we have to wait until we try to enable the timer, because we have a concept of default IRQ numbers that userspace shouldn't have to work around in the initialization phase. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- virt/kvm/arm/arch_timer.c | 18 ++++++++++-------- virt/kvm/arm/pmu.c | 7 +++++++ 2 files changed, 17 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index e03da1abd11f..07f6f9bfc1f2 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -618,20 +618,22 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } -static bool timer_irqs_are_valid(struct kvm *kvm) +static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) { - struct kvm_vcpu *vcpu; int vtimer_irq, ptimer_irq; - int i; + int i, ret; - vcpu = kvm_get_vcpu(kvm, 0); vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; - ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); + if (ret) + return false; - if (vtimer_irq == ptimer_irq) + ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); + if (ret) return false; - kvm_for_each_vcpu(i, vcpu, kvm) { + kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) return false; @@ -659,7 +661,7 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm)) return -ENODEV; - if (!timer_irqs_are_valid(vcpu->kvm)) { + if (!timer_irqs_are_valid(vcpu)) { kvm_debug("incorrectly configured timer irqs\n"); return -EINVAL; } diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 574feff55a6c..3f0866925b6b 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -480,6 +480,8 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) return -EBUSY; if (irqchip_in_kernel(vcpu->kvm)) { + int ret; + /* * If using the PMU with an in-kernel virtual GIC * implementation, we require the GIC to be already @@ -490,6 +492,11 @@ static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) if (!kvm_arm_pmu_irq_initialized(vcpu)) return -ENXIO; + + ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, + &vcpu->arch.pmu); + if (ret) + return ret; } vcpu->arch.pmu.created = true; -- cgit v1.2.3-70-g09d2 From cb3f0ad881a6cee39c6a652b4aa4f12f341d98f0 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 16 May 2017 12:41:18 +0200 Subject: KVM: arm/arm64: Disallow userspace control of in-kernel IRQ lines When injecting an IRQ to the VGIC, you now have to present an owner token for that IRQ line to show that you are the owner of that line. IRQ lines driven from userspace or via an irqfd do not have an owner and will simply pass a NULL pointer. Also get rid of the unused kvm_vgic_inject_mapped_irq prototype. Signed-off-by: Christoffer Dall Acked-by: Marc Zyngier --- include/kvm/arm_vgic.h | 4 +--- virt/kvm/arm/arch_timer.c | 3 ++- virt/kvm/arm/arm.c | 4 ++-- virt/kvm/arm/pmu.c | 3 ++- virt/kvm/arm/vgic/vgic-irqfd.c | 2 +- virt/kvm/arm/vgic/vgic.c | 15 +++++++++++---- 6 files changed, 19 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 5d5b34534ce9..131668f8599c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -300,9 +300,7 @@ int kvm_vgic_hyp_init(void); void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); + bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 07f6f9bfc1f2..8e89d63005c7 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -226,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, if (likely(irqchip_in_kernel(vcpu->kvm))) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, - timer_ctx->irq.level); + timer_ctx->irq.level, + timer_ctx); WARN_ON(ret); } } diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 72816d3f23a7..a265acc53e39 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -832,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level); + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); case KVM_ARM_IRQ_TYPE_SPI: if (!irqchip_in_kernel(kvm)) return -ENXIO; @@ -840,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, irq_num, level); + return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); } return -EINVAL; diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 3f0866925b6b..9923eb90cdc7 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -215,7 +215,8 @@ static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) if (likely(irqchip_in_kernel(vcpu->kvm))) { int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); + pmu->irq_num, overflow, + &vcpu->arch.pmu); WARN_ON(ret); } } diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index f138ed2e9c63..b7baf581611a 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level); + return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); } /** diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 9628945234e4..fed717e07938 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -235,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) /* * Only valid injection if changing level for level-triggered IRQs or for a - * rising edge. + * rising edge, and in-kernel connected IRQ lines can only be controlled by + * their owner. */ -static bool vgic_validate_injection(struct vgic_irq *irq, bool level) +static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) { + if (irq->owner != owner) + return false; + switch (irq->config) { case VGIC_CONFIG_LEVEL: return irq->line_level != level; @@ -350,13 +354,16 @@ retry: * false: to ignore the call * Level-sensitive true: raise the input signal * false: lower the input signal + * @owner: The opaque pointer to the owner of the IRQ being raised to verify + * that the caller is allowed to inject this IRQ. Userspace + * injections will have owner == NULL. * * The VGIC is not concerned with devices being active-LOW or active-HIGH for * level-sensitive interrupts. You can think of the level parameter as 1 * being HIGH and 0 being LOW and all devices being active-HIGH. */ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) + bool level, void *owner) { struct kvm_vcpu *vcpu; struct vgic_irq *irq; @@ -378,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, spin_lock(&irq->irq_lock); - if (!vgic_validate_injection(irq, level)) { + if (!vgic_validate_injection(irq, level, owner)) { /* Nothing to see here, move along... */ spin_unlock(&irq->irq_lock); vgic_put_irq(kvm, irq); -- cgit v1.2.3-70-g09d2 From ebb127f2d6f32665643165289151bd20929d9931 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 16 May 2017 19:53:50 +0200 Subject: KVM: arm/arm64: Don't assume initialized vgic when setting PMU IRQ The PMU IRQ number is set through the VCPU device's KVM_SET_DEVICE_ATTR ioctl handler for the KVM_ARM_VCPU_PMU_V3_IRQ attribute, but there is no enforced or stated requirement that this must happen after initializing the VGIC. As a result, calling vgic_valid_spi() which relies on the nr_spis being set during the VGIC init can incorrectly fail. Introduce irq_is_spi, which determines if an IRQ number is within the SPI range without verifying it against the actual VGIC properties. Signed-off-by: Christoffer Dall Reviewed-by: Marc Zyngier --- include/kvm/arm_vgic.h | 2 ++ virt/kvm/arm/pmu.c | 22 ++++++++++++++++++---- 2 files changed, 20 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 131668f8599c..a2ae9d2de21f 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -39,6 +39,8 @@ #define KVM_IRQCHIP_NUM_PINS (1020 - 32) #define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) +#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \ + (irq) <= VGIC_MAX_SPI) enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 9923eb90cdc7..fc8a723ff387 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -458,10 +458,24 @@ int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) /* * A valid interrupt configuration for the PMU is either to have a * properly configured interrupt number and using an in-kernel - * irqchip, or to neither set an IRQ nor create an in-kernel irqchip. + * irqchip, or to not have an in-kernel GIC and not set an IRQ. */ - if (kvm_arm_pmu_irq_initialized(vcpu) != irqchip_in_kernel(vcpu->kvm)) - return -EINVAL; + if (irqchip_in_kernel(vcpu->kvm)) { + int irq = vcpu->arch.pmu.irq_num; + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -EINVAL; + + /* + * If we are using an in-kernel vgic, at this point we know + * the vgic will be initialized, so we can check the PMU irq + * number against the dimensions of the vgic and make sure + * it's valid. + */ + if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) + return -EINVAL; + } else if (kvm_arm_pmu_irq_initialized(vcpu)) { + return -EINVAL; + } kvm_pmu_vcpu_reset(vcpu); vcpu->arch.pmu.ready = true; @@ -547,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -EFAULT; /* The PMU overflow interrupt can be a PPI or a valid SPI. */ - if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq))) + if (!(irq_is_ppi(irq) || irq_is_spi(irq))) return -EINVAL; if (!pmu_irq_is_valid(vcpu->kvm, irq)) -- cgit v1.2.3-70-g09d2 From 63000dd8006dc987db31ba670edc23142ea91e01 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:31 +0100 Subject: KVM: arm/arm64: vgic-v3: Add accessors for the ICH_APxRn_EL2 registers As we're about to access the Active Priority registers a lot more, let's define accessors that take the register number as a parameter. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 116 ++++++++++++++++++++++++++++++++++++------ 1 file changed, 100 insertions(+), 16 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 87940364570b..3dd8f0c4419e 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -118,6 +118,90 @@ static void __hyp_text __gic_v3_set_lr(u64 val, int lr) } } +static void __hyp_text __vgic_v3_write_ap0rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP0R0_EL2); + break; + case 1: + write_gicreg(val, ICH_AP0R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP0R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP0R3_EL2); + break; + } +} + +static void __hyp_text __vgic_v3_write_ap1rn(u32 val, int n) +{ + switch (n) { + case 0: + write_gicreg(val, ICH_AP1R0_EL2); + break; + case 1: + write_gicreg(val, ICH_AP1R1_EL2); + break; + case 2: + write_gicreg(val, ICH_AP1R2_EL2); + break; + case 3: + write_gicreg(val, ICH_AP1R3_EL2); + break; + } +} + +static u32 __hyp_text __vgic_v3_read_ap0rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP0R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP0R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP0R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP0R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + +static u32 __hyp_text __vgic_v3_read_ap1rn(int n) +{ + u32 val; + + switch (n) { + case 0: + val = read_gicreg(ICH_AP1R0_EL2); + break; + case 1: + val = read_gicreg(ICH_AP1R1_EL2); + break; + case 2: + val = read_gicreg(ICH_AP1R2_EL2); + break; + case 3: + val = read_gicreg(ICH_AP1R3_EL2); + break; + default: + unreachable(); + } + + return val; +} + void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpu_if = &vcpu->arch.vgic_cpu.vgic_v3; @@ -154,22 +238,22 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) switch (nr_pre_bits) { case 7: - cpu_if->vgic_ap0r[3] = read_gicreg(ICH_AP0R3_EL2); - cpu_if->vgic_ap0r[2] = read_gicreg(ICH_AP0R2_EL2); + cpu_if->vgic_ap0r[3] = __vgic_v3_read_ap0rn(3); + cpu_if->vgic_ap0r[2] = __vgic_v3_read_ap0rn(2); case 6: - cpu_if->vgic_ap0r[1] = read_gicreg(ICH_AP0R1_EL2); + cpu_if->vgic_ap0r[1] = __vgic_v3_read_ap0rn(1); default: - cpu_if->vgic_ap0r[0] = read_gicreg(ICH_AP0R0_EL2); + cpu_if->vgic_ap0r[0] = __vgic_v3_read_ap0rn(0); } switch (nr_pre_bits) { case 7: - cpu_if->vgic_ap1r[3] = read_gicreg(ICH_AP1R3_EL2); - cpu_if->vgic_ap1r[2] = read_gicreg(ICH_AP1R2_EL2); + cpu_if->vgic_ap1r[3] = __vgic_v3_read_ap1rn(3); + cpu_if->vgic_ap1r[2] = __vgic_v3_read_ap1rn(2); case 6: - cpu_if->vgic_ap1r[1] = read_gicreg(ICH_AP1R1_EL2); + cpu_if->vgic_ap1r[1] = __vgic_v3_read_ap1rn(1); default: - cpu_if->vgic_ap1r[0] = read_gicreg(ICH_AP1R0_EL2); + cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } } else { cpu_if->vgic_elrsr = 0xffff; @@ -224,22 +308,22 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) switch (nr_pre_bits) { case 7: - write_gicreg(cpu_if->vgic_ap0r[3], ICH_AP0R3_EL2); - write_gicreg(cpu_if->vgic_ap0r[2], ICH_AP0R2_EL2); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[3], 3); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[2], 2); case 6: - write_gicreg(cpu_if->vgic_ap0r[1], ICH_AP0R1_EL2); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[1], 1); default: - write_gicreg(cpu_if->vgic_ap0r[0], ICH_AP0R0_EL2); + __vgic_v3_write_ap0rn(cpu_if->vgic_ap0r[0], 0); } switch (nr_pre_bits) { case 7: - write_gicreg(cpu_if->vgic_ap1r[3], ICH_AP1R3_EL2); - write_gicreg(cpu_if->vgic_ap1r[2], ICH_AP1R2_EL2); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[3], 3); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[2], 2); case 6: - write_gicreg(cpu_if->vgic_ap1r[1], ICH_AP1R1_EL2); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[1], 1); default: - write_gicreg(cpu_if->vgic_ap1r[0], ICH_AP1R0_EL2); + __vgic_v3_write_ap1rn(cpu_if->vgic_ap1r[0], 0); } for (i = 0; i < used_lrs; i++) -- cgit v1.2.3-70-g09d2 From 021234ef3752f853704ef1919e8ff33173aca093 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:32 +0100 Subject: KVM: arm64: Make kvm_condition_valid32() accessible from EL2 As we're about to trap CP15 accesses and handle them at EL2, we need to evaluate whether or not the condition flags are valid, as an implementation is allowed to trap despite the condition not being met. Tagging the function as __hyp_text allows this. We still rely on the cc_map array to be mapped at EL2 by virtue of being "const", and the linker to only emit relative references. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/aarch32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/aarch32.c b/virt/kvm/arm/aarch32.c index 528af4b2d09e..79c7c357804b 100644 --- a/virt/kvm/arm/aarch32.c +++ b/virt/kvm/arm/aarch32.c @@ -60,7 +60,7 @@ static const unsigned short cc_map[16] = { /* * Check if a trapped instruction should have been executed or not. */ -bool kvm_condition_valid32(const struct kvm_vcpu *vcpu) +bool __hyp_text kvm_condition_valid32(const struct kvm_vcpu *vcpu) { unsigned long cpsr; u32 cpsr_cond; -- cgit v1.2.3-70-g09d2 From 59da1cbfd840d69bd7a310249924da3fc202c417 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:33 +0100 Subject: KVM: arm64: vgic-v3: Add hook to handle guest GICv3 sysreg accesses at EL2 In order to start handling guest access to GICv3 system registers, let's add a hook that will get called when we trap a system register access. This is gated by a new static key (vgic_v3_cpuif_trap). Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/kvm_hyp.h | 1 + arch/arm64/kvm/hyp/switch.c | 14 ++++++++++++++ include/kvm/arm_vgic.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 38 ++++++++++++++++++++++++++++++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 2 ++ 5 files changed, 56 insertions(+) (limited to 'virt') diff --git a/arch/arm64/include/asm/kvm_hyp.h b/arch/arm64/include/asm/kvm_hyp.h index b18e852d27e8..4572a9b560fa 100644 --- a/arch/arm64/include/asm/kvm_hyp.h +++ b/arch/arm64/include/asm/kvm_hyp.h @@ -127,6 +127,7 @@ int __vgic_v2_perform_cpuif_access(struct kvm_vcpu *vcpu); void __vgic_v3_save_state(struct kvm_vcpu *vcpu); void __vgic_v3_restore_state(struct kvm_vcpu *vcpu); +int __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu); void __timer_save_state(struct kvm_vcpu *vcpu); void __timer_restore_state(struct kvm_vcpu *vcpu); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index e5f089de6526..945e79c641c4 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -350,6 +350,20 @@ again: } } + if (static_branch_unlikely(&vgic_v3_cpuif_trap) && + exit_code == ARM_EXCEPTION_TRAP && + (kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_SYS64 || + kvm_vcpu_trap_get_class(vcpu) == ESR_ELx_EC_CP15_32)) { + int ret = __vgic_v3_perform_cpuif_access(vcpu); + + if (ret == 1) { + __skip_instr(vcpu); + goto again; + } + + /* 0 falls through to be handled out of EL2 */ + } + fp_enabled = __fpsimd_enabled(); __sysreg_save_guest_state(guest_ctxt); diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 2d923a6b2175..34dba516ef24 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -292,6 +292,7 @@ struct vgic_cpu { }; extern struct static_key_false vgic_v2_cpuif_trap; +extern struct static_key_false vgic_v3_cpuif_trap; int kvm_vgic_addr(struct kvm *kvm, unsigned long type, u64 *addr, bool write); void kvm_vgic_early_init(struct kvm *kvm); diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 3dd8f0c4419e..e6c05b95a1b1 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -19,6 +19,7 @@ #include #include +#include #include #define vtr_to_max_lr_idx(v) ((v) & 0xf) @@ -371,3 +372,40 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) { write_gicreg(vmcr, ICH_VMCR_EL2); } + +#ifdef CONFIG_ARM64 + +int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) +{ + int rt; + u32 esr; + u32 vmcr; + void (*fn)(struct kvm_vcpu *, u32, int); + bool is_read; + u32 sysreg; + + esr = kvm_vcpu_get_hsr(vcpu); + if (vcpu_mode_is_32bit(vcpu)) { + if (!kvm_condition_valid(vcpu)) + return 1; + + sysreg = esr_cp15_to_sysreg(esr); + } else { + sysreg = esr_sys64_to_sysreg(esr); + } + + is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; + + switch (sysreg) { + default: + return 0; + } + + vmcr = __vgic_v3_read_vmcr(); + rt = kvm_vcpu_sys_get_rt(vcpu); + fn(vcpu, vmcr, rt); + + return 1; +} + +#endif diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 030248e669f6..fac6e23cd0b3 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -429,6 +429,8 @@ out: return ret; } +DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node -- cgit v1.2.3-70-g09d2 From d70c7b31a60f2458f35c226131f2a01a7a98b6cf Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:34 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_BPR1_EL1 handler Add a handler for reading/writing the guest's view of the ICC_BPR1_EL1 register, which is located in the ICH_VMCR_EL2.BPR1 field. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 57 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index e6c05b95a1b1..fe021abc8b51 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -375,6 +375,57 @@ void __hyp_text __vgic_v3_write_vmcr(u32 vmcr) #ifdef CONFIG_ARM64 +static int __hyp_text __vgic_v3_bpr_min(void) +{ + /* See Pseudocode for VPriorityGroup */ + return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); +} + +static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) +{ + return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; +} + +static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) +{ + unsigned int bpr; + + if (vmcr & ICH_VMCR_CBPR_MASK) { + bpr = __vgic_v3_get_bpr0(vmcr); + if (bpr < 7) + bpr++; + } else { + bpr = (vmcr & ICH_VMCR_BPR1_MASK) >> ICH_VMCR_BPR1_SHIFT; + } + + return bpr; +} + +static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); +} + +static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min(); + + if (vmcr & ICH_VMCR_CBPR_MASK) + return; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR1_SHIFT; + val &= ICH_VMCR_BPR1_MASK; + vmcr &= ~ICH_VMCR_BPR1_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; @@ -397,6 +448,12 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; switch (sysreg) { + case SYS_ICC_BPR1_EL1: + if (is_read) + fn = __vgic_v3_read_bpr1; + else + fn = __vgic_v3_write_bpr1; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From f8b630bc542e0368886ae193d3519c832b270359 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:35 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_IGRPEN1_EL1 handler Add a handler for reading/writing the guest's view of the ICC_IGRPEN1_EL1 register, which is located in the ICH_VMCR_EL2.VENG1 field. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index fe021abc8b51..25f09721241f 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -401,6 +401,23 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) return bpr; } +static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); +} + +static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG1_MASK; + else + vmcr &= ~ICH_VMCR_ENG1_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); @@ -448,6 +465,12 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; switch (sysreg) { + case SYS_ICC_GRPEN1_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen1; + else + fn = __vgic_v3_write_igrpen1; + break; case SYS_ICC_BPR1_EL1: if (is_read) fn = __vgic_v3_read_bpr1; -- cgit v1.2.3-70-g09d2 From 132a324ab62fe4fb8d6dcc2ab4eddb0e93b69afe Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:36 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_IAR1_EL1 handler Add a handler for reading the guest's view of the ICC_IAR1_EL1 register. This involves finding the highest priority Group-1 interrupt, checking against both PMR and the active group priority, activating the interrupt and setting the group priority as active. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 163 +++++++++++++++++++++++++++++++++++++ 2 files changed, 164 insertions(+) (limited to 'virt') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1fa293a37f4a..d70668fae003 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -405,6 +405,7 @@ #define ICH_LR_PHYS_ID_SHIFT 32 #define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) #define ICH_LR_PRIORITY_SHIFT 48 +#define ICH_LR_PRIORITY_MASK (0xffULL << ICH_LR_PRIORITY_SHIFT) /* These are for GICv2 emulation only */ #define GICH_LR_VIRTUALID (0x3ffUL << 0) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 25f09721241f..5a20f8d5bada 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -24,6 +24,7 @@ #define vtr_to_max_lr_idx(v) ((v) & 0xf) #define vtr_to_nr_pre_bits(v) ((((u32)(v) >> 26) & 7) + 1) +#define vtr_to_nr_apr_regs(v) (1 << (vtr_to_nr_pre_bits(v) - 5)) static u64 __hyp_text __gic_v3_get_lr(unsigned int lr) { @@ -381,6 +382,88 @@ static int __hyp_text __vgic_v3_bpr_min(void) return 8 - vtr_to_nr_pre_bits(read_gicreg(ICH_VTR_EL2)); } +static int __hyp_text __vgic_v3_get_group(struct kvm_vcpu *vcpu) +{ + u32 esr = kvm_vcpu_get_hsr(vcpu); + u8 crm = (esr & ESR_ELx_SYS64_ISS_CRM_MASK) >> ESR_ELx_SYS64_ISS_CRM_SHIFT; + + return crm != 8; +} + +#define GICv3_IDLE_PRIORITY 0xff + +static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, + u32 vmcr, + u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + u8 priority = GICv3_IDLE_PRIORITY; + int i, lr = -1; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + u8 lr_prio = (val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* Not pending in the state? */ + if ((val & ICH_LR_STATE) != ICH_LR_PENDING_BIT) + continue; + + /* Group-0 interrupt, but Group-0 disabled? */ + if (!(val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG0_MASK)) + continue; + + /* Group-1 interrupt, but Group-1 disabled? */ + if ((val & ICH_LR_GROUP) && !(vmcr & ICH_VMCR_ENG1_MASK)) + continue; + + /* Not the highest priority? */ + if (lr_prio >= priority) + continue; + + /* This is a candidate */ + priority = lr_prio; + *lr_val = val; + lr = i; + } + + if (lr == -1) + *lr_val = ICC_IAR1_EL1_SPURIOUS; + + return lr; +} + +static int __hyp_text __vgic_v3_get_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 val; + + /* + * The ICH_AP0Rn_EL2 and ICH_AP1Rn_EL2 registers + * contain the active priority levels for this VCPU + * for the maximum number of supported priority + * levels, and we return the full priority level only + * if the BPR is programmed to its minimum, otherwise + * we return a combination of the priority level and + * subpriority, as determined by the setting of the + * BPR, but without the full subpriority. + */ + val = __vgic_v3_read_ap0rn(i); + val |= __vgic_v3_read_ap1rn(i); + if (!val) { + hap += 32; + continue; + } + + return (hap + __ffs(val)) << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + static unsigned int __hyp_text __vgic_v3_get_bpr0(u32 vmcr) { return (vmcr & ICH_VMCR_BPR0_MASK) >> ICH_VMCR_BPR0_SHIFT; @@ -401,6 +484,83 @@ static unsigned int __hyp_text __vgic_v3_get_bpr1(u32 vmcr) return bpr; } +/* + * Convert a priority to a preemption level, taking the relevant BPR + * into account by zeroing the sub-priority bits. + */ +static u8 __hyp_text __vgic_v3_pri_to_pre(u8 pri, u32 vmcr, int grp) +{ + unsigned int bpr; + + if (!grp) + bpr = __vgic_v3_get_bpr0(vmcr) + 1; + else + bpr = __vgic_v3_get_bpr1(vmcr); + + return pri & (GENMASK(7, 0) << bpr); +} + +/* + * The priority value is independent of any of the BPR values, so we + * normalize it using the minumal BPR value. This guarantees that no + * matter what the guest does with its BPR, we can always set/get the + * same value of a priority. + */ +static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) +{ + u8 pre, ap; + u32 val; + int apr; + + pre = __vgic_v3_pri_to_pre(pri, vmcr, grp); + ap = pre >> __vgic_v3_bpr_min(); + apr = ap / 32; + + if (!grp) { + val = __vgic_v3_read_ap0rn(apr); + __vgic_v3_write_ap0rn(val | BIT(ap % 32), apr); + } else { + val = __vgic_v3_read_ap1rn(apr); + __vgic_v3_write_ap1rn(val | BIT(ap % 32), apr); + } +} + +static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 lr_val; + u8 lr_prio, pmr; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr < 0) + goto spurious; + + if (grp != !!(lr_val & ICH_LR_GROUP)) + goto spurious; + + pmr = (vmcr & ICH_VMCR_PMR_MASK) >> ICH_VMCR_PMR_SHIFT; + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + if (pmr <= lr_prio) + goto spurious; + + if (__vgic_v3_get_highest_active_priority() <= __vgic_v3_pri_to_pre(lr_prio, vmcr, grp)) + goto spurious; + + lr_val &= ~ICH_LR_STATE; + /* No active state for LPIs */ + if ((lr_val & ICH_LR_VIRTUAL_ID_MASK) <= VGIC_MAX_SPI) + lr_val |= ICH_LR_ACTIVE_BIT; + __gic_v3_set_lr(lr_val, lr); + __vgic_v3_set_active_priority(lr_prio, vmcr, grp); + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); + return; + +spurious: + vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); +} + static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); @@ -465,6 +625,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; switch (sysreg) { + case SYS_ICC_IAR1_EL1: + fn = __vgic_v3_read_iar; + break; case SYS_ICC_GRPEN1_EL1: if (is_read) fn = __vgic_v3_read_igrpen1; -- cgit v1.2.3-70-g09d2 From b6f49035b4bf6e2709f2a5fed3107f5438c1fd02 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:37 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_EOIR1_EL1 handler Add a handler for writing the guest's view of the ICC_EOIR1_EL1 register. This involves dropping the priority of the interrupt, and deactivating it if required (EOImode == 0). Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 2 + virt/kvm/arm/hyp/vgic-v3-sr.c | 120 +++++++++++++++++++++++++++++++++++++ 2 files changed, 122 insertions(+) (limited to 'virt') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index d70668fae003..1f458ac6f494 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,8 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_EOIcount_SHIFT 27 +#define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) #define ICH_VMCR_ACK_CTL_SHIFT 2 #define ICH_VMCR_ACK_CTL_MASK (1 << ICH_VMCR_ACK_CTL_SHIFT) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 5a20f8d5bada..e9ff99112c4d 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -432,6 +432,26 @@ static int __hyp_text __vgic_v3_highest_priority_lr(struct kvm_vcpu *vcpu, return lr; } +static int __hyp_text __vgic_v3_find_active_lr(struct kvm_vcpu *vcpu, + int intid, u64 *lr_val) +{ + unsigned int used_lrs = vcpu->arch.vgic_cpu.used_lrs; + int i; + + for (i = 0; i < used_lrs; i++) { + u64 val = __gic_v3_get_lr(i); + + if ((val & ICH_LR_VIRTUAL_ID_MASK) == intid && + (val & ICH_LR_ACTIVE_BIT)) { + *lr_val = val; + return i; + } + } + + *lr_val = ICC_IAR1_EL1_SPURIOUS; + return -1; +} + static int __hyp_text __vgic_v3_get_highest_active_priority(void) { u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); @@ -525,6 +545,44 @@ static void __hyp_text __vgic_v3_set_active_priority(u8 pri, u32 vmcr, int grp) } } +static int __hyp_text __vgic_v3_clear_highest_active_priority(void) +{ + u8 nr_apr_regs = vtr_to_nr_apr_regs(read_gicreg(ICH_VTR_EL2)); + u32 hap = 0; + int i; + + for (i = 0; i < nr_apr_regs; i++) { + u32 ap0, ap1; + int c0, c1; + + ap0 = __vgic_v3_read_ap0rn(i); + ap1 = __vgic_v3_read_ap1rn(i); + if (!ap0 && !ap1) { + hap += 32; + continue; + } + + c0 = ap0 ? __ffs(ap0) : 32; + c1 = ap1 ? __ffs(ap1) : 32; + + /* Always clear the LSB, which is the highest priority */ + if (c0 < c1) { + ap0 &= ~BIT(c0); + __vgic_v3_write_ap0rn(ap0, i); + hap += c0; + } else { + ap1 &= ~BIT(c1); + __vgic_v3_write_ap1rn(ap1, i); + hap += c1; + } + + /* Rescale to 8 bits of priority */ + return hap << __vgic_v3_bpr_min(); + } + + return GICv3_IDLE_PRIORITY; +} + static void __hyp_text __vgic_v3_read_iar(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 lr_val; @@ -561,6 +619,65 @@ spurious: vcpu_set_reg(vcpu, rt, ICC_IAR1_EL1_SPURIOUS); } +static void __hyp_text __vgic_v3_clear_active_lr(int lr, u64 lr_val) +{ + lr_val &= ~ICH_LR_ACTIVE_BIT; + if (lr_val & ICH_LR_HW) { + u32 pid; + + pid = (lr_val & ICH_LR_PHYS_ID_MASK) >> ICH_LR_PHYS_ID_SHIFT; + gic_write_dir(pid); + } + + __gic_v3_set_lr(lr_val, lr); +} + +static void __hyp_text __vgic_v3_bump_eoicount(void) +{ + u32 hcr; + + hcr = read_gicreg(ICH_HCR_EL2); + hcr += 1 << ICH_HCR_EOIcount_SHIFT; + write_gicreg(hcr, ICH_HCR_EL2); +} + +static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + u8 lr_prio, act_prio; + int lr, grp; + + grp = __vgic_v3_get_group(vcpu); + + /* Drop priority in any case */ + act_prio = __vgic_v3_clear_highest_active_priority(); + + /* If EOIing an LPI, no deactivate to be performed */ + if (vid >= VGIC_MIN_LPI) + return; + + /* EOImode == 1, nothing to be done here */ + if (vmcr & ICH_VMCR_EOIM_MASK) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + lr_prio = (lr_val & ICH_LR_PRIORITY_MASK) >> ICH_LR_PRIORITY_SHIFT; + + /* If priorities or group do not match, the guest has fscked-up. */ + if (grp != !!(lr_val & ICH_LR_GROUP) || + __vgic_v3_pri_to_pre(lr_prio, vmcr, grp) != act_prio) + return; + + /* Let's now perform the deactivation */ + __vgic_v3_clear_active_lr(lr, lr_val); +} + static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); @@ -628,6 +745,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) case SYS_ICC_IAR1_EL1: fn = __vgic_v3_read_iar; break; + case SYS_ICC_EOIR1_EL1: + fn = __vgic_v3_write_eoir; + break; case SYS_ICC_GRPEN1_EL1: if (is_read) fn = __vgic_v3_read_igrpen1; -- cgit v1.2.3-70-g09d2 From f9e7449c780f688bf61a13dfa8c344afeb4ad6e0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:38 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_AP1Rn_EL1 handler Add a handler for reading/writing the guest's view of the ICV_AP1Rn_EL1 registers. We just map them to the corresponding ICH_AP1Rn_EL2 registers. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/sysreg.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 94 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 95 insertions(+) (limited to 'virt') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index b4d13d9267ff..563bba108442 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -180,6 +180,7 @@ #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) +#define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index e9ff99112c4d..1c85a6df22d9 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -720,6 +720,76 @@ static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_write_vmcr(vmcr); } +static void __hyp_text __vgic_v3_read_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val; + + if (!__vgic_v3_get_group(vcpu)) + val = __vgic_v3_read_ap0rn(n); + else + val = __vgic_v3_read_ap1rn(n); + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_apxrn(struct kvm_vcpu *vcpu, int rt, int n) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (!__vgic_v3_get_group(vcpu)) + __vgic_v3_write_ap0rn(val, n); + else + __vgic_v3_write_ap1rn(val, n); +} + +static void __hyp_text __vgic_v3_read_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 0); +} + +static void __hyp_text __vgic_v3_read_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_read_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_read_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_read_apxrn(vcpu, rt, 3); +} + +static void __hyp_text __vgic_v3_write_apxr0(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 0); +} + +static void __hyp_text __vgic_v3_write_apxr1(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 1); +} + +static void __hyp_text __vgic_v3_write_apxr2(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 2); +} + +static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + __vgic_v3_write_apxrn(vcpu, rt, 3); +} + int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; @@ -760,6 +830,30 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) else fn = __vgic_v3_write_bpr1; break; + case SYS_ICC_AP1Rn_EL1(0): + if (is_read) + fn = __vgic_v3_read_apxr0; + else + fn = __vgic_v3_write_apxr0; + break; + case SYS_ICC_AP1Rn_EL1(1): + if (is_read) + fn = __vgic_v3_read_apxr1; + else + fn = __vgic_v3_write_apxr1; + break; + case SYS_ICC_AP1Rn_EL1(2): + if (is_read) + fn = __vgic_v3_read_apxr2; + else + fn = __vgic_v3_write_apxr2; + break; + case SYS_ICC_AP1Rn_EL1(3): + if (is_read) + fn = __vgic_v3_read_apxr3; + else + fn = __vgic_v3_write_apxr3; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From 2724c11a1df4b22ee966c04809ea0e808f66b04e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:39 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_HPPIR1_EL1 handler Add a handler for reading the guest's view of the ICV_HPPIR1_EL1 register. This is a simple parsing of the available LRs, extracting the highest available interrupt. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/sysreg.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'virt') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 563bba108442..0ce7f81dd47e 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -185,6 +185,7 @@ #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) +#define SYS_ICC_HPPIR1_EL1 sys_reg(3, 0, 12, 12, 2) #define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) #define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) #define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 1c85a6df22d9..f031e8f088ae 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -790,6 +790,26 @@ static void __hyp_text __vgic_v3_write_apxr3(struct kvm_vcpu *vcpu, __vgic_v3_write_apxrn(vcpu, rt, 3); } +static void __hyp_text __vgic_v3_read_hppir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u64 lr_val; + int lr, lr_grp, grp; + + grp = __vgic_v3_get_group(vcpu); + + lr = __vgic_v3_highest_priority_lr(vcpu, vmcr, &lr_val); + if (lr == -1) + goto spurious; + + lr_grp = !!(lr_val & ICH_LR_GROUP); + if (lr_grp != grp) + lr_val = ICC_IAR1_EL1_SPURIOUS; + +spurious: + vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); +} + int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; @@ -854,6 +874,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) else fn = __vgic_v3_write_apxr3; break; + case SYS_ICC_HPPIR1_EL1: + fn = __vgic_v3_read_hppir; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From 9c7bfc288c71068ab323b802dba2eb87fd08b127 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:40 +0100 Subject: KVM: arm64: vgic-v3: Enable trapping of Group-1 system registers In order to be able to trap Group-1 GICv3 system registers, we need to set ICH_HCR_EL2.TALL1 before entering the guest. This is conditionally done after having restored the guest's state, and cleared on exit. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 11 +++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 4 ++++ 3 files changed, 16 insertions(+) (limited to 'virt') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1f458ac6f494..6b05d2ac8c54 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index f031e8f088ae..a2a62f030341 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -258,6 +258,9 @@ void __hyp_text __vgic_v3_save_state(struct kvm_vcpu *vcpu) cpu_if->vgic_ap1r[0] = __vgic_v3_read_ap1rn(0); } } else { + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + write_gicreg(0, ICH_HCR_EL2); + cpu_if->vgic_elrsr = 0xffff; cpu_if->vgic_ap0r[0] = 0; cpu_if->vgic_ap0r[1] = 0; @@ -330,6 +333,14 @@ void __hyp_text __vgic_v3_restore_state(struct kvm_vcpu *vcpu) for (i = 0; i < used_lrs; i++) __gic_v3_set_lr(cpu_if->vgic_lr[i], i); + } else { + /* + * If we need to trap system registers, we must write + * ICH_HCR_EL2 anyway, even if no interrupts are being + * injected, + */ + if (static_branch_unlikely(&vgic_v3_cpuif_trap)) + write_gicreg(cpu_if->vgic_hcr, ICH_HCR_EL2); } /* diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index fac6e23cd0b3..722bdebdfbb5 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -21,6 +21,8 @@ #include "vgic.h" +static bool group1_trap; + void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { struct vgic_v3_cpu_if *cpuif = &vcpu->arch.vgic_cpu.vgic_v3; @@ -258,6 +260,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group1_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL1; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) -- cgit v1.2.3-70-g09d2 From 182936eee7e6b1956881b51bc534a541dc71a101 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:41 +0100 Subject: KVM: arm64: Enable GICv3 Group-1 sysreg trapping via command-line Now that we're able to safely handle Group-1 sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.group1_trap). Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ virt/kvm/arm/vgic/vgic-v3.c | 11 +++++++++++ 2 files changed, 15 insertions(+) (limited to 'virt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 15f79c27748d..42fe395be6c8 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1829,6 +1829,10 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.vgic_v3_group1_trap= + [KVM,ARM] Trap guest accesses to GICv3 group-1 + system registers + kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 722bdebdfbb5..4b2b62b73470 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -435,6 +435,12 @@ out: DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); +static int __init early_group1_trap_cfg(char *buf) +{ + return strtobool(buf, &group1_trap); +} +early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node @@ -486,6 +492,11 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); + if (group1_trap) { + kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); + static_branch_enable(&vgic_v3_cpuif_trap); + } + kvm_vgic_global_state.vctrl_base = NULL; kvm_vgic_global_state.type = VGIC_V3; kvm_vgic_global_state.max_gic_vcpus = VGIC_V3_MAX_CPUS; -- cgit v1.2.3-70-g09d2 From 423de85a98c2b50715a0784a74f6124fbc0b1548 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:42 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_BPR0_EL1 handler Add a handler for reading/writing the guest's view of the ICC_BPR0_EL1 register, which is located in the ICH_VMCR_EL2.BPR0 field. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/sysreg.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 29 insertions(+) (limited to 'virt') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 0ce7f81dd47e..6b80211f9837 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -180,6 +180,7 @@ #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) +#define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3) #define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index a2a62f030341..f53908cc981c 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -706,11 +706,33 @@ static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, __vgic_v3_write_vmcr(vmcr); } +static void __hyp_text __vgic_v3_read_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr0(vmcr)); +} + static void __hyp_text __vgic_v3_read_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, __vgic_v3_get_bpr1(vmcr)); } +static void __hyp_text __vgic_v3_write_bpr0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + u8 bpr_min = __vgic_v3_bpr_min() - 1; + + /* Enforce BPR limiting */ + if (val < bpr_min) + val = bpr_min; + + val <<= ICH_VMCR_BPR0_SHIFT; + val &= ICH_VMCR_BPR0_MASK; + vmcr &= ~ICH_VMCR_BPR0_MASK; + vmcr |= val; + + __vgic_v3_write_vmcr(vmcr); +} + static void __hyp_text __vgic_v3_write_bpr1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -888,6 +910,12 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) case SYS_ICC_HPPIR1_EL1: fn = __vgic_v3_read_hppir; break; + case SYS_ICC_BPR0_EL1: + if (is_read) + fn = __vgic_v3_read_bpr0; + else + fn = __vgic_v3_write_bpr0; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From fbc48a0011deb3d51cb657ca9c0f9083f41c0665 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:43 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_IGNREN0_EL1 handler Add a handler for reading/writing the guest's view of the ICC_IGRPEN0_EL1 register, which is located in the ICH_VMCR_EL2.VENG0 field. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Eric Auger Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/sysreg.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 23 +++++++++++++++++++++++ 2 files changed, 24 insertions(+) (limited to 'virt') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 6b80211f9837..80b4e0a93574 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -190,6 +190,7 @@ #define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) #define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) #define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) +#define SYS_ICC_GRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) #define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index f53908cc981c..45927762bf14 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -689,11 +689,28 @@ static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int __vgic_v3_clear_active_lr(lr, lr_val); } +static void __hyp_text __vgic_v3_read_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG0_MASK)); +} + static void __hyp_text __vgic_v3_read_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { vcpu_set_reg(vcpu, rt, !!(vmcr & ICH_VMCR_ENG1_MASK)); } +static void __hyp_text __vgic_v3_write_igrpen0(struct kvm_vcpu *vcpu, u32 vmcr, int rt) +{ + u64 val = vcpu_get_reg(vcpu, rt); + + if (val & 1) + vmcr |= ICH_VMCR_ENG0_MASK; + else + vmcr &= ~ICH_VMCR_ENG0_MASK; + + __vgic_v3_write_vmcr(vmcr); +} + static void __hyp_text __vgic_v3_write_igrpen1(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u64 val = vcpu_get_reg(vcpu, rt); @@ -910,6 +927,12 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) case SYS_ICC_HPPIR1_EL1: fn = __vgic_v3_read_hppir; break; + case SYS_ICC_GRPEN0_EL1: + if (is_read) + fn = __vgic_v3_read_igrpen0; + else + fn = __vgic_v3_write_igrpen0; + break; case SYS_ICC_BPR0_EL1: if (is_read) fn = __vgic_v3_read_bpr0; -- cgit v1.2.3-70-g09d2 From eab0b2dc4f6f34147e3d10da49ab8032e15dbea0 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:44 +0100 Subject: KVM: arm64: vgic-v3: Add misc Group-0 handlers A number of Group-0 registers can be handled by the same accessors as that of Group-1, so let's add the required system register encodings and catch them in the dispatching function. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/sysreg.h | 4 ++++ virt/kvm/arm/hyp/vgic-v3-sr.c | 7 +++++++ 2 files changed, 11 insertions(+) (limited to 'virt') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 80b4e0a93574..670bf51d55e3 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -180,7 +180,11 @@ #define SYS_VBAR_EL1 sys_reg(3, 0, 12, 0, 0) +#define SYS_ICC_IAR0_EL1 sys_reg(3, 0, 12, 8, 0) +#define SYS_ICC_EOIR0_EL1 sys_reg(3, 0, 12, 8, 1) +#define SYS_ICC_HPPIR0_EL1 sys_reg(3, 0, 12, 8, 2) #define SYS_ICC_BPR0_EL1 sys_reg(3, 0, 12, 8, 3) +#define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n) #define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 45927762bf14..08a5d76c82c7 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -882,9 +882,11 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) is_read = (esr & ESR_ELx_SYS64_ISS_DIR_MASK) == ESR_ELx_SYS64_ISS_DIR_READ; switch (sysreg) { + case SYS_ICC_IAR0_EL1: case SYS_ICC_IAR1_EL1: fn = __vgic_v3_read_iar; break; + case SYS_ICC_EOIR0_EL1: case SYS_ICC_EOIR1_EL1: fn = __vgic_v3_write_eoir; break; @@ -900,30 +902,35 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) else fn = __vgic_v3_write_bpr1; break; + case SYS_ICC_AP0Rn_EL1(0): case SYS_ICC_AP1Rn_EL1(0): if (is_read) fn = __vgic_v3_read_apxr0; else fn = __vgic_v3_write_apxr0; break; + case SYS_ICC_AP0Rn_EL1(1): case SYS_ICC_AP1Rn_EL1(1): if (is_read) fn = __vgic_v3_read_apxr1; else fn = __vgic_v3_write_apxr1; break; + case SYS_ICC_AP0Rn_EL1(2): case SYS_ICC_AP1Rn_EL1(2): if (is_read) fn = __vgic_v3_read_apxr2; else fn = __vgic_v3_write_apxr2; break; + case SYS_ICC_AP0Rn_EL1(3): case SYS_ICC_AP1Rn_EL1(3): if (is_read) fn = __vgic_v3_read_apxr3; else fn = __vgic_v3_write_apxr3; break; + case SYS_ICC_HPPIR0_EL1: case SYS_ICC_HPPIR1_EL1: fn = __vgic_v3_read_hppir; break; -- cgit v1.2.3-70-g09d2 From abf55766f7b062234083ff612446ff8d47e2417e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:45 +0100 Subject: KVM: arm64: vgic-v3: Enable trapping of Group-0 system registers In order to be able to trap Group-0 GICv3 system registers, we need to set ICH_HCR_EL2.TALL0 begore entering the guest. This is conditionnaly done after having restored the guest's state, and cleared on exit. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/vgic/vgic-v3.c | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 6b05d2ac8c54..c7f31a962cfc 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 #define ICH_HCR_EOIcount_MASK (0x1f << ICH_HCR_EOIcount_SHIFT) diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 4b2b62b73470..39046ee5be25 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -21,6 +21,7 @@ #include "vgic.h" +static bool group0_trap; static bool group1_trap; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) @@ -260,6 +261,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) /* Get the show on the road... */ vgic_v3->vgic_hcr = ICH_HCR_EN; + if (group0_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TALL0; if (group1_trap) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; } @@ -492,7 +495,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); - if (group1_trap) { + if (group0_trap || group1_trap) { kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3-70-g09d2 From e23f62f76acea232d4def5d4eb21709a2b575f14 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:46 +0100 Subject: KVM: arm64: Enable GICv3 Group-0 sysreg trapping via command-line Now that we're able to safely handle Group-0 sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.group0_trap). Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ virt/kvm/arm/vgic/vgic-v3.c | 6 ++++++ 2 files changed, 10 insertions(+) (limited to 'virt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 42fe395be6c8..88bdc421351f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1829,6 +1829,10 @@ for all guests. Default is 1 (enabled) if in 64-bit or 32-bit PAE mode. + kvm-arm.vgic_v3_group0_trap= + [KVM,ARM] Trap guest accesses to GICv3 group-0 + system registers + kvm-arm.vgic_v3_group1_trap= [KVM,ARM] Trap guest accesses to GICv3 group-1 system registers diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 39046ee5be25..828ca7f9a060 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -438,6 +438,12 @@ out: DEFINE_STATIC_KEY_FALSE(vgic_v3_cpuif_trap); +static int __init early_group0_trap_cfg(char *buf) +{ + return strtobool(buf, &group0_trap); +} +early_param("kvm-arm.vgic_v3_group0_trap", early_group0_trap_cfg); + static int __init early_group1_trap_cfg(char *buf) { return strtobool(buf, &group1_trap); -- cgit v1.2.3-70-g09d2 From 690a341577f9adf2c275ababe0dcefe91898bbf0 Mon Sep 17 00:00:00 2001 From: David Daney Date: Fri, 9 Jun 2017 12:49:48 +0100 Subject: arm64: Add workaround for Cavium Thunder erratum 30115 Some Cavium Thunder CPUs suffer a problem where a KVM guest may inadvertently cause the host kernel to quit receiving interrupts. Use the Group-0/1 trapping in order to deal with it. [maz]: Adapted patch to the Group-0/1 trapping, reworked commit log Tested-by: Alexander Graf Acked-by: Catalin Marinas Reviewed-by: Eric Auger Signed-off-by: David Daney Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- Documentation/arm64/silicon-errata.txt | 1 + arch/arm64/Kconfig | 11 +++++++++++ arch/arm64/include/asm/cpucaps.h | 3 ++- arch/arm64/kernel/cpu_errata.c | 21 +++++++++++++++++++++ virt/kvm/arm/vgic/vgic-v3.c | 7 +++++++ 5 files changed, 42 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/Documentation/arm64/silicon-errata.txt b/Documentation/arm64/silicon-errata.txt index 10f2dddbf449..f5f93dca54b7 100644 --- a/Documentation/arm64/silicon-errata.txt +++ b/Documentation/arm64/silicon-errata.txt @@ -62,6 +62,7 @@ stable kernels. | Cavium | ThunderX GICv3 | #23154 | CAVIUM_ERRATUM_23154 | | Cavium | ThunderX Core | #27456 | CAVIUM_ERRATUM_27456 | | Cavium | ThunderX SMMUv2 | #27704 | N/A | +| Cavium | ThunderX Core | #30115 | CAVIUM_ERRATUM_30115 | | | | | | | Freescale/NXP | LS2080A/LS1043A | A-008585 | FSL_ERRATUM_A008585 | | | | | | diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 3dcd7ec69bca..6252365b0c96 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -480,6 +480,17 @@ config CAVIUM_ERRATUM_27456 If unsure, say Y. +config CAVIUM_ERRATUM_30115 + bool "Cavium erratum 30115: Guest may disable interrupts in host" + default y + help + On ThunderX T88 pass 1.x through 2.2, T81 pass 1.0 through + 1.2, and T83 Pass 1.0, KVM guest execution may disable + interrupts in host. Trapping both GICv3 group-0 and group-1 + accesses sidesteps the issue. + + If unsure, say Y. + config QCOM_FALKOR_ERRATUM_1003 bool "Falkor E1003: Incorrect translation due to ASID change" default y diff --git a/arch/arm64/include/asm/cpucaps.h b/arch/arm64/include/asm/cpucaps.h index b3aab8a17868..8d2272c6822c 100644 --- a/arch/arm64/include/asm/cpucaps.h +++ b/arch/arm64/include/asm/cpucaps.h @@ -38,7 +38,8 @@ #define ARM64_WORKAROUND_REPEAT_TLBI 17 #define ARM64_WORKAROUND_QCOM_FALKOR_E1003 18 #define ARM64_WORKAROUND_858921 19 +#define ARM64_WORKAROUND_CAVIUM_30115 20 -#define ARM64_NCAPS 20 +#define ARM64_NCAPS 21 #endif /* __ASM_CPUCAPS_H */ diff --git a/arch/arm64/kernel/cpu_errata.c b/arch/arm64/kernel/cpu_errata.c index 2ed2a7657711..0e27f86ee709 100644 --- a/arch/arm64/kernel/cpu_errata.c +++ b/arch/arm64/kernel/cpu_errata.c @@ -132,6 +132,27 @@ const struct arm64_cpu_capabilities arm64_errata[] = { .capability = ARM64_WORKAROUND_CAVIUM_27456, MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x00), }, +#endif +#ifdef CONFIG_CAVIUM_ERRATUM_30115 + { + /* Cavium ThunderX, T88 pass 1.x - 2.2 */ + .desc = "Cavium erratum 30115", + .capability = ARM64_WORKAROUND_CAVIUM_30115, + MIDR_RANGE(MIDR_THUNDERX, 0x00, + (1 << MIDR_VARIANT_SHIFT) | 2), + }, + { + /* Cavium ThunderX, T81 pass 1.0 - 1.2 */ + .desc = "Cavium erratum 30115", + .capability = ARM64_WORKAROUND_CAVIUM_30115, + MIDR_RANGE(MIDR_THUNDERX_81XX, 0x00, 0x02), + }, + { + /* Cavium ThunderX, T83 pass 1.0 */ + .desc = "Cavium erratum 30115", + .capability = ARM64_WORKAROUND_CAVIUM_30115, + MIDR_RANGE(MIDR_THUNDERX_83XX, 0x00, 0x00), + }, #endif { .desc = "Mismatched cache line size", diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 828ca7f9a060..35c00efc110b 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -501,6 +501,13 @@ int vgic_v3_probe(const struct gic_kvm_info *info) if (kvm_vgic_global_state.vcpu_base == 0) kvm_info("disabling GICv2 emulation\n"); +#ifdef CONFIG_ARM64 + if (cpus_have_const_cap(ARM64_WORKAROUND_CAVIUM_30115)) { + group0_trap = true; + group1_trap = true; + } +#endif + if (group0_trap || group1_trap) { kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); static_branch_enable(&vgic_v3_cpuif_trap); -- cgit v1.2.3-70-g09d2 From 40228ba57c8532d4977d684a363915402d85dbf9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:49 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_DIR_EL1 handler Add a handler for writing the guest's view of the ICC_DIR_EL1 register, performing the deactivation of an interrupt if EOImode is set ot 1. Tested-by: Alexander Graf Acked-by: David Daney Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 08a5d76c82c7..2f2af03e8932 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -652,6 +652,30 @@ static void __hyp_text __vgic_v3_bump_eoicount(void) write_gicreg(hcr, ICH_HCR_EL2); } +static void __hyp_text __vgic_v3_write_dir(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vid = vcpu_get_reg(vcpu, rt); + u64 lr_val; + int lr; + + /* EOImode == 0, nothing to be done here */ + if (!(vmcr & ICH_VMCR_EOIM_MASK)) + return; + + /* No deactivate to be performed on an LPI */ + if (vid >= VGIC_MIN_LPI) + return; + + lr = __vgic_v3_find_active_lr(vcpu, vid, &lr_val); + if (lr == -1) { + __vgic_v3_bump_eoicount(); + return; + } + + __vgic_v3_clear_active_lr(lr, lr_val); +} + static void __hyp_text __vgic_v3_write_eoir(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { u32 vid = vcpu_get_reg(vcpu, rt); @@ -946,6 +970,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) else fn = __vgic_v3_write_bpr0; break; + case SYS_ICC_DIR_EL1: + fn = __vgic_v3_write_dir; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From 43515894c06f856b7743145e002591309f60b247 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:50 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_RPR_EL1 handler Add a handler for reading the guest's view of the ICV_RPR_EL1 register, returning the highest active priority. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/sysreg.h | 1 + virt/kvm/arm/hyp/vgic-v3-sr.c | 10 ++++++++++ 2 files changed, 11 insertions(+) (limited to 'virt') diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 670bf51d55e3..56a3247e928c 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -187,6 +187,7 @@ #define SYS_ICC_AP0Rn_EL1(n) sys_reg(3, 0, 12, 8, 4 | n) #define SYS_ICC_AP1Rn_EL1(n) sys_reg(3, 0, 12, 9, n) #define SYS_ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) +#define SYS_ICC_RPR_EL1 sys_reg(3, 0, 12, 11, 3) #define SYS_ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) #define SYS_ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) #define SYS_ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 2f2af03e8932..406da9c667ff 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -884,6 +884,13 @@ spurious: vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); } +static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = __vgic_v3_get_highest_active_priority(); + vcpu_set_reg(vcpu, rt, val); +} + int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; @@ -973,6 +980,9 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) case SYS_ICC_DIR_EL1: fn = __vgic_v3_write_dir; break; + case SYS_ICC_RPR_EL1: + fn = __vgic_v3_read_rpr; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From d840b2d37d3ef2463db38274c6fd72619acbe216 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:51 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_CTLR_EL1 handler Add a handler for reading/writing the guest's view of the ICV_CTLR_EL1 register. only EOIMode and CBPR are of interest here, as all the other bits directly come from ICH_VTR_EL2 and are Read-Only. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 46 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 406da9c667ff..a8a58dedc38e 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -891,6 +891,46 @@ static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, vcpu_set_reg(vcpu, rt, val); } +static void __hyp_text __vgic_v3_read_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 vtr, val; + + vtr = read_gicreg(ICH_VTR_EL2); + /* PRIbits */ + val = ((vtr >> 29) & 7) << ICC_CTLR_EL1_PRI_BITS_SHIFT; + /* IDbits */ + val |= ((vtr >> 23) & 7) << ICC_CTLR_EL1_ID_BITS_SHIFT; + /* SEIS */ + val |= ((vtr >> 22) & 1) << ICC_CTLR_EL1_SEIS_SHIFT; + /* A3V */ + val |= ((vtr >> 21) & 1) << ICC_CTLR_EL1_A3V_SHIFT; + /* EOImode */ + val |= ((vmcr & ICH_VMCR_EOIM_MASK) >> ICH_VMCR_EOIM_SHIFT) << ICC_CTLR_EL1_EOImode_SHIFT; + /* CBPR */ + val |= (vmcr & ICH_VMCR_CBPR_MASK) >> ICH_VMCR_CBPR_SHIFT; + + vcpu_set_reg(vcpu, rt, val); +} + +static void __hyp_text __vgic_v3_write_ctlr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + if (val & ICC_CTLR_EL1_CBPR_MASK) + vmcr |= ICH_VMCR_CBPR_MASK; + else + vmcr &= ~ICH_VMCR_CBPR_MASK; + + if (val & ICC_CTLR_EL1_EOImode_MASK) + vmcr |= ICH_VMCR_EOIM_MASK; + else + vmcr &= ~ICH_VMCR_EOIM_MASK; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) { int rt; @@ -983,6 +1023,12 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) case SYS_ICC_RPR_EL1: fn = __vgic_v3_read_rpr; break; + case SYS_ICC_CTLR_EL1: + if (is_read) + fn = __vgic_v3_read_ctlr; + else + fn = __vgic_v3_write_ctlr; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From 6293d6514d6b0945210e15edcecc71d99cfca97c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:52 +0100 Subject: KVM: arm64: vgic-v3: Add ICV_PMR_EL1 handler Add a handler for reading/writing the guest's view of the ICC_PMR_EL1 register, which is located in the ICH_VMCR_EL2.VPMR field. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Reviewed-by: Eric Auger Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/hyp/vgic-v3-sr.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index a8a58dedc38e..15b557697086 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -884,6 +884,27 @@ spurious: vcpu_set_reg(vcpu, rt, lr_val & ICH_LR_VIRTUAL_ID_MASK); } +static void __hyp_text __vgic_v3_read_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + vmcr &= ICH_VMCR_PMR_MASK; + vmcr >>= ICH_VMCR_PMR_SHIFT; + vcpu_set_reg(vcpu, rt, vmcr); +} + +static void __hyp_text __vgic_v3_write_pmr(struct kvm_vcpu *vcpu, + u32 vmcr, int rt) +{ + u32 val = vcpu_get_reg(vcpu, rt); + + val <<= ICH_VMCR_PMR_SHIFT; + val &= ICH_VMCR_PMR_MASK; + vmcr &= ~ICH_VMCR_PMR_MASK; + vmcr |= val; + + write_gicreg(vmcr, ICH_VMCR_EL2); +} + static void __hyp_text __vgic_v3_read_rpr(struct kvm_vcpu *vcpu, u32 vmcr, int rt) { @@ -1029,6 +1050,12 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) else fn = __vgic_v3_write_ctlr; break; + case SYS_ICC_PMR_EL1: + if (is_read) + fn = __vgic_v3_read_pmr; + else + fn = __vgic_v3_write_pmr; + break; default: return 0; } -- cgit v1.2.3-70-g09d2 From ff89511ef29b794d6a9c6b62f5ea76fc013cdae7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:53 +0100 Subject: KVM: arm64: Enable GICv3 common sysreg trapping via command-line Now that we're able to safely handle common sysreg access, let's give the user the opportunity to enable it by passing a specific command-line option (vgic_v3.common_trap). Tested-by: Alexander Graf Acked-by: David Daney Signed-off-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Christoffer Dall --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ include/linux/irqchip/arm-gic-v3.h | 1 + virt/kvm/arm/vgic/vgic-v3.c | 11 ++++++++++- 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 88bdc421351f..aa8341e73b35 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1837,6 +1837,10 @@ [KVM,ARM] Trap guest accesses to GICv3 group-1 system registers + kvm-arm.vgic_v3_common_trap= + [KVM,ARM] Trap guest accesses to GICv3 common + system registers + kvm-intel.ept= [KVM,Intel] Disable extended page tables (virtualized MMU) support on capable Intel chips. Default is 1 (enabled) diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c7f31a962cfc..6a1f87ff94e2 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -417,6 +417,7 @@ #define ICH_HCR_EN (1 << 0) #define ICH_HCR_UIE (1 << 1) +#define ICH_HCR_TC (1 << 10) #define ICH_HCR_TALL0 (1 << 11) #define ICH_HCR_TALL1 (1 << 12) #define ICH_HCR_EOIcount_SHIFT 27 diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 35c00efc110b..91cf8b4f01f1 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -23,6 +23,7 @@ static bool group0_trap; static bool group1_trap; +static bool common_trap; void vgic_v3_set_underflow(struct kvm_vcpu *vcpu) { @@ -265,6 +266,8 @@ void vgic_v3_enable(struct kvm_vcpu *vcpu) vgic_v3->vgic_hcr |= ICH_HCR_TALL0; if (group1_trap) vgic_v3->vgic_hcr |= ICH_HCR_TALL1; + if (common_trap) + vgic_v3->vgic_hcr |= ICH_HCR_TC; } int vgic_v3_lpi_sync_pending_status(struct kvm *kvm, struct vgic_irq *irq) @@ -450,6 +453,12 @@ static int __init early_group1_trap_cfg(char *buf) } early_param("kvm-arm.vgic_v3_group1_trap", early_group1_trap_cfg); +static int __init early_common_trap_cfg(char *buf) +{ + return strtobool(buf, &common_trap); +} +early_param("kvm-arm.vgic_v3_common_trap", early_common_trap_cfg); + /** * vgic_v3_probe - probe for a GICv3 compatible interrupt controller in DT * @node: pointer to the DT node @@ -508,7 +517,7 @@ int vgic_v3_probe(const struct gic_kvm_info *info) } #endif - if (group0_trap || group1_trap) { + if (group0_trap || group1_trap || common_trap) { kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3-70-g09d2 From 2873b5082c5fbe2037f12e8d2abc8ad8f8d82a1b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:54 +0100 Subject: KVM: arm64: vgic-v3: Log which GICv3 system registers are trapped In order to facilitate debug, let's log which class of GICv3 system registers are trapped. Tested-by: Alexander Graf Acked-by: David Daney Acked-by: Christoffer Dall Signed-off-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/vgic/vgic-v3.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/arm/vgic/vgic-v3.c b/virt/kvm/arm/vgic/vgic-v3.c index 91cf8b4f01f1..96ea597db0e7 100644 --- a/virt/kvm/arm/vgic/vgic-v3.c +++ b/virt/kvm/arm/vgic/vgic-v3.c @@ -518,7 +518,10 @@ int vgic_v3_probe(const struct gic_kvm_info *info) #endif if (group0_trap || group1_trap || common_trap) { - kvm_info("GICv3 sysreg trapping enabled (reduced performance)\n"); + kvm_info("GICv3 sysreg trapping enabled ([%s%s%s], reduced performance)\n", + group0_trap ? "G0" : "", + group1_trap ? "G1" : "", + common_trap ? "C" : ""); static_branch_enable(&vgic_v3_cpuif_trap); } -- cgit v1.2.3-70-g09d2 From e7f1d1eef482150a64a6e6ad8faf40f8f97eed67 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:55 +0100 Subject: KVM: arm64: Log an error if trapping a read-from-write-only GICv3 access A read-from-write-only GICv3 access should UNDEF at EL1. But since we're in complete paranoia-land with broken CPUs, let's assume the worse and gracefully handle the case. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 12 ++++++++---- virt/kvm/arm/hyp/vgic-v3-sr.c | 4 ++++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'virt') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 0fe27024a2e1..8d51c075966d 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -56,7 +56,8 @@ */ static bool read_from_write_only(struct kvm_vcpu *vcpu, - const struct sys_reg_params *params) + struct sys_reg_params *params, + const struct sys_reg_desc *r) { WARN_ONCE(1, "Unexpected sys_reg read to write-only register\n"); print_sys_reg_instr(params); @@ -93,7 +94,7 @@ static bool access_dcsw(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (!p->is_write) - return read_from_write_only(vcpu, p); + return read_from_write_only(vcpu, p, r); kvm_set_way_flush(vcpu); return true; @@ -135,7 +136,7 @@ static bool access_gic_sgi(struct kvm_vcpu *vcpu, const struct sys_reg_desc *r) { if (!p->is_write) - return read_from_write_only(vcpu, p); + return read_from_write_only(vcpu, p, r); vgic_v3_dispatch_sgi(vcpu, p->regval); @@ -773,7 +774,7 @@ static bool access_pmswinc(struct kvm_vcpu *vcpu, struct sys_reg_params *p, return trap_raz_wi(vcpu, p, r); if (!p->is_write) - return read_from_write_only(vcpu, p); + return read_from_write_only(vcpu, p, r); if (pmu_write_swinc_el0_disabled(vcpu)) return false; @@ -953,7 +954,10 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, + { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only }, + { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only }, { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only }, { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 15b557697086..b26ce58b012a 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -980,6 +980,8 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) break; case SYS_ICC_EOIR0_EL1: case SYS_ICC_EOIR1_EL1: + if (unlikely(is_read)) + return 0; fn = __vgic_v3_write_eoir; break; case SYS_ICC_GRPEN1_EL1: @@ -1039,6 +1041,8 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) fn = __vgic_v3_write_bpr0; break; case SYS_ICC_DIR_EL1: + if (unlikely(is_read)) + return 0; fn = __vgic_v3_write_dir; break; case SYS_ICC_RPR_EL1: -- cgit v1.2.3-70-g09d2 From 7b1dba1f7325629427c0e5bdf014159b229d16c8 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Jun 2017 12:49:56 +0100 Subject: KVM: arm64: Log an error if trapping a write-to-read-only GICv3 access A write-to-read-only GICv3 access should UNDEF at EL1. But since we're in complete paranoia-land with broken CPUs, let's assume the worse and gracefully handle the case. Signed-off-by: Marc Zyngier Reviewed-by: Christoffer Dall Signed-off-by: Christoffer Dall --- arch/arm64/kvm/sys_regs.c | 15 +++++++++++++++ virt/kvm/arm/hyp/vgic-v3-sr.c | 6 ++++++ 2 files changed, 21 insertions(+) (limited to 'virt') diff --git a/arch/arm64/kvm/sys_regs.c b/arch/arm64/kvm/sys_regs.c index 8d51c075966d..77862881ae86 100644 --- a/arch/arm64/kvm/sys_regs.c +++ b/arch/arm64/kvm/sys_regs.c @@ -65,6 +65,16 @@ static bool read_from_write_only(struct kvm_vcpu *vcpu, return false; } +static bool write_to_read_only(struct kvm_vcpu *vcpu, + struct sys_reg_params *params, + const struct sys_reg_desc *r) +{ + WARN_ONCE(1, "Unexpected sys_reg write to read-only register\n"); + print_sys_reg_instr(params); + kvm_inject_undefined(vcpu); + return false; +} + /* 3 bits per cache level, as per CLIDR, but non-existent caches always 0 */ static u32 cache_levels; @@ -954,10 +964,15 @@ static const struct sys_reg_desc sys_reg_descs[] = { { SYS_DESC(SYS_VBAR_EL1), NULL, reset_val, VBAR_EL1, 0 }, + { SYS_DESC(SYS_ICC_IAR0_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_EOIR0_EL1), read_from_write_only }, + { SYS_DESC(SYS_ICC_HPPIR0_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_DIR_EL1), read_from_write_only }, + { SYS_DESC(SYS_ICC_RPR_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_SGI1R_EL1), access_gic_sgi }, + { SYS_DESC(SYS_ICC_IAR1_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_EOIR1_EL1), read_from_write_only }, + { SYS_DESC(SYS_ICC_HPPIR1_EL1), write_to_read_only }, { SYS_DESC(SYS_ICC_SRE_EL1), access_gic_sre }, { SYS_DESC(SYS_CONTEXTIDR_EL1), access_vm_reg, reset_val, CONTEXTIDR_EL1, 0 }, diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index b26ce58b012a..79e3c2d3b754 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -976,6 +976,8 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) switch (sysreg) { case SYS_ICC_IAR0_EL1: case SYS_ICC_IAR1_EL1: + if (unlikely(!is_read)) + return 0; fn = __vgic_v3_read_iar; break; case SYS_ICC_EOIR0_EL1: @@ -1026,6 +1028,8 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) break; case SYS_ICC_HPPIR0_EL1: case SYS_ICC_HPPIR1_EL1: + if (unlikely(!is_read)) + return 0; fn = __vgic_v3_read_hppir; break; case SYS_ICC_GRPEN0_EL1: @@ -1046,6 +1050,8 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) fn = __vgic_v3_write_dir; break; case SYS_ICC_RPR_EL1: + if (unlikely(!is_read)) + return 0; fn = __vgic_v3_read_rpr; break; case SYS_ICC_CTLR_EL1: -- cgit v1.2.3-70-g09d2 From 21bc52817772a5af6a8a5a750c676ea4a02d4d3b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 5 Jun 2017 14:20:00 +0100 Subject: arm64/kvm: sysreg: fix typo'd SYS_ICC_IGRPEN*_EL1 Per ARM DDI 0487B.a, the registers are named ICC_IGRPEN*_EL1 rather than ICC_GRPEN*_EL1. Correct our mnemonics and comments to match, before we add more GICv3 register definitions. Signed-off-by: Mark Rutland Cc: Catalin Marinas Cc: Marc Zyngier Cc: kvmarm@lists.cs.columbia.edu Acked-by: Christoffer Dall Acked-by: Will Deacon Signed-off-by: Christoffer Dall --- arch/arm64/include/asm/arch_gicv3.h | 2 +- arch/arm64/include/asm/sysreg.h | 4 ++-- arch/arm64/kvm/vgic-sys-reg-v3.c | 2 +- virt/kvm/arm/hyp/vgic-v3-sr.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'virt') diff --git a/arch/arm64/include/asm/arch_gicv3.h b/arch/arm64/include/asm/arch_gicv3.h index 1a98bc8602a2..8cef47fa2218 100644 --- a/arch/arm64/include/asm/arch_gicv3.h +++ b/arch/arm64/include/asm/arch_gicv3.h @@ -89,7 +89,7 @@ static inline void gic_write_ctlr(u32 val) static inline void gic_write_grpen1(u32 val) { - write_sysreg_s(val, SYS_ICC_GRPEN1_EL1); + write_sysreg_s(val, SYS_ICC_IGRPEN1_EL1); isb(); } diff --git a/arch/arm64/include/asm/sysreg.h b/arch/arm64/include/asm/sysreg.h index 56a3247e928c..00d493ba8dbd 100644 --- a/arch/arm64/include/asm/sysreg.h +++ b/arch/arm64/include/asm/sysreg.h @@ -195,8 +195,8 @@ #define SYS_ICC_BPR1_EL1 sys_reg(3, 0, 12, 12, 3) #define SYS_ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) #define SYS_ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define SYS_ICC_GRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) -#define SYS_ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) +#define SYS_ICC_IGRPEN0_EL1 sys_reg(3, 0, 12, 12, 6) +#define SYS_ICC_IGRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) #define SYS_CONTEXTIDR_EL1 sys_reg(3, 0, 13, 0, 1) #define SYS_TPIDR_EL1 sys_reg(3, 0, 13, 0, 4) diff --git a/arch/arm64/kvm/vgic-sys-reg-v3.c b/arch/arm64/kvm/vgic-sys-reg-v3.c index 6260b69e5622..5fb3cc9e8f52 100644 --- a/arch/arm64/kvm/vgic-sys-reg-v3.c +++ b/arch/arm64/kvm/vgic-sys-reg-v3.c @@ -296,7 +296,7 @@ static const struct sys_reg_desc gic_v3_icc_reg_descs[] = { { Op0(3), Op1(0), CRn(12), CRm(12), Op2(5), access_gic_sre }, /* ICC_IGRPEN0_EL1 */ { Op0(3), Op1(0), CRn(12), CRm(12), Op2(6), access_gic_grpen0 }, - /* ICC_GRPEN1_EL1 */ + /* ICC_IGRPEN1_EL1 */ { Op0(3), Op1(0), CRn(12), CRm(12), Op2(7), access_gic_grpen1 }, }; diff --git a/virt/kvm/arm/hyp/vgic-v3-sr.c b/virt/kvm/arm/hyp/vgic-v3-sr.c index 79e3c2d3b754..91728faa13fd 100644 --- a/virt/kvm/arm/hyp/vgic-v3-sr.c +++ b/virt/kvm/arm/hyp/vgic-v3-sr.c @@ -986,7 +986,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) return 0; fn = __vgic_v3_write_eoir; break; - case SYS_ICC_GRPEN1_EL1: + case SYS_ICC_IGRPEN1_EL1: if (is_read) fn = __vgic_v3_read_igrpen1; else @@ -1032,7 +1032,7 @@ int __hyp_text __vgic_v3_perform_cpuif_access(struct kvm_vcpu *vcpu) return 0; fn = __vgic_v3_read_hppir; break; - case SYS_ICC_GRPEN0_EL1: + case SYS_ICC_IGRPEN0_EL1: if (is_read) fn = __vgic_v3_read_igrpen0; else -- cgit v1.2.3-70-g09d2 From 02d50cdaff36c135d222015cffdca3ff11d168ea Mon Sep 17 00:00:00 2001 From: Hu Huajun Date: Mon, 12 Jun 2017 22:37:48 +0800 Subject: KVM: ARM64: fix phy counter access failure in guest. When reading the cntpct_el0 in guest with VHE (Virtual Host Extension) enabled in host, the "Unsupported guest sys_reg access" error reported. The reason is cnthctl_el2.EL1PCTEN is not enabled, which is expected to be done in kvm_timer_init_vhe(). The problem is kvm_timer_init_vhe is called by cpu_init_hyp_mode, and which is called when VHE is disabled. This patch remove the incorrect call to kvm_timer_init_vhe() from cpu_init_hyp_mode(), and calls kvm_timer_init_vhe() to enable cnthctl_el2.EL1PCTEN in cpu_hyp_reinit(). Fixes: 488f94d7212b ("KVM: arm64: Access CNTHCTL_EL2 bit fields correctly on VHE systems") Cc: stable@vger.kernel.org Signed-off-by: Hu Huajun Reviewed-by: Christoffer Dall Acked-by: Marc Zyngier Signed-off-by: Christoffer Dall --- virt/kvm/arm/arm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index a265acc53e39..a39a1e161e63 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -1141,9 +1141,6 @@ static void cpu_init_hyp_mode(void *dummy) __cpu_init_hyp_mode(pgd_ptr, hyp_stack_ptr, vector_ptr); __cpu_init_stage2(); - if (is_kernel_in_hyp_mode()) - kvm_timer_init_vhe(); - kvm_arm_init_debug(); } @@ -1163,6 +1160,7 @@ static void cpu_hyp_reinit(void) * event was cancelled before the CPU was reset. */ __cpu_init_stage2(); + kvm_timer_init_vhe(); } else { cpu_init_hyp_mode(NULL); } -- cgit v1.2.3-70-g09d2 From 196f878a7ac2e727a1ded5197e552e6e2d3dfe2c Mon Sep 17 00:00:00 2001 From: James Morse Date: Tue, 20 Jun 2017 17:11:48 +0100 Subject: KVM: arm/arm64: Signal SIGBUS when stage2 discovers hwpoison memory Once we enable ARCH_SUPPORTS_MEMORY_FAILURE on arm64, notifications for broken memory can call memory_failure() in mm/memory-failure.c to offline pages of memory, possibly signalling user space processes and notifying all the in-kernel users. memory_failure() has two modes, early and late. Early is used by machine-managers like Qemu to receive a notification when a memory error is notified to the host. These can then be relayed to the guest before the affected page is accessed. To enable this, the process must set PR_MCE_KILL_EARLY in PR_MCE_KILL_SET using the prctl() syscall. Once the early notification has been handled, nothing stops the machine-manager or guest from accessing the affected page. If the machine-manager does this the page will fail to be mapped and SIGBUS will be sent. This patch adds the equivalent path for when the guest accesses the page, sending SIGBUS to the machine-manager. These two signals can be distinguished by the machine-manager using their si_code: BUS_MCEERR_AO for 'action optional' early notifications, and BUS_MCEERR_AR for 'action required' synchronous/late notifications. Do as x86 does, and deliver the SIGBUS when we discover pfn == KVM_PFN_ERR_HWPOISON. Use the hugepage size as si_addr_lsb if this vma was allocated as a hugepage. Transparent hugepages will be split by memory_failure() before we see them here. Cc: Punit Agrawal Signed-off-by: James Morse Signed-off-by: Marc Zyngier --- virt/kvm/arm/mmu.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'virt') diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index e2e5effba2a9..f2d5b6cf06ae 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include @@ -1261,6 +1262,24 @@ static void coherent_cache_guest_page(struct kvm_vcpu *vcpu, kvm_pfn_t pfn, __coherent_cache_guest_page(vcpu, pfn, size); } +static void kvm_send_hwpoison_signal(unsigned long address, + struct vm_area_struct *vma) +{ + siginfo_t info; + + info.si_signo = SIGBUS; + info.si_errno = 0; + info.si_code = BUS_MCEERR_AR; + info.si_addr = (void __user *)address; + + if (is_vm_hugetlb_page(vma)) + info.si_addr_lsb = huge_page_shift(hstate_vma(vma)); + else + info.si_addr_lsb = PAGE_SHIFT; + + send_sig_info(SIGBUS, &info, current); +} + static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, struct kvm_memory_slot *memslot, unsigned long hva, unsigned long fault_status) @@ -1330,6 +1349,10 @@ static int user_mem_abort(struct kvm_vcpu *vcpu, phys_addr_t fault_ipa, smp_rmb(); pfn = gfn_to_pfn_prot(kvm, gfn, write_fault, &writable); + if (pfn == KVM_PFN_ERR_HWPOISON) { + kvm_send_hwpoison_signal(hva, vma); + return 0; + } if (is_error_noslot_pfn(pfn)) return -EFAULT; -- cgit v1.2.3-70-g09d2 From 621f48e40ee9b0100a802531069166d7d94796e0 Mon Sep 17 00:00:00 2001 From: Tyler Baicar Date: Wed, 21 Jun 2017 12:17:14 -0600 Subject: arm/arm64: KVM: add guest SEA support Currently external aborts are unsupported by the guest abort handling. Add handling for SEAs so that the host kernel reports SEAs which occur in the guest kernel. When an SEA occurs in the guest kernel, the guest exits and is routed to kvm_handle_guest_abort(). Prior to this patch, a print message of an unsupported FSC would be printed and nothing else would happen. With this patch, the code gets routed to the APEI handling of SEAs in the host kernel to report the SEA information. Signed-off-by: Tyler Baicar Acked-by: Catalin Marinas Acked-by: Marc Zyngier Acked-by: Christoffer Dall Signed-off-by: Will Deacon --- arch/arm/include/asm/kvm_arm.h | 10 ++++++++++ arch/arm/include/asm/system_misc.h | 5 +++++ arch/arm64/include/asm/kvm_arm.h | 10 ++++++++++ arch/arm64/include/asm/system_misc.h | 2 ++ arch/arm64/mm/fault.c | 22 ++++++++++++++++++++-- drivers/acpi/apei/ghes.c | 17 +++++++++++------ include/acpi/ghes.h | 2 +- virt/kvm/arm/mmu.c | 36 +++++++++++++++++++++++++++++++++--- 8 files changed, 92 insertions(+), 12 deletions(-) (limited to 'virt') diff --git a/arch/arm/include/asm/kvm_arm.h b/arch/arm/include/asm/kvm_arm.h index a3f0b3d50089..ebf020b02bc8 100644 --- a/arch/arm/include/asm/kvm_arm.h +++ b/arch/arm/include/asm/kvm_arm.h @@ -187,6 +187,16 @@ #define FSC_FAULT (0x04) #define FSC_ACCESS (0x08) #define FSC_PERM (0x0c) +#define FSC_SEA (0x10) +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~0xf) diff --git a/arch/arm/include/asm/system_misc.h b/arch/arm/include/asm/system_misc.h index a3d61ad984af..8c4a89f5ce7d 100644 --- a/arch/arm/include/asm/system_misc.h +++ b/arch/arm/include/asm/system_misc.h @@ -22,6 +22,11 @@ extern void (*arm_pm_idle)(void); extern unsigned int user_debug; +static inline int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + return -1; +} + #endif /* !__ASSEMBLY__ */ #endif /* __ASM_ARM_SYSTEM_MISC_H */ diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h index 6e99978e83bd..61d694c2eae5 100644 --- a/arch/arm64/include/asm/kvm_arm.h +++ b/arch/arm64/include/asm/kvm_arm.h @@ -204,6 +204,16 @@ #define FSC_FAULT ESR_ELx_FSC_FAULT #define FSC_ACCESS ESR_ELx_FSC_ACCESS #define FSC_PERM ESR_ELx_FSC_PERM +#define FSC_SEA ESR_ELx_FSC_EXTABT +#define FSC_SEA_TTW0 (0x14) +#define FSC_SEA_TTW1 (0x15) +#define FSC_SEA_TTW2 (0x16) +#define FSC_SEA_TTW3 (0x17) +#define FSC_SECC (0x18) +#define FSC_SECC_TTW0 (0x1c) +#define FSC_SECC_TTW1 (0x1d) +#define FSC_SECC_TTW2 (0x1e) +#define FSC_SECC_TTW3 (0x1f) /* Hyp Prefetch Fault Address Register (HPFAR/HDFAR) */ #define HPFAR_MASK (~UL(0xf)) diff --git a/arch/arm64/include/asm/system_misc.h b/arch/arm64/include/asm/system_misc.h index bc812435bc76..95aa4423247d 100644 --- a/arch/arm64/include/asm/system_misc.h +++ b/arch/arm64/include/asm/system_misc.h @@ -56,6 +56,8 @@ extern void (*arm_pm_restart)(enum reboot_mode reboot_mode, const char *cmd); __show_ratelimited; \ }) +int handle_guest_sea(phys_addr_t addr, unsigned int esr); + #endif /* __ASSEMBLY__ */ #endif /* __ASM_SYSTEM_MISC_H */ diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 07481af15fd7..a8c9f2db20ba 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -532,6 +532,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) { struct siginfo info; const struct fault_info *inf; + int ret = 0; inf = esr_to_fault_info(esr); pr_err("Synchronous External Abort: %s (0x%08x) at 0x%016lx\n", @@ -546,7 +547,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) if (interrupts_enabled(regs)) nmi_enter(); - ghes_notify_sea(); + ret = ghes_notify_sea(); if (interrupts_enabled(regs)) nmi_exit(); @@ -561,7 +562,7 @@ static int do_sea(unsigned long addr, unsigned int esr, struct pt_regs *regs) info.si_addr = (void __user *)addr; arm64_notify_die("", regs, &info, esr); - return 0; + return ret; } static const struct fault_info fault_info[] = { @@ -631,6 +632,23 @@ static const struct fault_info fault_info[] = { { do_bad, SIGBUS, 0, "unknown 63" }, }; +/* + * Handle Synchronous External Aborts that occur in a guest kernel. + * + * The return value will be zero if the SEA was successfully handled + * and non-zero if there was an error processing the error or there was + * no error to process. + */ +int handle_guest_sea(phys_addr_t addr, unsigned int esr) +{ + int ret = -ENOENT; + + if (IS_ENABLED(CONFIG_ACPI_APEI_SEA)) + ret = ghes_notify_sea(); + + return ret; +} + /* * Dispatch a data abort to the relevant handler. */ diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index 5073d035bb6e..bc717bdf50f1 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -816,17 +816,22 @@ static struct notifier_block ghes_notifier_sci = { #ifdef CONFIG_ACPI_APEI_SEA static LIST_HEAD(ghes_sea); -void ghes_notify_sea(void) +/* + * Return 0 only if one of the SEA error sources successfully reported an error + * record sent from the firmware. + */ +int ghes_notify_sea(void) { struct ghes *ghes; + int ret = -ENOENT; - /* - * synchronize_rcu() will wait for nmi_exit(), so no need to - * rcu_read_lock(). - */ + rcu_read_lock(); list_for_each_entry_rcu(ghes, &ghes_sea, list) { - ghes_proc(ghes); + if (!ghes_proc(ghes)) + ret = 0; } + rcu_read_unlock(); + return ret; } static void ghes_sea_add(struct ghes *ghes) diff --git a/include/acpi/ghes.h b/include/acpi/ghes.h index 95305aeb13ff..9f26e01186ae 100644 --- a/include/acpi/ghes.h +++ b/include/acpi/ghes.h @@ -113,6 +113,6 @@ static inline void *acpi_hest_get_next(struct acpi_hest_generic_data *gdata) return (void *)(gdata) + acpi_hest_get_record_size(gdata); } -void ghes_notify_sea(void); +int ghes_notify_sea(void); #endif /* GHES_H */ diff --git a/virt/kvm/arm/mmu.c b/virt/kvm/arm/mmu.c index a2d63247d1bb..bde0cdd60997 100644 --- a/virt/kvm/arm/mmu.c +++ b/virt/kvm/arm/mmu.c @@ -29,6 +29,7 @@ #include #include #include +#include #include "trace.h" @@ -1427,6 +1428,25 @@ out: kvm_set_pfn_accessed(pfn); } +static bool is_abort_sea(unsigned long fault_status) +{ + switch (fault_status) { + case FSC_SEA: + case FSC_SEA_TTW0: + case FSC_SEA_TTW1: + case FSC_SEA_TTW2: + case FSC_SEA_TTW3: + case FSC_SECC: + case FSC_SECC_TTW0: + case FSC_SECC_TTW1: + case FSC_SECC_TTW2: + case FSC_SECC_TTW3: + return true; + default: + return false; + } +} + /** * kvm_handle_guest_abort - handles all 2nd stage aborts * @vcpu: the VCPU pointer @@ -1449,19 +1469,29 @@ int kvm_handle_guest_abort(struct kvm_vcpu *vcpu, struct kvm_run *run) gfn_t gfn; int ret, idx; + fault_status = kvm_vcpu_trap_get_fault_type(vcpu); + + fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); + + /* + * The host kernel will handle the synchronous external abort. There + * is no need to pass the error into the guest. + */ + if (is_abort_sea(fault_status)) { + if (!handle_guest_sea(fault_ipa, kvm_vcpu_get_hsr(vcpu))) + return 1; + } + is_iabt = kvm_vcpu_trap_is_iabt(vcpu); if (unlikely(!is_iabt && kvm_vcpu_dabt_isextabt(vcpu))) { kvm_inject_vabt(vcpu); return 1; } - fault_ipa = kvm_vcpu_get_fault_ipa(vcpu); - trace_kvm_guest_fault(*vcpu_pc(vcpu), kvm_vcpu_get_hsr(vcpu), kvm_vcpu_get_hfar(vcpu), fault_ipa); /* Check the stage-2 fault is trans. fault or write fault */ - fault_status = kvm_vcpu_trap_get_fault_type(vcpu); if (fault_status != FSC_FAULT && fault_status != FSC_PERM && fault_status != FSC_ACCESS) { kvm_err("Unsupported FSC: EC=%#x xFSC=%#lx ESR_EL2=%#lx\n", -- cgit v1.2.3-70-g09d2 From 039c5d1b2c5f249f5a291215eb5f0eb0afd54f81 Mon Sep 17 00:00:00 2001 From: Roman Storozhenko Date: Tue, 27 Jun 2017 12:51:18 +0300 Subject: KVM: Replaces symbolic permissions with numeric Replaces "S_IRUGO | S_IWUSR" with 0644. The reason is that symbolic permissions considered harmful: https://lwn.net/Articles/696229/ Signed-off-by: Roman Storozhenko Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index f0fe9d02f6bb..3863cf7ae1a3 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -73,17 +73,17 @@ MODULE_LICENSE("GPL"); /* Architectures should define their poll value according to the halt latency */ unsigned int halt_poll_ns = KVM_HALT_POLL_NS_DEFAULT; -module_param(halt_poll_ns, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns); /* Default doubles per-vcpu halt_poll_ns. */ unsigned int halt_poll_ns_grow = 2; -module_param(halt_poll_ns_grow, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns_grow, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_grow); /* Default resets per-vcpu halt_poll_ns . */ unsigned int halt_poll_ns_shrink; -module_param(halt_poll_ns_shrink, uint, S_IRUGO | S_IWUSR); +module_param(halt_poll_ns_shrink, uint, 0644); EXPORT_SYMBOL_GPL(halt_poll_ns_shrink); /* -- cgit v1.2.3-70-g09d2 From 525df86145bf731af522ba9b3982c5b48078b81a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 27 Jun 2017 15:45:09 +0200 Subject: KVM: explain missing kvm_put_kvm in case of failure The call to kvm_put_kvm was removed from error handling in commit 506cfba9e726 ("KVM: don't use anon_inode_getfd() before possible failures"), but it is _not_ a memory leak. Reuse Al's explanation to avoid that someone else makes the same mistake. Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 3863cf7ae1a3..19f0ecb9b93e 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -3191,6 +3191,12 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) return PTR_ERR(file); } + /* + * Don't call kvm_put_kvm anymore at this point; file->f_op is + * already set, with ->release() being kvm_vm_release(). In error + * cases it will be called by the final fput(file) and will take + * care of doing kvm_put_kvm(kvm). + */ if (kvm_create_vm_debugfs(kvm, r) < 0) { put_unused_fd(r); fput(file); -- cgit v1.2.3-70-g09d2 From e323369b2e204da4dc771bbddceef986f4bf85d5 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 28 Jun 2017 13:49:52 -0600 Subject: kvm-vfio: Decouple only when we match a group Unset-KVM and decrement-assignment only when we find the group in our list. Otherwise we can get out of sync if the user triggers this for groups that aren't currently on our list. Signed-off-by: Alex Williamson Reviewed-by: Alexey Kardashevskiy Reviewed-by: Eric Auger Tested-by: Eric Auger Acked-by: Paolo Bonzini Cc: stable@vger.kernel.org --- virt/kvm/vfio.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'virt') diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 37d9118fd84b..6e002d0f3191 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -246,21 +246,20 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) continue; list_del(&kvg->node); + kvm_arch_end_assignment(dev->kvm); +#ifdef CONFIG_SPAPR_TCE_IOMMU + kvm_spapr_tce_release_vfio_group(dev->kvm, + kvg->vfio_group); +#endif + kvm_vfio_group_set_kvm(kvg->vfio_group, NULL); kvm_vfio_group_put_external_user(kvg->vfio_group); kfree(kvg); ret = 0; break; } - kvm_arch_end_assignment(dev->kvm); - mutex_unlock(&kv->lock); -#ifdef CONFIG_SPAPR_TCE_IOMMU - kvm_spapr_tce_release_vfio_group(dev->kvm, vfio_group); -#endif - kvm_vfio_group_set_kvm(vfio_group, NULL); - kvm_vfio_group_put_external_user(vfio_group); kvm_vfio_update_coherency(dev); -- cgit v1.2.3-70-g09d2 From 5d6dee80a1e94cc284d03e06d930e60e8d3ecf7d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Wed, 28 Jun 2017 13:50:05 -0600 Subject: vfio: New external user group/file match At the point where the kvm-vfio pseudo device wants to release its vfio group reference, we can't always acquire a new reference to make that happen. The group can be in a state where we wouldn't allow a new reference to be added. This new helper function allows a caller to match a file to a group to facilitate this. Given a file and group, report if they match. Thus the caller needs to already have a group reference to match to the file. This allows the deletion of a group without acquiring a new reference. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Reviewed-by: Paolo Bonzini Tested-by: Eric Auger Cc: stable@vger.kernel.org --- drivers/vfio/vfio.c | 9 +++++++++ include/linux/vfio.h | 2 ++ virt/kvm/vfio.c | 27 +++++++++++++++++++-------- 3 files changed, 30 insertions(+), 8 deletions(-) (limited to 'virt') diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 54dd2fbf83d9..7597a377eb4e 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1776,6 +1776,15 @@ void vfio_group_put_external_user(struct vfio_group *group) } EXPORT_SYMBOL_GPL(vfio_group_put_external_user); +bool vfio_external_group_match_file(struct vfio_group *test_group, + struct file *filep) +{ + struct vfio_group *group = filep->private_data; + + return (filep->f_op == &vfio_group_fops) && (group == test_group); +} +EXPORT_SYMBOL_GPL(vfio_external_group_match_file); + int vfio_external_user_iommu_id(struct vfio_group *group) { return iommu_group_id(group->iommu_group); diff --git a/include/linux/vfio.h b/include/linux/vfio.h index edf9b2cad277..9b34d0af5d27 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -97,6 +97,8 @@ extern void vfio_unregister_iommu_driver( */ extern struct vfio_group *vfio_group_get_external_user(struct file *filep); extern void vfio_group_put_external_user(struct vfio_group *group); +extern bool vfio_external_group_match_file(struct vfio_group *group, + struct file *filep); extern int vfio_external_user_iommu_id(struct vfio_group *group); extern long vfio_external_check_extension(struct vfio_group *group, unsigned long arg); diff --git a/virt/kvm/vfio.c b/virt/kvm/vfio.c index 6e002d0f3191..d99850c462a1 100644 --- a/virt/kvm/vfio.c +++ b/virt/kvm/vfio.c @@ -51,6 +51,22 @@ static struct vfio_group *kvm_vfio_group_get_external_user(struct file *filep) return vfio_group; } +static bool kvm_vfio_external_group_match_file(struct vfio_group *group, + struct file *filep) +{ + bool ret, (*fn)(struct vfio_group *, struct file *); + + fn = symbol_get(vfio_external_group_match_file); + if (!fn) + return false; + + ret = fn(group, filep); + + symbol_put(vfio_external_group_match_file); + + return ret; +} + static void kvm_vfio_group_put_external_user(struct vfio_group *vfio_group) { void (*fn)(struct vfio_group *); @@ -231,18 +247,13 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) if (!f.file) return -EBADF; - vfio_group = kvm_vfio_group_get_external_user(f.file); - fdput(f); - - if (IS_ERR(vfio_group)) - return PTR_ERR(vfio_group); - ret = -ENOENT; mutex_lock(&kv->lock); list_for_each_entry(kvg, &kv->group_list, node) { - if (kvg->vfio_group != vfio_group) + if (!kvm_vfio_external_group_match_file(kvg->vfio_group, + f.file)) continue; list_del(&kvg->node); @@ -260,7 +271,7 @@ static int kvm_vfio_set_group(struct kvm_device *dev, long attr, u64 arg) mutex_unlock(&kv->lock); - kvm_vfio_group_put_external_user(vfio_group); + fdput(f); kvm_vfio_update_coherency(dev); -- cgit v1.2.3-70-g09d2 From 0e4524a5d341e719e8ee9ee7db5d58e2c5a4c10e Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 14:44:28 +0200 Subject: KVM: mark vcpu->pid pointer as rcu protected We do use rcu to protect the pid pointer. Mark it as such and adopt all code to use the proper access methods. This was detected by sparse. "virt/kvm/kvm_main.c:2248:15: error: incompatible types in comparison expression (different address spaces)" Signed-off-by: Christian Borntraeger Reviewed-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 15 +++++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 0b50e7b35ed4..bcd37b855c66 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,7 +234,7 @@ struct kvm_vcpu { int guest_fpu_loaded, guest_xcr0_loaded; struct swait_queue_head wq; - struct pid *pid; + struct pid __rcu *pid; int sigset_active; sigset_t sigset; struct kvm_vcpu_stat stat; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 19f0ecb9b93e..fc2d58312fd5 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -293,7 +293,12 @@ EXPORT_SYMBOL_GPL(kvm_vcpu_init); void kvm_vcpu_uninit(struct kvm_vcpu *vcpu) { - put_pid(vcpu->pid); + /* + * no need for rcu_read_lock as VCPU_RUN is the only place that + * will change the vcpu->pid pointer and on uninit all file + * descriptors are already gone. + */ + put_pid(rcu_dereference_protected(vcpu->pid, 1)); kvm_arch_vcpu_uninit(vcpu); free_page((unsigned long)vcpu->run); } @@ -2551,13 +2556,14 @@ static long kvm_vcpu_ioctl(struct file *filp, if (r) return r; switch (ioctl) { - case KVM_RUN: + case KVM_RUN: { + struct pid *oldpid; r = -EINVAL; if (arg) goto out; - if (unlikely(vcpu->pid != current->pids[PIDTYPE_PID].pid)) { + oldpid = rcu_access_pointer(vcpu->pid); + if (unlikely(oldpid != current->pids[PIDTYPE_PID].pid)) { /* The thread running this VCPU changed. */ - struct pid *oldpid = vcpu->pid; struct pid *newpid = get_task_pid(current, PIDTYPE_PID); rcu_assign_pointer(vcpu->pid, newpid); @@ -2568,6 +2574,7 @@ static long kvm_vcpu_ioctl(struct file *filp, r = kvm_arch_vcpu_ioctl_run(vcpu, vcpu->run); trace_kvm_userspace_exit(vcpu->run->exit_reason, r); break; + } case KVM_GET_REGS: { struct kvm_regs *kvm_regs; -- cgit v1.2.3-70-g09d2 From 5535f800b0e1533e5f3a1428f6ef25eb29eccc0f Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 20:31:11 +0200 Subject: KVM: use rcu access function for irq routing irq routing is rcu protected. Use the proper access functions. Found by sparse virt/kvm/irqchip.c:233:13: warning: incorrect type in assignment (different address spaces) virt/kvm/irqchip.c:233:13: expected struct kvm_irq_routing_table *old virt/kvm/irqchip.c:233:13: got struct kvm_irq_routing_table [noderef] *irq_routing Signed-off-by: Christian Borntraeger Reviewed-by: Paolo Bonzini --- virt/kvm/irqchip.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'virt') diff --git a/virt/kvm/irqchip.c b/virt/kvm/irqchip.c index 31e40c9e81df..b1286c4e0712 100644 --- a/virt/kvm/irqchip.c +++ b/virt/kvm/irqchip.c @@ -230,7 +230,7 @@ int kvm_set_irq_routing(struct kvm *kvm, } mutex_lock(&kvm->irq_lock); - old = kvm->irq_routing; + old = rcu_dereference_protected(kvm->irq_routing, 1); rcu_assign_pointer(kvm->irq_routing, new); kvm_irq_routing_update(kvm); kvm_arch_irq_routing_update(kvm); -- cgit v1.2.3-70-g09d2 From 4a12f95177280a660bda99e81838919b1cc6a91a Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Fri, 7 Jul 2017 10:51:38 +0200 Subject: KVM: mark kvm->busses as rcu protected mark kvm->busses as rcu protected and use the correct access function everywhere. found by sparse virt/kvm/kvm_main.c:3490:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3509:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3561:15: error: incompatible types in comparison expression (different address spaces) virt/kvm/kvm_main.c:3644:15: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Christian Borntraeger --- include/linux/kvm_host.h | 8 +++++++- virt/kvm/eventfd.c | 8 +++++--- virt/kvm/kvm_main.c | 17 ++++++++++------- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index bcd37b855c66..6a164f9eb02c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -404,7 +404,7 @@ struct kvm { int last_boosted_vcpu; struct list_head vm_list; struct mutex lock; - struct kvm_io_bus *buses[KVM_NR_BUSES]; + struct kvm_io_bus __rcu *buses[KVM_NR_BUSES]; #ifdef CONFIG_HAVE_KVM_EVENTFD struct { spinlock_t lock; @@ -473,6 +473,12 @@ struct kvm { #define vcpu_err(vcpu, fmt, ...) \ kvm_err("vcpu%i " fmt, (vcpu)->vcpu_id, ## __VA_ARGS__) +static inline struct kvm_io_bus *kvm_get_bus(struct kvm *kvm, enum kvm_bus idx) +{ + return srcu_dereference_check(kvm->buses[idx], &kvm->srcu, + lockdep_is_held(&kvm->slots_lock)); +} + static inline struct kvm_vcpu *kvm_get_vcpu(struct kvm *kvm, int i) { /* Pairs with smp_wmb() in kvm_vm_ioctl_create_vcpu, in case diff --git a/virt/kvm/eventfd.c b/virt/kvm/eventfd.c index a8d540398bbd..d016aadd5fbb 100644 --- a/virt/kvm/eventfd.c +++ b/virt/kvm/eventfd.c @@ -825,7 +825,7 @@ static int kvm_assign_ioeventfd_idx(struct kvm *kvm, if (ret < 0) goto unlock_fail; - kvm->buses[bus_idx]->ioeventfd_count++; + kvm_get_bus(kvm, bus_idx)->ioeventfd_count++; list_add_tail(&p->list, &kvm->ioeventfds); mutex_unlock(&kvm->slots_lock); @@ -848,6 +848,7 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, { struct _ioeventfd *p, *tmp; struct eventfd_ctx *eventfd; + struct kvm_io_bus *bus; int ret = -ENOENT; eventfd = eventfd_ctx_fdget(args->fd); @@ -870,8 +871,9 @@ kvm_deassign_ioeventfd_idx(struct kvm *kvm, enum kvm_bus bus_idx, continue; kvm_io_bus_unregister_dev(kvm, bus_idx, &p->dev); - if (kvm->buses[bus_idx]) - kvm->buses[bus_idx]->ioeventfd_count--; + bus = kvm_get_bus(kvm, bus_idx); + if (bus) + bus->ioeventfd_count--; ioeventfd_release(p); ret = 0; break; diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index fc2d58312fd5..d76e822f8929 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -679,8 +679,8 @@ static struct kvm *kvm_create_vm(unsigned long type) if (init_srcu_struct(&kvm->irq_srcu)) goto out_err_no_irq_srcu; for (i = 0; i < KVM_NR_BUSES; i++) { - kvm->buses[i] = kzalloc(sizeof(struct kvm_io_bus), - GFP_KERNEL); + rcu_assign_pointer(kvm->buses[i], + kzalloc(sizeof(struct kvm_io_bus), GFP_KERNEL)); if (!kvm->buses[i]) goto out_err; } @@ -705,7 +705,7 @@ out_err_no_srcu: hardware_disable_all(); out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) - kfree(kvm->buses[i]); + kfree(rcu_access_pointer(kvm->buses[i])); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) kvm_free_memslots(kvm, kvm->memslots[i]); kvm_arch_free_vm(kvm); @@ -740,8 +740,11 @@ static void kvm_destroy_vm(struct kvm *kvm) spin_unlock(&kvm_lock); kvm_free_irq_routing(kvm); for (i = 0; i < KVM_NR_BUSES; i++) { - if (kvm->buses[i]) - kvm_io_bus_destroy(kvm->buses[i]); + struct kvm_io_bus *bus; + + bus = rcu_dereference_protected(kvm->buses[i], 1); + if (bus) + kvm_io_bus_destroy(bus); kvm->buses[i] = NULL; } kvm_coalesced_mmio_free(kvm); @@ -3570,7 +3573,7 @@ int kvm_io_bus_register_dev(struct kvm *kvm, enum kvm_bus bus_idx, gpa_t addr, { struct kvm_io_bus *new_bus, *bus; - bus = kvm->buses[bus_idx]; + bus = kvm_get_bus(kvm, bus_idx); if (!bus) return -ENOMEM; @@ -3599,7 +3602,7 @@ void kvm_io_bus_unregister_dev(struct kvm *kvm, enum kvm_bus bus_idx, int i; struct kvm_io_bus *new_bus, *bus; - bus = kvm->buses[bus_idx]; + bus = kvm_get_bus(kvm, bus_idx); if (!bus) return; -- cgit v1.2.3-70-g09d2 From a80cf7b5f4149753d5f19c872a47e66195b167d4 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Thu, 6 Jul 2017 16:17:14 +0200 Subject: KVM: mark memory slots as rcu we access the memslots array via srcu. Mark it as such and use the right access functions also for the freeing of memory slots. Found by sparse: ./include/linux/kvm_host.h:565:16: error: incompatible types in comparison expression (different address spaces) Signed-off-by: Christian Borntraeger Reviewed-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- virt/kvm/kvm_main.c | 6 ++++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'virt') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6a164f9eb02c..b3ca77a96b2d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -390,7 +390,7 @@ struct kvm { spinlock_t mmu_lock; struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ - struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; + struct kvm_memslots __rcu *memslots[KVM_ADDRESS_SPACE_NUM]; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; /* diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index d76e822f8929..6e6d4edf0e92 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -707,7 +707,8 @@ out_err_no_disable: for (i = 0; i < KVM_NR_BUSES; i++) kfree(rcu_access_pointer(kvm->buses[i])); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, kvm->memslots[i]); + kvm_free_memslots(kvm, + rcu_dereference_protected(kvm->memslots[i], 1)); kvm_arch_free_vm(kvm); mmdrop(current->mm); return ERR_PTR(r); @@ -756,7 +757,8 @@ static void kvm_destroy_vm(struct kvm *kvm) kvm_arch_destroy_vm(kvm); kvm_destroy_devices(kvm); for (i = 0; i < KVM_ADDRESS_SPACE_NUM; i++) - kvm_free_memslots(kvm, kvm->memslots[i]); + kvm_free_memslots(kvm, + rcu_dereference_protected(kvm->memslots[i], 1)); cleanup_srcu_struct(&kvm->irq_srcu); cleanup_srcu_struct(&kvm->srcu); kvm_arch_free_vm(kvm); -- cgit v1.2.3-70-g09d2 From b49defe83659cefbb1763d541e779da32594ab10 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 30 Jun 2017 13:25:45 +0200 Subject: kvm: avoid unused variable warning for UP builds The uniprocessor version of smp_call_function_many does not evaluate all of its argument, and the compiler emits a warning about "wait" being unused. This breaks the build on architectures for which "-Werror" is enabled by default. Work around it by moving the invocation of smp_call_function_many to its own inline function. Reported-by: Paul Mackerras Cc: stable@vger.kernel.org Fixes: 7a97cec26b94c909f4cbad2dc3186af3e457a522 Signed-off-by: Paolo Bonzini --- virt/kvm/kvm_main.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 19f0ecb9b93e..0d796c9a6482 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -187,12 +187,23 @@ static void ack_flush(void *_completed) { } +static inline bool kvm_kick_many_cpus(const struct cpumask *cpus, bool wait) +{ + if (unlikely(!cpus)) + cpus = cpu_online_mask; + + if (cpumask_empty(cpus)) + return false; + + smp_call_function_many(cpus, ack_flush, NULL, wait); + return true; +} + bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) { int i, cpu, me; cpumask_var_t cpus; - bool called = true; - bool wait = req & KVM_REQUEST_WAIT; + bool called; struct kvm_vcpu *vcpu; zalloc_cpumask_var(&cpus, GFP_ATOMIC); @@ -207,14 +218,9 @@ bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req) if (cpus != NULL && cpu != -1 && cpu != me && kvm_request_needs_ipi(vcpu, req)) - cpumask_set_cpu(cpu, cpus); + __cpumask_set_cpu(cpu, cpus); } - if (unlikely(cpus == NULL)) - smp_call_function_many(cpu_online_mask, ack_flush, NULL, wait); - else if (!cpumask_empty(cpus)) - smp_call_function_many(cpus, ack_flush, NULL, wait); - else - called = false; + called = kvm_kick_many_cpus(cpus, !!(req & KVM_REQUEST_WAIT)); put_cpu(); free_cpumask_var(cpus); return called; -- cgit v1.2.3-70-g09d2 From 286de8f6ac9202f1c9012784639156c6ec386eb8 Mon Sep 17 00:00:00 2001 From: Claudio Imbrenda Date: Wed, 12 Jul 2017 17:56:44 +0200 Subject: KVM: trigger uevents when creating or destroying a VM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds a few lines to the KVM common code to fire a KOBJ_CHANGE uevent whenever a KVM VM is created or destroyed. The event carries five environment variables: CREATED indicates how many times a new VM has been created. It is useful for example to trigger specific actions when the first VM is started COUNT indicates how many VMs are currently active. This can be used for logging or monitoring purposes PID has the pid of the KVM process that has been started or stopped. This can be used to perform process-specific tuning. STATS_PATH contains the path in debugfs to the directory with all the runtime statistics for this VM. This is useful for performance monitoring and profiling. EVENT described the type of event, its value can be either "create" or "destroy" Specific udev rules can be then set up in userspace to deal with the creation or destruction of VMs as needed. Signed-off-by: Claudio Imbrenda Signed-off-by: Radim Krčmář --- virt/kvm/kvm_main.c | 69 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) (limited to 'virt') diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 7766c2b52797..82987d457b8b 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -130,6 +130,12 @@ EXPORT_SYMBOL_GPL(kvm_rebooting); static bool largepages_enabled = true; +#define KVM_EVENT_CREATE_VM 0 +#define KVM_EVENT_DESTROY_VM 1 +static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm); +static unsigned long long kvm_createvm_count; +static unsigned long long kvm_active_vms; + bool kvm_is_reserved_pfn(kvm_pfn_t pfn) { if (pfn_valid(pfn)) @@ -740,6 +746,7 @@ static void kvm_destroy_vm(struct kvm *kvm) int i; struct mm_struct *mm = kvm->mm; + kvm_uevent_notify_change(KVM_EVENT_DESTROY_VM, kvm); kvm_destroy_vm_debugfs(kvm); kvm_arch_sync_events(kvm); spin_lock(&kvm_lock); @@ -3220,6 +3227,7 @@ static int kvm_dev_ioctl_create_vm(unsigned long type) fput(file); return -ENOMEM; } + kvm_uevent_notify_change(KVM_EVENT_CREATE_VM, kvm); fd_install(r, file); return r; @@ -3872,6 +3880,67 @@ static const struct file_operations *stat_fops[] = { [KVM_STAT_VM] = &vm_stat_fops, }; +static void kvm_uevent_notify_change(unsigned int type, struct kvm *kvm) +{ + struct kobj_uevent_env *env; + char *tmp, *pathbuf = NULL; + unsigned long long created, active; + + if (!kvm_dev.this_device || !kvm) + return; + + spin_lock(&kvm_lock); + if (type == KVM_EVENT_CREATE_VM) { + kvm_createvm_count++; + kvm_active_vms++; + } else if (type == KVM_EVENT_DESTROY_VM) { + kvm_active_vms--; + } + created = kvm_createvm_count; + active = kvm_active_vms; + spin_unlock(&kvm_lock); + + env = kzalloc(sizeof(*env), GFP_KERNEL); + if (!env) + return; + + add_uevent_var(env, "CREATED=%llu", created); + add_uevent_var(env, "COUNT=%llu", active); + + if (type == KVM_EVENT_CREATE_VM) + add_uevent_var(env, "EVENT=create"); + else if (type == KVM_EVENT_DESTROY_VM) + add_uevent_var(env, "EVENT=destroy"); + + if (kvm->debugfs_dentry) { + char p[ITOA_MAX_LEN]; + + snprintf(p, sizeof(p), "%s", kvm->debugfs_dentry->d_name.name); + tmp = strchrnul(p + 1, '-'); + *tmp = '\0'; + add_uevent_var(env, "PID=%s", p); + pathbuf = kmalloc(PATH_MAX, GFP_KERNEL); + if (pathbuf) { + /* sizeof counts the final '\0' */ + int len = sizeof("STATS_PATH=") - 1; + const char *pvar = "STATS_PATH="; + + tmp = dentry_path_raw(kvm->debugfs_dentry, + pathbuf + len, + PATH_MAX - len); + if (!IS_ERR(tmp)) { + memcpy(tmp - len, pvar, len); + env->envp[env->envp_idx++] = tmp - len; + } + } + } + /* no need for checks, since we are adding at most only 5 keys */ + env->envp[env->envp_idx++] = NULL; + kobject_uevent_env(&kvm_dev.this_device->kobj, KOBJ_CHANGE, env->envp); + kfree(env); + kfree(pathbuf); +} + static int kvm_init_debug(void) { int r = -EEXIST; -- cgit v1.2.3-70-g09d2