diff options
37 files changed, 914 insertions, 227 deletions
diff --git a/Documentation/virtual/kvm/devices/vcpu.txt b/Documentation/virtual/kvm/devices/vcpu.txt index 02f50686c418..2b5dab16c4f2 100644 --- a/Documentation/virtual/kvm/devices/vcpu.txt +++ b/Documentation/virtual/kvm/devices/vcpu.txt @@ -16,7 +16,9 @@ Parameters: in kvm_device_attr.addr the address for PMU overflow interrupt is a Returns: -EBUSY: The PMU overflow interrupt is already set -ENXIO: The overflow interrupt not set when attempting to get it -ENODEV: PMUv3 not supported - -EINVAL: Invalid PMU overflow interrupt number supplied + -EINVAL: Invalid PMU overflow interrupt number supplied or + trying to set the IRQ number without using an in-kernel + irqchip. A value describing the PMUv3 (Performance Monitor Unit v3) overflow interrupt number for this vcpu. This interrupt could be a PPI or SPI, but the interrupt @@ -25,11 +27,36 @@ all vcpus, while as an SPI it must be a separate number per vcpu. 1.2 ATTRIBUTE: KVM_ARM_VCPU_PMU_V3_INIT Parameters: no additional parameter in kvm_device_attr.addr -Returns: -ENODEV: PMUv3 not supported - -ENXIO: PMUv3 not properly configured as required prior to calling this - attribute +Returns: -ENODEV: PMUv3 not supported or GIC not initialized + -ENXIO: PMUv3 not properly configured or in-kernel irqchip not + configured as required prior to calling this attribute -EBUSY: PMUv3 already initialized -Request the initialization of the PMUv3. This must be done after creating the -in-kernel irqchip. Creating a PMU with a userspace irqchip is currently not -supported. +Request the initialization of the PMUv3. If using the PMUv3 with an in-kernel +virtual GIC implementation, this must be done after initializing the in-kernel +irqchip. + + +2. GROUP: KVM_ARM_VCPU_TIMER_CTRL +Architectures: ARM,ARM64 + +2.1. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_VTIMER +2.2. ATTRIBUTE: KVM_ARM_VCPU_TIMER_IRQ_PTIMER +Parameters: in kvm_device_attr.addr the address for the timer interrupt is a + pointer to an int +Returns: -EINVAL: Invalid timer interrupt number + -EBUSY: One or more VCPUs has already run + +A value describing the architected timer interrupt number when connected to an +in-kernel virtual GIC. These must be a PPI (16 <= intid < 32). Setting the +attribute overrides the default values (see below). + +KVM_ARM_VCPU_TIMER_IRQ_VTIMER: The EL1 virtual timer intid (default: 27) +KVM_ARM_VCPU_TIMER_IRQ_PTIMER: The EL1 physical timer intid (default: 30) + +Setting the same PPI for different timers will prevent the VCPUs from running. +Setting the interrupt number on a VCPU configures all VCPUs created at that +time to use the number provided for a given timer, overwriting any previously +configured values on other VCPUs. Userspace should configure the interrupt +numbers on at least one VCPU after creating all VCPUs and before running any +VCPUs. diff --git a/Documentation/virtual/kvm/vcpu-requests.rst b/Documentation/virtual/kvm/vcpu-requests.rst new file mode 100644 index 000000000000..5feb3706a7ae --- /dev/null +++ b/Documentation/virtual/kvm/vcpu-requests.rst @@ -0,0 +1,307 @@ +================= +KVM VCPU Requests +================= + +Overview +======== + +KVM supports an internal API enabling threads to request a VCPU thread to +perform some activity. For example, a thread may request a VCPU to flush +its TLB with a VCPU request. The API consists of the following functions:: + + /* Check if any requests are pending for VCPU @vcpu. */ + bool kvm_request_pending(struct kvm_vcpu *vcpu); + + /* Check if VCPU @vcpu has request @req pending. */ + bool kvm_test_request(int req, struct kvm_vcpu *vcpu); + + /* Clear request @req for VCPU @vcpu. */ + void kvm_clear_request(int req, struct kvm_vcpu *vcpu); + + /* + * Check if VCPU @vcpu has request @req pending. When the request is + * pending it will be cleared and a memory barrier, which pairs with + * another in kvm_make_request(), will be issued. + */ + bool kvm_check_request(int req, struct kvm_vcpu *vcpu); + + /* + * Make request @req of VCPU @vcpu. Issues a memory barrier, which pairs + * with another in kvm_check_request(), prior to setting the request. + */ + void kvm_make_request(int req, struct kvm_vcpu *vcpu); + + /* Make request @req of all VCPUs of the VM with struct kvm @kvm. */ + bool kvm_make_all_cpus_request(struct kvm *kvm, unsigned int req); + +Typically a requester wants the VCPU to perform the activity as soon +as possible after making the request. This means most requests +(kvm_make_request() calls) are followed by a call to kvm_vcpu_kick(), +and kvm_make_all_cpus_request() has the kicking of all VCPUs built +into it. + +VCPU Kicks +---------- + +The goal of a VCPU kick is to bring a VCPU thread out of guest mode in +order to perform some KVM maintenance. To do so, an IPI is sent, forcing +a guest mode exit. However, a VCPU thread may not be in guest mode at the +time of the kick. Therefore, depending on the mode and state of the VCPU +thread, there are two other actions a kick may take. All three actions +are listed below: + +1) Send an IPI. This forces a guest mode exit. +2) Waking a sleeping VCPU. Sleeping VCPUs are VCPU threads outside guest + mode that wait on waitqueues. Waking them removes the threads from + the waitqueues, allowing the threads to run again. This behavior + may be suppressed, see KVM_REQUEST_NO_WAKEUP below. +3) Nothing. When the VCPU is not in guest mode and the VCPU thread is not + sleeping, then there is nothing to do. + +VCPU Mode +--------- + +VCPUs have a mode state, ``vcpu->mode``, that is used to track whether the +guest is running in guest mode or not, as well as some specific +outside guest mode states. The architecture may use ``vcpu->mode`` to +ensure VCPU requests are seen by VCPUs (see "Ensuring Requests Are Seen"), +as well as to avoid sending unnecessary IPIs (see "IPI Reduction"), and +even to ensure IPI acknowledgements are waited upon (see "Waiting for +Acknowledgements"). The following modes are defined: + +OUTSIDE_GUEST_MODE + + The VCPU thread is outside guest mode. + +IN_GUEST_MODE + + The VCPU thread is in guest mode. + +EXITING_GUEST_MODE + + The VCPU thread is transitioning from IN_GUEST_MODE to + OUTSIDE_GUEST_MODE. + +READING_SHADOW_PAGE_TABLES + + The VCPU thread is outside guest mode, but it wants the sender of + certain VCPU requests, namely KVM_REQ_TLB_FLUSH, to wait until the VCPU + thread is done reading the page tables. + +VCPU Request Internals +====================== + +VCPU requests are simply bit indices of the ``vcpu->requests`` bitmap. +This means general bitops, like those documented in [atomic-ops]_ could +also be used, e.g. :: + + clear_bit(KVM_REQ_UNHALT & KVM_REQUEST_MASK, &vcpu->requests); + +However, VCPU request users should refrain from doing so, as it would +break the abstraction. The first 8 bits are reserved for architecture +independent requests, all additional bits are available for architecture +dependent requests. + +Architecture Independent Requests +--------------------------------- + +KVM_REQ_TLB_FLUSH + + KVM's common MMU notifier may need to flush all of a guest's TLB + entries, calling kvm_flush_remote_tlbs() to do so. Architectures that + choose to use the common kvm_flush_remote_tlbs() implementation will + need to handle this VCPU request. + +KVM_REQ_MMU_RELOAD + + When shadow page tables are used and memory slots are removed it's + necessary to inform each VCPU to completely refresh the tables. This + request is used for that. + +KVM_REQ_PENDING_TIMER + + This request may be made from a timer handler run on the host on behalf + of a VCPU. It informs the VCPU thread to inject a timer interrupt. + +KVM_REQ_UNHALT + + This request may be made from the KVM common function kvm_vcpu_block(), + which is used to emulate an instruction that causes a CPU to halt until + one of an architectural specific set of events and/or interrupts is + received (determined by checking kvm_arch_vcpu_runnable()). When that + event or interrupt arrives kvm_vcpu_block() makes the request. This is + in contrast to when kvm_vcpu_block() returns due to any other reason, + such as a pending signal, which does not indicate the VCPU's halt + emulation should stop, and therefore does not make the request. + +KVM_REQUEST_MASK +---------------- + +VCPU requests should be masked by KVM_REQUEST_MASK before using them with +bitops. This is because only the lower 8 bits are used to represent the +request's number. The upper bits are used as flags. Currently only two +flags are defined. + +VCPU Request Flags +------------------ + +KVM_REQUEST_NO_WAKEUP + + This flag is applied to requests that only need immediate attention + from VCPUs running in guest mode. That is, sleeping VCPUs do not need + to be awaken for these requests. Sleeping VCPUs will handle the + requests when they are awaken later for some other reason. + +KVM_REQUEST_WAIT + + When requests with this flag are made with kvm_make_all_cpus_request(), + then the caller will wait for each VCPU to acknowledge its IPI before + proceeding. This flag only applies to VCPUs that would receive IPIs. + If, for example, the VCPU is sleeping, so no IPI is necessary, then + the requesting thread does not wait. This means that this flag may be + safely combined with KVM_REQUEST_NO_WAKEUP. See "Waiting for + Acknowledgements" for more information about requests with + KVM_REQUEST_WAIT. + +VCPU Requests with Associated State +=================================== + +Requesters that want the receiving VCPU to handle new state need to ensure +the newly written state is observable to the receiving VCPU thread's CPU +by the time it observes the request. This means a write memory barrier +must be inserted after writing the new state and before setting the VCPU +request bit. Additionally, on the receiving VCPU thread's side, a +corresponding read barrier must be inserted after reading the request bit +and before proceeding to read the new state associated with it. See +scenario 3, Message and Flag, of [lwn-mb]_ and the kernel documentation +[memory-barriers]_. + +The pair of functions, kvm_check_request() and kvm_make_request(), provide +the memory barriers, allowing this requirement to be handled internally by +the API. + +Ensuring Requests Are Seen +========================== + +When making requests to VCPUs, we want to avoid the receiving VCPU +executing in guest mode for an arbitrary long time without handling the +request. We can be sure this won't happen as long as we ensure the VCPU +thread checks kvm_request_pending() before entering guest mode and that a +kick will send an IPI to force an exit from guest mode when necessary. +Extra care must be taken to cover the period after the VCPU thread's last +kvm_request_pending() check and before it has entered guest mode, as kick +IPIs will only trigger guest mode exits for VCPU threads that are in guest +mode or at least have already disabled interrupts in order to prepare to +enter guest mode. This means that an optimized implementation (see "IPI +Reduction") must be certain when it's safe to not send the IPI. One +solution, which all architectures except s390 apply, is to: + +- set ``vcpu->mode`` to IN_GUEST_MODE between disabling the interrupts and + the last kvm_request_pending() check; +- enable interrupts atomically when entering the guest. + +This solution also requires memory barriers to be placed carefully in both +the requesting thread and the receiving VCPU. With the memory barriers we +can exclude the possibility of a VCPU thread observing +!kvm_request_pending() on its last check and then not receiving an IPI for +the next request made of it, even if the request is made immediately after +the check. This is done by way of the Dekker memory barrier pattern +(scenario 10 of [lwn-mb]_). As the Dekker pattern requires two variables, +this solution pairs ``vcpu->mode`` with ``vcpu->requests``. Substituting +them into the pattern gives:: + + CPU1 CPU2 + ================= ================= + local_irq_disable(); + WRITE_ONCE(vcpu->mode, IN_GUEST_MODE); kvm_make_request(REQ, vcpu); + smp_mb(); smp_mb(); + if (kvm_request_pending(vcpu)) { if (READ_ONCE(vcpu->mode) == + IN_GUEST_MODE) { + ...abort guest entry... ...send IPI... + } } + +As stated above, the IPI is only useful for VCPU threads in guest mode or +that have already disabled interrupts. This is why this specific case of +the Dekker pattern has been extended to disable interrupts before setting +``vcpu->mode`` to IN_GUEST_MODE. WRITE_ONCE() and READ_ONCE() are used to +pedantically implement the memory barrier pattern, guaranteeing the +compiler doesn't interfere with ``vcpu->mode``'s carefully planned +accesses. + +IPI Reduction +------------- + +As only one IPI is needed to get a VCPU to check for any/all requests, +then they may be coalesced. This is easily done by having the first IPI +sending kick also change the VCPU mode to something !IN_GUEST_MODE. The +transitional state, EXITING_GUEST_MODE, is used for this purpose. + +Waiting for Acknowledgements +---------------------------- + +Some requests, those with the KVM_REQUEST_WAIT flag set, require IPIs to +be sent, and the acknowledgements to be waited upon, even when the target +VCPU threads are in modes other than IN_GUEST_MODE. For example, one case +is when a target VCPU thread is in READING_SHADOW_PAGE_TABLES mode, which +is set after disabling interrupts. To support these cases, the +KVM_REQUEST_WAIT flag changes the condition for sending an IPI from +checking that the VCPU is IN_GUEST_MODE to checking that it is not +OUTSIDE_GUEST_MODE. + +Request-less VCPU Kicks +----------------------- + +As the determination of whether or not to send an IPI depends on the +two-variable Dekker memory barrier pattern, then it's clear that +request-less VCPU kicks are almost never correct. Without the assurance +that a non-IPI generating kick will still result in an action by the +receiving VCPU, as the final kvm_request_pending() check does for +request-accompanying kicks, then the kick may not do anything useful at +all. If, for instance, a request-less kick was made to a VCPU that was +just about to set its mode to IN_GUEST_MODE, meaning no IPI is sent, then +the VCPU thread may continue its entry without actually having done +whatever it was the kick was meant to initiate. + +One exception is x86's posted interrupt mechanism. In this case, however, +even the request-less VCPU kick is coupled with the same +local_irq_disable() + smp_mb() pattern described above; the ON bit +(Outstanding Notification) in the posted interrupt descriptor takes the +role of ``vcpu->requests``. When sending a posted interrupt, PIR.ON is +set before reading ``vcpu->mode``; dually, in the VCPU thread, +vmx_sync_pir_to_irr() reads PIR after setting ``vcpu->mode`` to +IN_GUEST_MODE. + +Additional Considerations +========================= + +Sleeping VCPUs +-------------- + +VCPU threads may need to consider requests before and/or after calling +functions that may put them to sleep, e.g. kvm_vcpu_block(). Whether they +do or not, and, if they do, which requests need consideration, is +architecture dependent. kvm_vcpu_block() calls kvm_arch_vcpu_runnable() +to check if it should awaken. One reason to do so is to provide +architectures a function where requests may be checked if necessary. + +Clearing Requests +----------------- + +Generally it only makes sense for the receiving VCPU thread to clear a +request. However, in some circumstances, such as when the requesting +thread and the receiving VCPU thread are executed serially, such as when +they are the same thread, or when they are using some form of concurrency +control to temporarily execute synchronously, then it's possible to know +that the request may be cleared immediately, rather than waiting for the +receiving VCPU thread to handle the request in VCPU RUN. The only current +examples of this are kvm_vcpu_block() calls made by VCPUs to block +themselves. A possible side-effect of that call is to make the +KVM_REQ_UNHALT request, which may then be cleared immediately when the +VCPU returns from the call. + +References +========== + +.. [atomic-ops] Documentation/core-api/atomic_ops.rst +.. [memory-barriers] Documentation/memory-barriers.txt +.. [lwn-mb] https://lwn.net/Articles/573436/ diff --git a/arch/arm/include/asm/kvm_host.h b/arch/arm/include/asm/kvm_host.h index f0e66577ce05..127e2dd2e21c 100644 --- a/arch/arm/include/asm/kvm_host.h +++ b/arch/arm/include/asm/kvm_host.h @@ -44,7 +44,9 @@ #define KVM_MAX_VCPUS VGIC_V2_MAX_CPUS #endif -#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SLEEP \ + KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) u32 *kvm_vcpu_reg(struct kvm_vcpu *vcpu, u8 reg_num, u32 mode); int __attribute_const__ kvm_target_cpu(void); @@ -233,8 +235,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu __percpu **kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); int kvm_arm_copy_coproc_indices(struct kvm_vcpu *vcpu, u64 __user *uindices); unsigned long kvm_arm_num_coproc_regs(struct kvm_vcpu *vcpu); @@ -291,20 +291,12 @@ static inline void kvm_arm_init_debug(void) {} static inline void kvm_arm_setup_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_clear_debug(struct kvm_vcpu *vcpu) {} static inline void kvm_arm_reset_debug_ptr(struct kvm_vcpu *vcpu) {} -static inline int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - return -ENXIO; -} -static inline int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - return -ENXIO; -} -static inline int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, - struct kvm_device_attr *attr) -{ - return -ENXIO; -} + +int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); +int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr); #endif /* __ARM_KVM_HOST_H__ */ diff --git a/arch/arm/include/uapi/asm/kvm.h b/arch/arm/include/uapi/asm/kvm.h index 5e3c673fa3f4..5db2d4c6a55f 100644 --- a/arch/arm/include/uapi/asm/kvm.h +++ b/arch/arm/include/uapi/asm/kvm.h @@ -203,6 +203,14 @@ struct kvm_arch_memory_slot { #define KVM_DEV_ARM_VGIC_LINE_LEVEL_INTID_MASK 0x3ff #define VGIC_LEVEL_INFO_LINE_LEVEL 0 +/* Device Control API on vcpu fd */ +#define KVM_ARM_VCPU_PMU_V3_CTRL 0 +#define KVM_ARM_VCPU_PMU_V3_IRQ 0 +#define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 + #define KVM_DEV_ARM_VGIC_CTRL_INIT 0 #define KVM_DEV_ARM_ITS_SAVE_TABLES 1 #define KVM_DEV_ARM_ITS_RESTORE_TABLES 2 diff --git a/arch/arm/kvm/guest.c b/arch/arm/kvm/guest.c index fa6182a40941..1e0784ebbfd6 100644 --- a/arch/arm/kvm/guest.c +++ b/arch/arm/kvm/guest.c @@ -301,3 +301,54 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, { return -EINVAL; } + +int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} + +int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, + struct kvm_device_attr *attr) +{ + int ret; + + switch (attr->group) { + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; + default: + ret = -ENXIO; + break; + } + + return ret; +} diff --git a/arch/arm/kvm/handle_exit.c b/arch/arm/kvm/handle_exit.c index f86a9aaef462..54442e375354 100644 --- a/arch/arm/kvm/handle_exit.c +++ b/arch/arm/kvm/handle_exit.c @@ -72,6 +72,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_wfx(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/arch/arm/kvm/hyp/switch.c b/arch/arm/kvm/hyp/switch.c index 624a510d31df..ebd2dd46adf7 100644 --- a/arch/arm/kvm/hyp/switch.c +++ b/arch/arm/kvm/hyp/switch.c @@ -237,8 +237,10 @@ void __hyp_text __noreturn __hyp_panic(int cause) vcpu = (struct kvm_vcpu *)read_sysreg(HTPIDR); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __timer_save_state(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); + __banked_restore_state(host_ctxt); __sysreg_restore_state(host_ctxt); } diff --git a/arch/arm/kvm/reset.c b/arch/arm/kvm/reset.c index 1da8b2d14550..5ed0c3ee33d6 100644 --- a/arch/arm/kvm/reset.c +++ b/arch/arm/kvm/reset.c @@ -37,16 +37,6 @@ static struct kvm_regs cortexa_regs_reset = { .usr_regs.ARM_cpsr = SVC_MODE | PSR_A_BIT | PSR_I_BIT | PSR_F_BIT, }; -static const struct kvm_irq_level cortexa_ptimer_irq = { - { .irq = 30 }, - .level = 1, -}; - -static const struct kvm_irq_level cortexa_vtimer_irq = { - { .irq = 27 }, - .level = 1, -}; - /******************************************************************************* * Exported reset function @@ -62,16 +52,12 @@ static const struct kvm_irq_level cortexa_vtimer_irq = { int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { struct kvm_regs *reset_regs; - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; switch (vcpu->arch.target) { case KVM_ARM_TARGET_CORTEX_A7: case KVM_ARM_TARGET_CORTEX_A15: reset_regs = &cortexa_regs_reset; vcpu->arch.midr = read_cpuid_id(); - cpu_vtimer_irq = &cortexa_vtimer_irq; - cpu_ptimer_irq = &cortexa_ptimer_irq; break; default: return -ENODEV; @@ -84,5 +70,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_reset_coprocs(vcpu); /* Reset arch_timer context */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h index 5e19165c5fa8..0c4fd1f46e10 100644 --- a/arch/arm64/include/asm/kvm_host.h +++ b/arch/arm64/include/asm/kvm_host.h @@ -41,7 +41,9 @@ #define KVM_VCPU_MAX_FEATURES 4 -#define KVM_REQ_VCPU_EXIT (8 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SLEEP \ + KVM_ARCH_REQ_FLAGS(0, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_IRQ_PENDING KVM_ARCH_REQ(1) int __attribute_const__ kvm_target_cpu(void); int kvm_reset_vcpu(struct kvm_vcpu *vcpu); @@ -333,8 +335,6 @@ struct kvm_vcpu *kvm_arm_get_running_vcpu(void); struct kvm_vcpu * __percpu *kvm_get_running_vcpus(void); void kvm_arm_halt_guest(struct kvm *kvm); void kvm_arm_resume_guest(struct kvm *kvm); -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu); -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu); u64 __kvm_call_hyp(void *hypfn, ...); #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) diff --git a/arch/arm64/include/uapi/asm/kvm.h b/arch/arm64/include/uapi/asm/kvm.h index 70eea2ecc663..9f3ca24bbcc6 100644 --- a/arch/arm64/include/uapi/asm/kvm.h +++ b/arch/arm64/include/uapi/asm/kvm.h @@ -232,6 +232,9 @@ struct kvm_arch_memory_slot { #define KVM_ARM_VCPU_PMU_V3_CTRL 0 #define KVM_ARM_VCPU_PMU_V3_IRQ 0 #define KVM_ARM_VCPU_PMU_V3_INIT 1 +#define KVM_ARM_VCPU_TIMER_CTRL 1 +#define KVM_ARM_VCPU_TIMER_IRQ_VTIMER 0 +#define KVM_ARM_VCPU_TIMER_IRQ_PTIMER 1 /* KVM_IRQ_LINE irq field index values */ #define KVM_ARM_IRQ_TYPE_SHIFT 24 diff --git a/arch/arm64/kvm/guest.c b/arch/arm64/kvm/guest.c index b37446a8ffdb..5c7f657dd207 100644 --- a/arch/arm64/kvm/guest.c +++ b/arch/arm64/kvm/guest.c @@ -390,6 +390,9 @@ int kvm_arm_vcpu_arch_set_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_set_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_set_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -407,6 +410,9 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_get_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_get_attr(vcpu, attr); + break; default: ret = -ENXIO; break; @@ -424,6 +430,9 @@ int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu, case KVM_ARM_VCPU_PMU_V3_CTRL: ret = kvm_arm_pmu_v3_has_attr(vcpu, attr); break; + case KVM_ARM_VCPU_TIMER_CTRL: + ret = kvm_arm_timer_has_attr(vcpu, attr); + break; default: ret = -ENXIO; break; diff --git a/arch/arm64/kvm/handle_exit.c b/arch/arm64/kvm/handle_exit.c index fa1b18e364fc..17d8a1677a0b 100644 --- a/arch/arm64/kvm/handle_exit.c +++ b/arch/arm64/kvm/handle_exit.c @@ -89,6 +89,7 @@ static int kvm_handle_wfx(struct kvm_vcpu *vcpu, struct kvm_run *run) trace_kvm_wfx_arm64(*vcpu_pc(vcpu), false); vcpu->stat.wfi_exit_stat++; kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); } kvm_skip_instr(vcpu, kvm_vcpu_trap_il_is32bit(vcpu)); diff --git a/arch/arm64/kvm/hyp/switch.c b/arch/arm64/kvm/hyp/switch.c index aede1658aeda..e5f089de6526 100644 --- a/arch/arm64/kvm/hyp/switch.c +++ b/arch/arm64/kvm/hyp/switch.c @@ -422,6 +422,7 @@ void __hyp_text __noreturn __hyp_panic(void) vcpu = (struct kvm_vcpu *)read_sysreg(tpidr_el2); host_ctxt = kern_hyp_va(vcpu->arch.host_cpu_context); + __timer_save_state(vcpu); __deactivate_traps(vcpu); __deactivate_vm(vcpu); __sysreg_restore_host_state(host_ctxt); diff --git a/arch/arm64/kvm/reset.c b/arch/arm64/kvm/reset.c index 561badf93de8..3256b9228e75 100644 --- a/arch/arm64/kvm/reset.c +++ b/arch/arm64/kvm/reset.c @@ -46,16 +46,6 @@ static const struct kvm_regs default_regs_reset32 = { COMPAT_PSR_I_BIT | COMPAT_PSR_F_BIT), }; -static const struct kvm_irq_level default_ptimer_irq = { - .irq = 30, - .level = 1, -}; - -static const struct kvm_irq_level default_vtimer_irq = { - .irq = 27, - .level = 1, -}; - static bool cpu_has_32bit_el1(void) { u64 pfr0; @@ -108,8 +98,6 @@ int kvm_arch_dev_ioctl_check_extension(struct kvm *kvm, long ext) */ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) { - const struct kvm_irq_level *cpu_vtimer_irq; - const struct kvm_irq_level *cpu_ptimer_irq; const struct kvm_regs *cpu_reset; switch (vcpu->arch.target) { @@ -122,8 +110,6 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) cpu_reset = &default_regs_reset; } - cpu_vtimer_irq = &default_vtimer_irq; - cpu_ptimer_irq = &default_ptimer_irq; break; } @@ -137,5 +123,5 @@ int kvm_reset_vcpu(struct kvm_vcpu *vcpu) kvm_pmu_vcpu_reset(vcpu); /* Reset timer */ - return kvm_timer_vcpu_reset(vcpu, cpu_vtimer_irq, cpu_ptimer_irq); + return kvm_timer_vcpu_reset(vcpu); } diff --git a/arch/mips/kvm/trap_emul.c b/arch/mips/kvm/trap_emul.c index a563759fd142..6a0d7040d882 100644 --- a/arch/mips/kvm/trap_emul.c +++ b/arch/mips/kvm/trap_emul.c @@ -1094,7 +1094,7 @@ static void kvm_trap_emul_check_requests(struct kvm_vcpu *vcpu, int cpu, struct mm_struct *mm; int i; - if (likely(!vcpu->requests)) + if (likely(!kvm_request_pending(vcpu))) return; if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { diff --git a/arch/mips/kvm/vz.c b/arch/mips/kvm/vz.c index 71d8856ade64..74805035edc8 100644 --- a/arch/mips/kvm/vz.c +++ b/arch/mips/kvm/vz.c @@ -2337,7 +2337,7 @@ static int kvm_vz_check_requests(struct kvm_vcpu *vcpu, int cpu) int ret = 0; int i; - if (!vcpu->requests) + if (!kvm_request_pending(vcpu)) return 0; if (kvm_check_request(KVM_REQ_TLB_FLUSH, vcpu)) { diff --git a/arch/powerpc/include/asm/kvm_host.h b/arch/powerpc/include/asm/kvm_host.h index 9c51ac4b8f36..50e0bc9723cc 100644 --- a/arch/powerpc/include/asm/kvm_host.h +++ b/arch/powerpc/include/asm/kvm_host.h @@ -52,8 +52,8 @@ #define KVM_IRQCHIP_NUM_PINS 256 /* PPC-specific vcpu->requests bit members */ -#define KVM_REQ_WATCHDOG 8 -#define KVM_REQ_EPR_EXIT 9 +#define KVM_REQ_WATCHDOG KVM_ARCH_REQ(0) +#define KVM_REQ_EPR_EXIT KVM_ARCH_REQ(1) #include <linux/mmu_notifier.h> diff --git a/arch/powerpc/kvm/booke.c b/arch/powerpc/kvm/booke.c index 3eaac3809977..071b87ee682f 100644 --- a/arch/powerpc/kvm/booke.c +++ b/arch/powerpc/kvm/booke.c @@ -687,7 +687,7 @@ int kvmppc_core_prepare_to_enter(struct kvm_vcpu *vcpu) kvmppc_core_check_exceptions(vcpu); - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { /* Exception delivery raised request; start over */ return 1; } diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index f7cf2cd564ef..fd64f087737c 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -55,8 +55,7 @@ EXPORT_SYMBOL_GPL(kvmppc_pr_ops); int kvm_arch_vcpu_runnable(struct kvm_vcpu *v) { - return !!(v->arch.pending_exceptions) || - v->requests; + return !!(v->arch.pending_exceptions) || kvm_request_pending(v); } int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu) @@ -108,7 +107,7 @@ int kvmppc_prepare_to_enter(struct kvm_vcpu *vcpu) */ smp_mb(); - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { /* Make sure we process requests preemptable */ local_irq_enable(); trace_kvm_check_requests(vcpu); diff --git a/arch/s390/include/asm/kvm_host.h b/arch/s390/include/asm/kvm_host.h index 426614a882a9..9c3bd94204ac 100644 --- a/arch/s390/include/asm/kvm_host.h +++ b/arch/s390/include/asm/kvm_host.h @@ -42,9 +42,9 @@ #define KVM_HALT_POLL_NS_DEFAULT 80000 /* s390-specific vcpu->requests bit members */ -#define KVM_REQ_ENABLE_IBS 8 -#define KVM_REQ_DISABLE_IBS 9 -#define KVM_REQ_ICPT_OPEREXC 10 +#define KVM_REQ_ENABLE_IBS KVM_ARCH_REQ(0) +#define KVM_REQ_DISABLE_IBS KVM_ARCH_REQ(1) +#define KVM_REQ_ICPT_OPEREXC KVM_ARCH_REQ(2) #define SIGP_CTRL_C 0x80 #define SIGP_CTRL_SCN_MASK 0x3f diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 689ac48361c6..ad41e0fa3a21 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -2440,7 +2440,7 @@ static int kvm_s390_handle_requests(struct kvm_vcpu *vcpu) { retry: kvm_s390_vcpu_request_handled(vcpu); - if (!vcpu->requests) + if (!kvm_request_pending(vcpu)) return 0; /* * We use MMU_RELOAD just to re-arm the ipte notifier for the diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 9c761fea0c98..563979976fab 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -48,28 +48,31 @@ #define KVM_IRQCHIP_NUM_PINS KVM_IOAPIC_NUM_PINS /* x86-specific vcpu->requests bit members */ -#define KVM_REQ_MIGRATE_TIMER 8 -#define KVM_REQ_REPORT_TPR_ACCESS 9 -#define KVM_REQ_TRIPLE_FAULT 10 -#define KVM_REQ_MMU_SYNC 11 -#define KVM_REQ_CLOCK_UPDATE 12 -#define KVM_REQ_EVENT 14 -#define KVM_REQ_APF_HALT 15 -#define KVM_REQ_STEAL_UPDATE 16 -#define KVM_REQ_NMI 17 -#define KVM_REQ_PMU 18 -#define KVM_REQ_PMI 19 -#define KVM_REQ_SMI 20 -#define KVM_REQ_MASTERCLOCK_UPDATE 21 -#define KVM_REQ_MCLOCK_INPROGRESS (22 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_SCAN_IOAPIC (23 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_GLOBAL_CLOCK_UPDATE 24 -#define KVM_REQ_APIC_PAGE_RELOAD (25 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_HV_CRASH 26 -#define KVM_REQ_IOAPIC_EOI_EXIT 27 -#define KVM_REQ_HV_RESET 28 -#define KVM_REQ_HV_EXIT 29 -#define KVM_REQ_HV_STIMER 30 +#define KVM_REQ_MIGRATE_TIMER KVM_ARCH_REQ(0) +#define KVM_REQ_REPORT_TPR_ACCESS KVM_ARCH_REQ(1) +#define KVM_REQ_TRIPLE_FAULT KVM_ARCH_REQ(2) +#define KVM_REQ_MMU_SYNC KVM_ARCH_REQ(3) +#define KVM_REQ_CLOCK_UPDATE KVM_ARCH_REQ(4) +#define KVM_REQ_EVENT KVM_ARCH_REQ(6) +#define KVM_REQ_APF_HALT KVM_ARCH_REQ(7) +#define KVM_REQ_STEAL_UPDATE KVM_ARCH_REQ(8) +#define KVM_REQ_NMI KVM_ARCH_REQ(9) +#define KVM_REQ_PMU KVM_ARCH_REQ(10) +#define KVM_REQ_PMI KVM_ARCH_REQ(11) +#define KVM_REQ_SMI KVM_ARCH_REQ(12) +#define KVM_REQ_MASTERCLOCK_UPDATE KVM_ARCH_REQ(13) +#define KVM_REQ_MCLOCK_INPROGRESS \ + KVM_ARCH_REQ_FLAGS(14, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_SCAN_IOAPIC \ + KVM_ARCH_REQ_FLAGS(15, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_GLOBAL_CLOCK_UPDATE KVM_ARCH_REQ(16) +#define KVM_REQ_APIC_PAGE_RELOAD \ + KVM_ARCH_REQ_FLAGS(17, KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_HV_CRASH KVM_ARCH_REQ(18) +#define KVM_REQ_IOAPIC_EOI_EXIT KVM_ARCH_REQ(19) +#define KVM_REQ_HV_RESET KVM_ARCH_REQ(20) +#define KVM_REQ_HV_EXIT KVM_ARCH_REQ(21) +#define KVM_REQ_HV_STIMER KVM_ARCH_REQ(22) #define CR0_RESERVED_BITS \ (~(unsigned long)(X86_CR0_PE | X86_CR0_MP | X86_CR0_EM | X86_CR0_TS \ diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 464da936c53d..f81060518635 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6710,7 +6710,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) bool req_immediate_exit = false; - if (vcpu->requests) { + if (kvm_request_pending(vcpu)) { if (kvm_check_request(KVM_REQ_MMU_RELOAD, vcpu)) kvm_mmu_unload(vcpu); if (kvm_check_request(KVM_REQ_MIGRATE_TIMER, vcpu)) @@ -6874,7 +6874,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->sync_pir_to_irr(vcpu); } - if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests + if (vcpu->mode == EXITING_GUEST_MODE || kvm_request_pending(vcpu) || need_resched() || signal_pending(current)) { vcpu->mode = OUTSIDE_GUEST_MODE; smp_wmb(); diff --git a/include/kvm/arm_arch_timer.h b/include/kvm/arm_arch_timer.h index 295584f31a4e..f0053f884b4a 100644 --- a/include/kvm/arm_arch_timer.h +++ b/include/kvm/arm_arch_timer.h @@ -57,9 +57,7 @@ struct arch_timer_cpu { int kvm_timer_hyp_init(void); int kvm_timer_enable(struct kvm_vcpu *vcpu); -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *virt_irq, - const struct kvm_irq_level *phys_irq); +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu); void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu); void kvm_timer_flush_hwstate(struct kvm_vcpu *vcpu); void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu); @@ -70,6 +68,10 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu); u64 kvm_arm_timer_get_reg(struct kvm_vcpu *, u64 regid); int kvm_arm_timer_set_reg(struct kvm_vcpu *, u64 regid, u64 value); +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); + bool kvm_timer_should_fire(struct arch_timer_context *timer_ctx); void kvm_timer_schedule(struct kvm_vcpu *vcpu); void kvm_timer_unschedule(struct kvm_vcpu *vcpu); diff --git a/include/kvm/arm_pmu.h b/include/kvm/arm_pmu.h index 1ab4633adf4f..f6e030617467 100644 --- a/include/kvm/arm_pmu.h +++ b/include/kvm/arm_pmu.h @@ -35,6 +35,7 @@ struct kvm_pmu { int irq_num; struct kvm_pmc pmc[ARMV8_PMU_MAX_COUNTERS]; bool ready; + bool created; bool irq_level; }; @@ -63,6 +64,7 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr); +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu); #else struct kvm_pmu { }; @@ -112,6 +114,10 @@ static inline int kvm_arm_pmu_v3_has_attr(struct kvm_vcpu *vcpu, { return -ENXIO; } +static inline int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) +{ + return 0; +} #endif #endif diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index ef718586321c..2d923a6b2175 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -38,6 +38,10 @@ #define VGIC_MIN_LPI 8192 #define KVM_IRQCHIP_NUM_PINS (1020 - 32) +#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) +#define irq_is_spi(irq) ((irq) >= VGIC_NR_PRIVATE_IRQS && \ + (irq) <= VGIC_MAX_SPI) + enum vgic_type { VGIC_V2, /* Good ol' GICv2 */ VGIC_V3, /* New fancy GICv3 */ @@ -119,6 +123,9 @@ struct vgic_irq { u8 source; /* GICv2 SGIs only */ u8 priority; enum vgic_irq_config config; /* Level or edge */ + + void *owner; /* Opaque pointer to reserve an interrupt + for in-kernel devices. */ }; struct vgic_register_region; @@ -298,9 +305,7 @@ int kvm_vgic_hyp_init(void); void kvm_vgic_init_cpu_hardware(void); int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); -int kvm_vgic_inject_mapped_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level); + bool level, void *owner); int kvm_vgic_map_phys_irq(struct kvm_vcpu *vcpu, u32 virt_irq, u32 phys_irq); int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq); bool kvm_vgic_map_is_active(struct kvm_vcpu *vcpu, unsigned int virt_irq); @@ -341,4 +346,6 @@ int kvm_send_userspace_msi(struct kvm *kvm, struct kvm_msi *msi); */ int kvm_vgic_setup_default_irq_routing(struct kvm *kvm); +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner); + #endif /* __KVM_ARM_VGIC_H */ diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 8c0664309815..0b50e7b35ed4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -126,6 +126,13 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_PENDING_TIMER 2 #define KVM_REQ_UNHALT 3 +#define KVM_REQUEST_ARCH_BASE 8 + +#define KVM_ARCH_REQ_FLAGS(nr, flags) ({ \ + BUILD_BUG_ON((unsigned)(nr) >= 32 - KVM_REQUEST_ARCH_BASE); \ + (unsigned)(((nr) + KVM_REQUEST_ARCH_BASE) | (flags)); \ +}) +#define KVM_ARCH_REQ(nr) KVM_ARCH_REQ_FLAGS(nr, 0) #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -1098,6 +1105,11 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); } +static inline bool kvm_request_pending(struct kvm_vcpu *vcpu) +{ + return READ_ONCE(vcpu->requests); +} + static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c index 5976609ef27c..8e89d63005c7 100644 --- a/virt/kvm/arm/arch_timer.c +++ b/virt/kvm/arm/arch_timer.c @@ -21,6 +21,7 @@ #include <linux/kvm_host.h> #include <linux/interrupt.h> #include <linux/irq.h> +#include <linux/uaccess.h> #include <clocksource/arm_arch_timer.h> #include <asm/arch_timer.h> @@ -35,6 +36,16 @@ static struct timecounter *timecounter; static unsigned int host_vtimer_irq; static u32 host_vtimer_irq_flags; +static const struct kvm_irq_level default_ptimer_irq = { + .irq = 30, + .level = 1, +}; + +static const struct kvm_irq_level default_vtimer_irq = { + .irq = 27, + .level = 1, +}; + void kvm_timer_vcpu_put(struct kvm_vcpu *vcpu) { vcpu_vtimer(vcpu)->active_cleared_last = false; @@ -95,7 +106,7 @@ static void kvm_timer_inject_irq_work(struct work_struct *work) * If the vcpu is blocked we want to wake it up so that it will see * the timer has expired when entering the guest. */ - kvm_vcpu_kick(vcpu); + kvm_vcpu_wake_up(vcpu); } static u64 kvm_timer_compute_delta(struct arch_timer_context *timer_ctx) @@ -215,7 +226,8 @@ static void kvm_timer_update_irq(struct kvm_vcpu *vcpu, bool new_level, if (likely(irqchip_in_kernel(vcpu->kvm))) { ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, timer_ctx->irq.irq, - timer_ctx->irq.level); + timer_ctx->irq.level, + timer_ctx); WARN_ON(ret); } } @@ -445,23 +457,12 @@ void kvm_timer_sync_hwstate(struct kvm_vcpu *vcpu) kvm_timer_update_state(vcpu); } -int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu, - const struct kvm_irq_level *virt_irq, - const struct kvm_irq_level *phys_irq) +int kvm_timer_vcpu_reset(struct kvm_vcpu *vcpu) { struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* - * The vcpu timer irq number cannot be determined in - * kvm_timer_vcpu_init() because it is called much before - * kvm_vcpu_set_target(). To handle this, we determine - * vcpu timer irq number when the vcpu is reset. - */ - vtimer->irq.irq = virt_irq->irq; - ptimer->irq.irq = phys_irq->irq; - - /* * The bits in CNTV_CTL are architecturally reset to UNKNOWN for ARMv8 * and to 0 for ARMv7. We provide an implementation that always * resets the timer to be disabled and unmasked and is compliant with @@ -496,6 +497,8 @@ static void update_vtimer_cntvoff(struct kvm_vcpu *vcpu, u64 cntvoff) void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); /* Synchronize cntvoff across all vtimers of a VM. */ update_vtimer_cntvoff(vcpu, kvm_phys_timer_read()); @@ -504,6 +507,9 @@ void kvm_timer_vcpu_init(struct kvm_vcpu *vcpu) INIT_WORK(&timer->expired, kvm_timer_inject_irq_work); hrtimer_init(&timer->timer, CLOCK_MONOTONIC, HRTIMER_MODE_ABS); timer->timer.function = kvm_timer_expire; + + vtimer->irq.irq = default_vtimer_irq.irq; + ptimer->irq.irq = default_ptimer_irq.irq; } static void kvm_timer_init_interrupt(void *info) @@ -613,6 +619,30 @@ void kvm_timer_vcpu_terminate(struct kvm_vcpu *vcpu) kvm_vgic_unmap_phys_irq(vcpu, vtimer->irq.irq); } +static bool timer_irqs_are_valid(struct kvm_vcpu *vcpu) +{ + int vtimer_irq, ptimer_irq; + int i, ret; + + vtimer_irq = vcpu_vtimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, vtimer_irq, vcpu_vtimer(vcpu)); + if (ret) + return false; + + ptimer_irq = vcpu_ptimer(vcpu)->irq.irq; + ret = kvm_vgic_set_owner(vcpu, ptimer_irq, vcpu_ptimer(vcpu)); + if (ret) + return false; + + kvm_for_each_vcpu(i, vcpu, vcpu->kvm) { + if (vcpu_vtimer(vcpu)->irq.irq != vtimer_irq || + vcpu_ptimer(vcpu)->irq.irq != ptimer_irq) + return false; + } + + return true; +} + int kvm_timer_enable(struct kvm_vcpu *vcpu) { struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu; @@ -632,6 +662,11 @@ int kvm_timer_enable(struct kvm_vcpu *vcpu) if (!vgic_initialized(vcpu->kvm)) return -ENODEV; + if (!timer_irqs_are_valid(vcpu)) { + kvm_debug("incorrectly configured timer irqs\n"); + return -EINVAL; + } + /* * Find the physical IRQ number corresponding to the host_vtimer_irq */ @@ -681,3 +716,79 @@ void kvm_timer_init_vhe(void) val |= (CNTHCTL_EL1PCTEN << cnthctl_shift); write_sysreg(val, cnthctl_el2); } + +static void set_timer_irqs(struct kvm *kvm, int vtimer_irq, int ptimer_irq) +{ + struct kvm_vcpu *vcpu; + int i; + + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu_vtimer(vcpu)->irq.irq = vtimer_irq; + vcpu_ptimer(vcpu)->irq.irq = ptimer_irq; + } +} + +int kvm_arm_timer_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *vtimer = vcpu_vtimer(vcpu); + struct arch_timer_context *ptimer = vcpu_ptimer(vcpu); + int irq; + + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + + if (get_user(irq, uaddr)) + return -EFAULT; + + if (!(irq_is_ppi(irq))) + return -EINVAL; + + if (vcpu->arch.timer_cpu.enabled) + return -EBUSY; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + set_timer_irqs(vcpu->kvm, irq, ptimer->irq.irq); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + set_timer_irqs(vcpu->kvm, vtimer->irq.irq, irq); + break; + default: + return -ENXIO; + } + + return 0; +} + +int kvm_arm_timer_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + int __user *uaddr = (int __user *)(long)attr->addr; + struct arch_timer_context *timer; + int irq; + + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + timer = vcpu_vtimer(vcpu); + break; + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + timer = vcpu_ptimer(vcpu); + break; + default: + return -ENXIO; + } + + irq = timer->irq.irq; + return put_user(irq, uaddr); +} + +int kvm_arm_timer_has_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) +{ + switch (attr->attr) { + case KVM_ARM_VCPU_TIMER_IRQ_VTIMER: + case KVM_ARM_VCPU_TIMER_IRQ_PTIMER: + return 0; + } + + return -ENXIO; +} diff --git a/virt/kvm/arm/arm.c b/virt/kvm/arm/arm.c index 3417e184c8e1..a265acc53e39 100644 --- a/virt/kvm/arm/arm.c +++ b/virt/kvm/arm/arm.c @@ -368,6 +368,13 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) kvm_timer_vcpu_put(vcpu); } +static void vcpu_power_off(struct kvm_vcpu *vcpu) +{ + vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); +} + int kvm_arch_vcpu_ioctl_get_mpstate(struct kvm_vcpu *vcpu, struct kvm_mp_state *mp_state) { @@ -387,7 +394,7 @@ int kvm_arch_vcpu_ioctl_set_mpstate(struct kvm_vcpu *vcpu, vcpu->arch.power_off = false; break; case KVM_MP_STATE_STOPPED: - vcpu->arch.power_off = true; + vcpu_power_off(vcpu); break; default: return -EINVAL; @@ -520,6 +527,10 @@ static int kvm_vcpu_first_run_init(struct kvm_vcpu *vcpu) } ret = kvm_timer_enable(vcpu); + if (ret) + return ret; + + ret = kvm_arm_pmu_v3_enable(vcpu); return ret; } @@ -536,21 +547,7 @@ void kvm_arm_halt_guest(struct kvm *kvm) kvm_for_each_vcpu(i, vcpu, kvm) vcpu->arch.pause = true; - kvm_make_all_cpus_request(kvm, KVM_REQ_VCPU_EXIT); -} - -void kvm_arm_halt_vcpu(struct kvm_vcpu *vcpu) -{ - vcpu->arch.pause = true; - kvm_vcpu_kick(vcpu); -} - -void kvm_arm_resume_vcpu(struct kvm_vcpu *vcpu) -{ - struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); - - vcpu->arch.pause = false; - swake_up(wq); + kvm_make_all_cpus_request(kvm, KVM_REQ_SLEEP); } void kvm_arm_resume_guest(struct kvm *kvm) @@ -558,16 +555,23 @@ void kvm_arm_resume_guest(struct kvm *kvm) int i; struct kvm_vcpu *vcpu; - kvm_for_each_vcpu(i, vcpu, kvm) - kvm_arm_resume_vcpu(vcpu); + kvm_for_each_vcpu(i, vcpu, kvm) { + vcpu->arch.pause = false; + swake_up(kvm_arch_vcpu_wq(vcpu)); + } } -static void vcpu_sleep(struct kvm_vcpu *vcpu) +static void vcpu_req_sleep(struct kvm_vcpu *vcpu) { struct swait_queue_head *wq = kvm_arch_vcpu_wq(vcpu); swait_event_interruptible(*wq, ((!vcpu->arch.power_off) && (!vcpu->arch.pause))); + + if (vcpu->arch.power_off || vcpu->arch.pause) { + /* Awaken to handle a signal, request we sleep again later. */ + kvm_make_request(KVM_REQ_SLEEP, vcpu); + } } static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) @@ -575,6 +579,20 @@ static int kvm_vcpu_initialized(struct kvm_vcpu *vcpu) return vcpu->arch.target >= 0; } +static void check_vcpu_requests(struct kvm_vcpu *vcpu) +{ + if (kvm_request_pending(vcpu)) { + if (kvm_check_request(KVM_REQ_SLEEP, vcpu)) + vcpu_req_sleep(vcpu); + + /* + * Clear IRQ_PENDING requests that were made to guarantee + * that a VCPU sees new virtual interrupts. + */ + kvm_check_request(KVM_REQ_IRQ_PENDING, vcpu); + } +} + /** * kvm_arch_vcpu_ioctl_run - the main VCPU run function to execute guest code * @vcpu: The VCPU pointer @@ -620,8 +638,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) update_vttbr(vcpu->kvm); - if (vcpu->arch.power_off || vcpu->arch.pause) - vcpu_sleep(vcpu); + check_vcpu_requests(vcpu); /* * Preparing the interrupts to be injected also @@ -650,8 +667,17 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) run->exit_reason = KVM_EXIT_INTR; } + /* + * Ensure we set mode to IN_GUEST_MODE after we disable + * interrupts and before the final VCPU requests check. + * See the comment in kvm_vcpu_exiting_guest_mode() and + * Documentation/virtual/kvm/vcpu-requests.rst + */ + smp_store_mb(vcpu->mode, IN_GUEST_MODE); + if (ret <= 0 || need_new_vmid_gen(vcpu->kvm) || - vcpu->arch.power_off || vcpu->arch.pause) { + kvm_request_pending(vcpu)) { + vcpu->mode = OUTSIDE_GUEST_MODE; local_irq_enable(); kvm_pmu_sync_hwstate(vcpu); kvm_timer_sync_hwstate(vcpu); @@ -667,7 +693,6 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) */ trace_kvm_entry(*vcpu_pc(vcpu)); guest_enter_irqoff(); - vcpu->mode = IN_GUEST_MODE; ret = kvm_call_hyp(__kvm_vcpu_run, vcpu); @@ -756,6 +781,7 @@ static int vcpu_interrupt_line(struct kvm_vcpu *vcpu, int number, bool level) * trigger a world-switch round on the running physical CPU to set the * virtual IRQ/FIQ fields in the HCR appropriately. */ + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); return 0; @@ -806,7 +832,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_SGIS || irq_num >= VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level); + return kvm_vgic_inject_irq(kvm, vcpu->vcpu_id, irq_num, level, NULL); case KVM_ARM_IRQ_TYPE_SPI: if (!irqchip_in_kernel(kvm)) return -ENXIO; @@ -814,7 +840,7 @@ int kvm_vm_ioctl_irq_line(struct kvm *kvm, struct kvm_irq_level *irq_level, if (irq_num < VGIC_NR_PRIVATE_IRQS) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, irq_num, level); + return kvm_vgic_inject_irq(kvm, 0, irq_num, level, NULL); } return -EINVAL; @@ -884,7 +910,7 @@ static int kvm_arch_vcpu_ioctl_vcpu_init(struct kvm_vcpu *vcpu, * Handle the "start in power-off" case. */ if (test_bit(KVM_ARM_VCPU_POWER_OFF, vcpu->arch.features)) - vcpu->arch.power_off = true; + vcpu_power_off(vcpu); else vcpu->arch.power_off = false; diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c index 4b43e7f3b158..fc8a723ff387 100644 --- a/virt/kvm/arm/pmu.c +++ b/virt/kvm/arm/pmu.c @@ -203,6 +203,24 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) return reg; } +static void kvm_pmu_check_overflow(struct kvm_vcpu *vcpu) +{ + struct kvm_pmu *pmu = &vcpu->arch.pmu; + bool overflow = !!kvm_pmu_overflow_status(vcpu); + + if (pmu->irq_level == overflow) + return; + + pmu->irq_level = overflow; + + if (likely(irqchip_in_kernel(vcpu->kvm))) { + int ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, + pmu->irq_num, overflow, + &vcpu->arch.pmu); + WARN_ON(ret); + } +} + /** * kvm_pmu_overflow_set - set PMU overflow interrupt * @vcpu: The vcpu pointer @@ -210,37 +228,18 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu) */ void kvm_pmu_overflow_set(struct kvm_vcpu *vcpu, u64 val) { - u64 reg; - if (val == 0) return; vcpu_sys_reg(vcpu, PMOVSSET_EL0) |= val; - reg = kvm_pmu_overflow_status(vcpu); - if (reg != 0) - kvm_vcpu_kick(vcpu); + kvm_pmu_check_overflow(vcpu); } static void kvm_pmu_update_state(struct kvm_vcpu *vcpu) { - struct kvm_pmu *pmu = &vcpu->arch.pmu; - bool overflow; - if (!kvm_arm_pmu_v3_ready(vcpu)) return; - - overflow = !!kvm_pmu_overflow_status(vcpu); - if (pmu->irq_level == overflow) - return; - - pmu->irq_level = overflow; - - if (likely(irqchip_in_kernel(vcpu->kvm))) { - int ret; - ret = kvm_vgic_inject_irq(vcpu->kvm, vcpu->vcpu_id, - pmu->irq_num, overflow); - WARN_ON(ret); - } + kvm_pmu_check_overflow(vcpu); } bool kvm_pmu_should_notify_user(struct kvm_vcpu *vcpu) @@ -451,34 +450,74 @@ bool kvm_arm_support_pmu_v3(void) return (perf_num_counters() > 0); } -static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +int kvm_arm_pmu_v3_enable(struct kvm_vcpu *vcpu) { - if (!kvm_arm_support_pmu_v3()) - return -ENODEV; + if (!vcpu->arch.pmu.created) + return 0; /* - * We currently require an in-kernel VGIC to use the PMU emulation, - * because we do not support forwarding PMU overflow interrupts to - * userspace yet. + * A valid interrupt configuration for the PMU is either to have a + * properly configured interrupt number and using an in-kernel + * irqchip, or to not have an in-kernel GIC and not set an IRQ. */ - if (!irqchip_in_kernel(vcpu->kvm) || !vgic_initialized(vcpu->kvm)) + if (irqchip_in_kernel(vcpu->kvm)) { + int irq = vcpu->arch.pmu.irq_num; + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -EINVAL; + + /* + * If we are using an in-kernel vgic, at this point we know + * the vgic will be initialized, so we can check the PMU irq + * number against the dimensions of the vgic and make sure + * it's valid. + */ + if (!irq_is_ppi(irq) && !vgic_valid_spi(vcpu->kvm, irq)) + return -EINVAL; + } else if (kvm_arm_pmu_irq_initialized(vcpu)) { + return -EINVAL; + } + + kvm_pmu_vcpu_reset(vcpu); + vcpu->arch.pmu.ready = true; + + return 0; +} + +static int kvm_arm_pmu_v3_init(struct kvm_vcpu *vcpu) +{ + if (!kvm_arm_support_pmu_v3()) return -ENODEV; - if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features) || - !kvm_arm_pmu_irq_initialized(vcpu)) + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENXIO; - if (kvm_arm_pmu_v3_ready(vcpu)) + if (vcpu->arch.pmu.created) return -EBUSY; - kvm_pmu_vcpu_reset(vcpu); - vcpu->arch.pmu.ready = true; + if (irqchip_in_kernel(vcpu->kvm)) { + int ret; + + /* + * If using the PMU with an in-kernel virtual GIC + * implementation, we require the GIC to be already + * initialized when initializing the PMU. + */ + if (!vgic_initialized(vcpu->kvm)) + return -ENODEV; + + if (!kvm_arm_pmu_irq_initialized(vcpu)) + return -ENXIO; + ret = kvm_vgic_set_owner(vcpu, vcpu->arch.pmu.irq_num, + &vcpu->arch.pmu); + if (ret) + return ret; + } + + vcpu->arch.pmu.created = true; return 0; } -#define irq_is_ppi(irq) ((irq) >= VGIC_NR_SGIS && (irq) < VGIC_NR_PRIVATE_IRQS) - /* * For one VM the interrupt type must be same for each vcpu. * As a PPI, the interrupt number is the same for all vcpus, @@ -512,6 +551,9 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; @@ -519,7 +561,7 @@ int kvm_arm_pmu_v3_set_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) return -EFAULT; /* The PMU overflow interrupt can be a PPI or a valid SPI. */ - if (!(irq_is_ppi(irq) || vgic_valid_spi(vcpu->kvm, irq))) + if (!(irq_is_ppi(irq) || irq_is_spi(irq))) return -EINVAL; if (!pmu_irq_is_valid(vcpu->kvm, irq)) @@ -546,6 +588,9 @@ int kvm_arm_pmu_v3_get_attr(struct kvm_vcpu *vcpu, struct kvm_device_attr *attr) int __user *uaddr = (int __user *)(long)attr->addr; int irq; + if (!irqchip_in_kernel(vcpu->kvm)) + return -EINVAL; + if (!test_bit(KVM_ARM_VCPU_PMU_V3, vcpu->arch.features)) return -ENODEV; diff --git a/virt/kvm/arm/psci.c b/virt/kvm/arm/psci.c index a08d7a93aebb..f1e363bab5e8 100644 --- a/virt/kvm/arm/psci.c +++ b/virt/kvm/arm/psci.c @@ -57,6 +57,7 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) * for KVM will preserve the register state. */ kvm_vcpu_block(vcpu); + kvm_clear_request(KVM_REQ_UNHALT, vcpu); return PSCI_RET_SUCCESS; } @@ -64,6 +65,8 @@ static unsigned long kvm_psci_vcpu_suspend(struct kvm_vcpu *vcpu) static void kvm_psci_vcpu_off(struct kvm_vcpu *vcpu) { vcpu->arch.power_off = true; + kvm_make_request(KVM_REQ_SLEEP, vcpu); + kvm_vcpu_kick(vcpu); } static unsigned long kvm_psci_vcpu_on(struct kvm_vcpu *source_vcpu) @@ -178,10 +181,9 @@ static void kvm_prepare_system_event(struct kvm_vcpu *vcpu, u32 type) * after this call is handled and before the VCPUs have been * re-initialized. */ - kvm_for_each_vcpu(i, tmp, vcpu->kvm) { + kvm_for_each_vcpu(i, tmp, vcpu->kvm) tmp->arch.power_off = true; - kvm_vcpu_kick(tmp); - } + kvm_make_all_cpus_request(vcpu->kvm, KVM_REQ_SLEEP); memset(&vcpu->run->system_event, 0, sizeof(vcpu->run->system_event)); vcpu->run->system_event.type = type; diff --git a/virt/kvm/arm/vgic/vgic-irqfd.c b/virt/kvm/arm/vgic/vgic-irqfd.c index f138ed2e9c63..b7baf581611a 100644 --- a/virt/kvm/arm/vgic/vgic-irqfd.c +++ b/virt/kvm/arm/vgic/vgic-irqfd.c @@ -34,7 +34,7 @@ static int vgic_irqfd_set_irq(struct kvm_kernel_irq_routing_entry *e, if (!vgic_valid_spi(kvm, spi_id)) return -EINVAL; - return kvm_vgic_inject_irq(kvm, 0, spi_id, level); + return kvm_vgic_inject_irq(kvm, 0, spi_id, level, NULL); } /** diff --git a/virt/kvm/arm/vgic/vgic-mmio-v2.c b/virt/kvm/arm/vgic/vgic-mmio-v2.c index 63e0bbdcddcc..37522e65eb53 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v2.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v2.c @@ -308,34 +308,36 @@ static const struct vgic_register_region vgic_v2_dist_registers[] = { vgic_mmio_read_v2_misc, vgic_mmio_write_v2_misc, 12, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_IGROUP, - vgic_mmio_read_rao, vgic_mmio_write_wi, 1, + vgic_mmio_read_rao, vgic_mmio_write_wi, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_SET, - vgic_mmio_read_enable, vgic_mmio_write_senable, 1, + vgic_mmio_read_enable, vgic_mmio_write_senable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ENABLE_CLEAR, - vgic_mmio_read_enable, vgic_mmio_write_cenable, 1, + vgic_mmio_read_enable, vgic_mmio_write_cenable, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_SET, - vgic_mmio_read_pending, vgic_mmio_write_spending, 1, + vgic_mmio_read_pending, vgic_mmio_write_spending, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PENDING_CLEAR, - vgic_mmio_read_pending, vgic_mmio_write_cpending, 1, + vgic_mmio_read_pending, vgic_mmio_write_cpending, NULL, NULL, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_SET, - vgic_mmio_read_active, vgic_mmio_write_sactive, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_ACTIVE_CLEAR, - vgic_mmio_read_active, vgic_mmio_write_cactive, 1, + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_PRI, - vgic_mmio_read_priority, vgic_mmio_write_priority, 8, - VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), + vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, + 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_TARGET, - vgic_mmio_read_target, vgic_mmio_write_target, 8, + vgic_mmio_read_target, vgic_mmio_write_target, NULL, NULL, 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), REGISTER_DESC_WITH_BITS_PER_IRQ(GIC_DIST_CONFIG, - vgic_mmio_read_config, vgic_mmio_write_config, 2, + vgic_mmio_read_config, vgic_mmio_write_config, NULL, NULL, 2, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GIC_DIST_SOFTINT, vgic_mmio_read_raz, vgic_mmio_write_sgir, 4, diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 201d5e2e973d..714fa3933546 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -456,11 +456,13 @@ static const struct vgic_register_region vgic_v3_dist_registers[] = { vgic_mmio_read_raz, vgic_mmio_write_wi, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ISACTIVER, - vgic_mmio_read_active, vgic_mmio_write_sactive, NULL, NULL, 1, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_ICACTIVER, - vgic_mmio_read_active, vgic_mmio_write_cactive, NULL, NULL, 1, - VGIC_ACCESS_32bit), + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, + 1, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_BITS_PER_IRQ_SHARED(GICD_IPRIORITYR, vgic_mmio_read_priority, vgic_mmio_write_priority, NULL, NULL, 8, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), @@ -526,12 +528,14 @@ static const struct vgic_register_region vgic_v3_sgibase_registers[] = { vgic_mmio_read_pending, vgic_mmio_write_cpending, vgic_mmio_read_raz, vgic_mmio_write_wi, 4, VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ISACTIVER0, - vgic_mmio_read_active, vgic_mmio_write_sactive, 4, - VGIC_ACCESS_32bit), - REGISTER_DESC_WITH_LENGTH(GICR_ICACTIVER0, - vgic_mmio_read_active, vgic_mmio_write_cactive, 4, - VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ISACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_sactive, + NULL, vgic_mmio_uaccess_write_sactive, + 4, VGIC_ACCESS_32bit), + REGISTER_DESC_WITH_LENGTH_UACCESS(GICR_ICACTIVER0, + vgic_mmio_read_active, vgic_mmio_write_cactive, + NULL, vgic_mmio_uaccess_write_cactive, + 4, VGIC_ACCESS_32bit), REGISTER_DESC_WITH_LENGTH(GICR_IPRIORITYR0, vgic_mmio_read_priority, vgic_mmio_write_priority, 32, VGIC_ACCESS_32bit | VGIC_ACCESS_8bit), diff --git a/virt/kvm/arm/vgic/vgic-mmio.c b/virt/kvm/arm/vgic/vgic-mmio.c index 1c17b2a2f105..c1e4bdd66131 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.c +++ b/virt/kvm/arm/vgic/vgic-mmio.c @@ -231,56 +231,94 @@ static void vgic_mmio_change_active(struct kvm_vcpu *vcpu, struct vgic_irq *irq, * be migrated while we don't hold the IRQ locks and we don't want to be * chasing moving targets. * - * For private interrupts, we only have to make sure the single and only VCPU - * that can potentially queue the IRQ is stopped. + * For private interrupts we don't have to do anything because userspace + * accesses to the VGIC state already require all VCPUs to be stopped, and + * only the VCPU itself can modify its private interrupts active state, which + * guarantees that the VCPU is not running. */ static void vgic_change_active_prepare(struct kvm_vcpu *vcpu, u32 intid) { - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_halt_vcpu(vcpu); - else + if (intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_halt_guest(vcpu->kvm); } /* See vgic_change_active_prepare */ static void vgic_change_active_finish(struct kvm_vcpu *vcpu, u32 intid) { - if (intid < VGIC_NR_PRIVATE_IRQS) - kvm_arm_resume_vcpu(vcpu); - else + if (intid > VGIC_NR_PRIVATE_IRQS) kvm_arm_resume_guest(vcpu->kvm); } -void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, - gpa_t addr, unsigned int len, - unsigned long val) +static void __vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); int i; - vgic_change_active_prepare(vcpu, intid); for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, false); vgic_put_irq(vcpu->kvm, irq); } - vgic_change_active_finish(vcpu, intid); } -void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, +void vgic_mmio_write_cactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val) { u32 intid = VGIC_ADDR_TO_INTID(addr, 1); - int i; + mutex_lock(&vcpu->kvm->lock); vgic_change_active_prepare(vcpu, intid); + + __vgic_mmio_write_cactive(vcpu, addr, len, val); + + vgic_change_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_cactive(vcpu, addr, len, val); +} + +static void __vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + int i; + for_each_set_bit(i, &val, len * 8) { struct vgic_irq *irq = vgic_get_irq(vcpu->kvm, vcpu, intid + i); vgic_mmio_change_active(vcpu, irq, true); vgic_put_irq(vcpu->kvm, irq); } +} + +void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + u32 intid = VGIC_ADDR_TO_INTID(addr, 1); + + mutex_lock(&vcpu->kvm->lock); + vgic_change_active_prepare(vcpu, intid); + + __vgic_mmio_write_sactive(vcpu, addr, len, val); + vgic_change_active_finish(vcpu, intid); + mutex_unlock(&vcpu->kvm->lock); +} + +void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val) +{ + __vgic_mmio_write_sactive(vcpu, addr, len, val); } unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, diff --git a/virt/kvm/arm/vgic/vgic-mmio.h b/virt/kvm/arm/vgic/vgic-mmio.h index ea4171acdef3..5693f6df45ec 100644 --- a/virt/kvm/arm/vgic/vgic-mmio.h +++ b/virt/kvm/arm/vgic/vgic-mmio.h @@ -75,7 +75,7 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; * The _WITH_LENGTH version instantiates registers with a fixed length * and is mutually exclusive with the _PER_IRQ version. */ -#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, bpi, acc) \ +#define REGISTER_DESC_WITH_BITS_PER_IRQ(off, rd, wr, ur, uw, bpi, acc) \ { \ .reg_offset = off, \ .bits_per_irq = bpi, \ @@ -83,6 +83,8 @@ extern struct kvm_io_device_ops kvm_io_gic_ops; .access_flags = acc, \ .read = rd, \ .write = wr, \ + .uaccess_read = ur, \ + .uaccess_write = uw, \ } #define REGISTER_DESC_WITH_LENGTH(off, rd, wr, length, acc) \ @@ -165,6 +167,14 @@ void vgic_mmio_write_sactive(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len, unsigned long val); +void vgic_mmio_uaccess_write_cactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + +void vgic_mmio_uaccess_write_sactive(struct kvm_vcpu *vcpu, + gpa_t addr, unsigned int len, + unsigned long val); + unsigned long vgic_mmio_read_priority(struct kvm_vcpu *vcpu, gpa_t addr, unsigned int len); diff --git a/virt/kvm/arm/vgic/vgic.c b/virt/kvm/arm/vgic/vgic.c index 83b24d20ff8f..fed717e07938 100644 --- a/virt/kvm/arm/vgic/vgic.c +++ b/virt/kvm/arm/vgic/vgic.c @@ -35,11 +35,12 @@ struct vgic_global kvm_vgic_global_state __ro_after_init = { /* * Locking order is always: - * its->cmd_lock (mutex) - * its->its_lock (mutex) - * vgic_cpu->ap_list_lock - * kvm->lpi_list_lock - * vgic_irq->irq_lock + * kvm->lock (mutex) + * its->cmd_lock (mutex) + * its->its_lock (mutex) + * vgic_cpu->ap_list_lock + * kvm->lpi_list_lock + * vgic_irq->irq_lock * * If you need to take multiple locks, always take the upper lock first, * then the lower ones, e.g. first take the its_lock, then the irq_lock. @@ -234,10 +235,14 @@ static void vgic_sort_ap_list(struct kvm_vcpu *vcpu) /* * Only valid injection if changing level for level-triggered IRQs or for a - * rising edge. + * rising edge, and in-kernel connected IRQ lines can only be controlled by + * their owner. */ -static bool vgic_validate_injection(struct vgic_irq *irq, bool level) +static bool vgic_validate_injection(struct vgic_irq *irq, bool level, void *owner) { + if (irq->owner != owner) + return false; + switch (irq->config) { case VGIC_CONFIG_LEVEL: return irq->line_level != level; @@ -285,8 +290,10 @@ retry: * won't see this one until it exits for some other * reason. */ - if (vcpu) + if (vcpu) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); + } return false; } @@ -332,6 +339,7 @@ retry: spin_unlock(&irq->irq_lock); spin_unlock(&vcpu->arch.vgic_cpu.ap_list_lock); + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); return true; @@ -346,13 +354,16 @@ retry: * false: to ignore the call * Level-sensitive true: raise the input signal * false: lower the input signal + * @owner: The opaque pointer to the owner of the IRQ being raised to verify + * that the caller is allowed to inject this IRQ. Userspace + * injections will have owner == NULL. * * The VGIC is not concerned with devices being active-LOW or active-HIGH for * level-sensitive interrupts. You can think of the level parameter as 1 * being HIGH and 0 being LOW and all devices being active-HIGH. */ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, - bool level) + bool level, void *owner) { struct kvm_vcpu *vcpu; struct vgic_irq *irq; @@ -374,7 +385,7 @@ int kvm_vgic_inject_irq(struct kvm *kvm, int cpuid, unsigned int intid, spin_lock(&irq->irq_lock); - if (!vgic_validate_injection(irq, level)) { + if (!vgic_validate_injection(irq, level, owner)) { /* Nothing to see here, move along... */ spin_unlock(&irq->irq_lock); vgic_put_irq(kvm, irq); @@ -431,6 +442,39 @@ int kvm_vgic_unmap_phys_irq(struct kvm_vcpu *vcpu, unsigned int virt_irq) } /** + * kvm_vgic_set_owner - Set the owner of an interrupt for a VM + * + * @vcpu: Pointer to the VCPU (used for PPIs) + * @intid: The virtual INTID identifying the interrupt (PPI or SPI) + * @owner: Opaque pointer to the owner + * + * Returns 0 if intid is not already used by another in-kernel device and the + * owner is set, otherwise returns an error code. + */ +int kvm_vgic_set_owner(struct kvm_vcpu *vcpu, unsigned int intid, void *owner) +{ + struct vgic_irq *irq; + int ret = 0; + + if (!vgic_initialized(vcpu->kvm)) + return -EAGAIN; + + /* SGIs and LPIs cannot be wired up to any device */ + if (!irq_is_ppi(intid) && !vgic_valid_spi(vcpu->kvm, intid)) + return -EINVAL; + + irq = vgic_get_irq(vcpu->kvm, vcpu, intid); + spin_lock(&irq->irq_lock); + if (irq->owner && irq->owner != owner) + ret = -EEXIST; + else + irq->owner = owner; + spin_unlock(&irq->irq_lock); + + return ret; +} + +/** * vgic_prune_ap_list - Remove non-relevant interrupts from the list * * @vcpu: The VCPU pointer @@ -721,8 +765,10 @@ void vgic_kick_vcpus(struct kvm *kvm) * a good kick... */ kvm_for_each_vcpu(c, vcpu, kvm) { - if (kvm_vgic_vcpu_pending_irq(vcpu)) + if (kvm_vgic_vcpu_pending_irq(vcpu)) { + kvm_make_request(KVM_REQ_IRQ_PENDING, vcpu); kvm_vcpu_kick(vcpu); + } } } |