From 344d9588a9df06182684168be4f1408b55c7da3e Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:50 +0200 Subject: KVM: Add PV MSR to enable asynchronous page faults delivery. Guest enables async PF vcpu functionality using this MSR. Reviewed-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- Documentation/kvm/cpuid.txt | 3 +++ Documentation/kvm/msr.txt | 35 ++++++++++++++++++++++++++++++++++- 2 files changed, 37 insertions(+), 1 deletion(-) (limited to 'Documentation') diff --git a/Documentation/kvm/cpuid.txt b/Documentation/kvm/cpuid.txt index 14a12ea92b7f..882068538c9c 100644 --- a/Documentation/kvm/cpuid.txt +++ b/Documentation/kvm/cpuid.txt @@ -36,6 +36,9 @@ KVM_FEATURE_MMU_OP || 2 || deprecated. KVM_FEATURE_CLOCKSOURCE2 || 3 || kvmclock available at msrs || || 0x4b564d00 and 0x4b564d01 ------------------------------------------------------------------------------ +KVM_FEATURE_ASYNC_PF || 4 || async pf can be enabled by + || || writing to msr 0x4b564d02 +------------------------------------------------------------------------------ KVM_FEATURE_CLOCKSOURCE_STABLE_BIT || 24 || host will warn if no guest-side || || per-cpu warps are expected in || || kvmclock. diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt index 8ddcfe84c09a..e67b4a8783df 100644 --- a/Documentation/kvm/msr.txt +++ b/Documentation/kvm/msr.txt @@ -3,7 +3,6 @@ Glauber Costa , Red Hat Inc, 2010 ===================================================== KVM makes use of some custom MSRs to service some requests. -At present, this facility is only used by kvmclock. Custom MSRs have a range reserved for them, that goes from 0x4b564d00 to 0x4b564dff. There are MSRs outside this area, @@ -151,3 +150,37 @@ MSR_KVM_SYSTEM_TIME: 0x12 return PRESENT; } else return NON_PRESENT; + +MSR_KVM_ASYNC_PF_EN: 0x4b564d02 + data: Bits 63-6 hold 64-byte aligned physical address of a + 64 byte memory area which must be in guest RAM and must be + zeroed. Bits 5-1 are reserved and should be zero. Bit 0 is 1 + when asynchronous page faults are enabled on the vcpu 0 when + disabled. + + First 4 byte of 64 byte memory location will be written to by + the hypervisor at the time of asynchronous page fault (APF) + injection to indicate type of asynchronous page fault. Value + of 1 means that the page referred to by the page fault is not + present. Value 2 means that the page is now available. Disabling + interrupt inhibits APFs. Guest must not enable interrupt + before the reason is read, or it may be overwritten by another + APF. Since APF uses the same exception vector as regular page + fault guest must reset the reason to 0 before it does + something that can generate normal page fault. If during page + fault APF reason is 0 it means that this is regular page + fault. + + During delivery of type 1 APF cr2 contains a token that will + be used to notify a guest when missing page becomes + available. When page becomes available type 2 APF is sent with + cr2 set to the token associated with the page. There is special + kind of token 0xffffffff which tells vcpu that it should wake + up all processes waiting for APFs and no individual type 2 APFs + will be sent. + + If APF is disabled while there are outstanding APFs, they will + not be delivered. + + Currently type 2 APF will be always delivered on the same vcpu as + type 1 was, but guest should not rely on that. -- cgit v1.2.3-70-g09d2 From fd10cde9294f73eeccbc16f3fec1ae6cde7b800c Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:51 +0200 Subject: KVM paravirt: Add async PF initialization to PV guest. Enable async PF in a guest if async PF capability is discovered. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- Documentation/kernel-parameters.txt | 3 ++ arch/x86/include/asm/kvm_para.h | 6 +++ arch/x86/kernel/kvm.c | 92 +++++++++++++++++++++++++++++++++++++ 3 files changed, 101 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 8b61c9360999..369580da1953 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -1707,6 +1707,9 @@ and is between 256 and 4096 characters. It is defined in the file no-kvmclock [X86,KVM] Disable paravirtualized KVM clock driver + no-kvmapf [X86,KVM] Disable paravirtualized asynchronous page + fault handling. + nolapic [X86-32,APIC] Do not enable or use the local APIC. nolapic_timer [X86-32,APIC] Do not use the local APIC timer. diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index 8662ae0a035c..2315398230d1 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -65,6 +65,12 @@ struct kvm_mmu_op_release_pt { __u64 pt_phys; }; +struct kvm_vcpu_pv_apf_data { + __u32 reason; + __u8 pad[60]; + __u32 enabled; +}; + #ifdef __KERNEL__ #include diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index e6db17976b82..032d03b6b54a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -27,16 +27,30 @@ #include #include #include +#include +#include #include +#include #define MMU_QUEUE_SIZE 1024 +static int kvmapf = 1; + +static int parse_no_kvmapf(char *arg) +{ + kvmapf = 0; + return 0; +} + +early_param("no-kvmapf", parse_no_kvmapf); + struct kvm_para_state { u8 mmu_queue[MMU_QUEUE_SIZE]; int mmu_queue_len; }; static DEFINE_PER_CPU(struct kvm_para_state, para_state); +static DEFINE_PER_CPU(struct kvm_vcpu_pv_apf_data, apf_reason) __aligned(64); static struct kvm_para_state *kvm_para_state(void) { @@ -231,12 +245,86 @@ static void __init paravirt_ops_setup(void) #endif } +void __cpuinit kvm_guest_cpu_init(void) +{ + if (!kvm_para_available()) + return; + + if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { + u64 pa = __pa(&__get_cpu_var(apf_reason)); + + wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); + __get_cpu_var(apf_reason).enabled = 1; + printk(KERN_INFO"KVM setup async PF for cpu %d\n", + smp_processor_id()); + } +} + +static void kvm_pv_disable_apf(void *unused) +{ + if (!__get_cpu_var(apf_reason).enabled) + return; + + wrmsrl(MSR_KVM_ASYNC_PF_EN, 0); + __get_cpu_var(apf_reason).enabled = 0; + + printk(KERN_INFO"Unregister pv shared memory for cpu %d\n", + smp_processor_id()); +} + +static int kvm_pv_reboot_notify(struct notifier_block *nb, + unsigned long code, void *unused) +{ + if (code == SYS_RESTART) + on_each_cpu(kvm_pv_disable_apf, NULL, 1); + return NOTIFY_DONE; +} + +static struct notifier_block kvm_pv_reboot_nb = { + .notifier_call = kvm_pv_reboot_notify, +}; + #ifdef CONFIG_SMP static void __init kvm_smp_prepare_boot_cpu(void) { WARN_ON(kvm_register_clock("primary cpu clock")); + kvm_guest_cpu_init(); native_smp_prepare_boot_cpu(); } + +static void kvm_guest_cpu_online(void *dummy) +{ + kvm_guest_cpu_init(); +} + +static void kvm_guest_cpu_offline(void *dummy) +{ + kvm_pv_disable_apf(NULL); +} + +static int __cpuinit kvm_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + int cpu = (unsigned long)hcpu; + switch (action) { + case CPU_ONLINE: + case CPU_DOWN_FAILED: + case CPU_ONLINE_FROZEN: + smp_call_function_single(cpu, kvm_guest_cpu_online, NULL, 0); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + smp_call_function_single(cpu, kvm_guest_cpu_offline, NULL, 1); + break; + default: + break; + } + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata kvm_cpu_notifier = { + .notifier_call = kvm_cpu_notify, +}; #endif void __init kvm_guest_init(void) @@ -245,7 +333,11 @@ void __init kvm_guest_init(void) return; paravirt_ops_setup(); + register_reboot_notifier(&kvm_pv_reboot_nb); #ifdef CONFIG_SMP smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu; + register_cpu_notifier(&kvm_cpu_notifier); +#else + kvm_guest_cpu_init(); #endif } -- cgit v1.2.3-70-g09d2 From 6adba527420651b6cacaf392541c09fb108711a2 Mon Sep 17 00:00:00 2001 From: Gleb Natapov Date: Thu, 14 Oct 2010 11:22:55 +0200 Subject: KVM: Let host know whether the guest can handle async PF in non-userspace context. If guest can detect that it runs in non-preemptable context it can handle async PFs at any time, so let host know that it can send async PF even if guest cpu is not in userspace. Acked-by: Rik van Riel Signed-off-by: Gleb Natapov Signed-off-by: Marcelo Tosatti --- Documentation/kvm/msr.txt | 5 +++-- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/include/asm/kvm_para.h | 1 + arch/x86/kernel/kvm.c | 3 +++ arch/x86/kvm/x86.c | 5 +++-- 5 files changed, 11 insertions(+), 4 deletions(-) (limited to 'Documentation') diff --git a/Documentation/kvm/msr.txt b/Documentation/kvm/msr.txt index e67b4a8783df..d079aed27e03 100644 --- a/Documentation/kvm/msr.txt +++ b/Documentation/kvm/msr.txt @@ -154,9 +154,10 @@ MSR_KVM_SYSTEM_TIME: 0x12 MSR_KVM_ASYNC_PF_EN: 0x4b564d02 data: Bits 63-6 hold 64-byte aligned physical address of a 64 byte memory area which must be in guest RAM and must be - zeroed. Bits 5-1 are reserved and should be zero. Bit 0 is 1 + zeroed. Bits 5-2 are reserved and should be zero. Bit 0 is 1 when asynchronous page faults are enabled on the vcpu 0 when - disabled. + disabled. Bit 2 is 1 if asynchronous page faults can be injected + when vcpu is in cpl == 0. First 4 byte of 64 byte memory location will be written to by the hypervisor at the time of asynchronous page fault (APF) diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 167375cc49ff..b2ea42870e47 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -422,6 +422,7 @@ struct kvm_vcpu_arch { struct gfn_to_hva_cache data; u64 msr_val; u32 id; + bool send_user_only; } apf; }; diff --git a/arch/x86/include/asm/kvm_para.h b/arch/x86/include/asm/kvm_para.h index fbfd3679bc18..d3a1a4805ab8 100644 --- a/arch/x86/include/asm/kvm_para.h +++ b/arch/x86/include/asm/kvm_para.h @@ -38,6 +38,7 @@ #define KVM_MAX_MMU_OP_BATCH 32 #define KVM_ASYNC_PF_ENABLED (1 << 0) +#define KVM_ASYNC_PF_SEND_ALWAYS (1 << 1) /* Operations for KVM_HC_MMU_OP */ #define KVM_MMU_OP_WRITE_PTE 1 diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 47ea93e6b0d8..91b3d650898c 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -449,6 +449,9 @@ void __cpuinit kvm_guest_cpu_init(void) if (kvm_para_has_feature(KVM_FEATURE_ASYNC_PF) && kvmapf) { u64 pa = __pa(&__get_cpu_var(apf_reason)); +#ifdef CONFIG_PREEMPT + pa |= KVM_ASYNC_PF_SEND_ALWAYS; +#endif wrmsrl(MSR_KVM_ASYNC_PF_EN, pa | KVM_ASYNC_PF_ENABLED); __get_cpu_var(apf_reason).enabled = 1; printk(KERN_INFO"KVM setup async PF for cpu %d\n", diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index ac4c368afd40..fff70b50725c 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1429,8 +1429,8 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) { gpa_t gpa = data & ~0x3f; - /* Bits 1:5 are resrved, Should be zero */ - if (data & 0x3e) + /* Bits 2:5 are resrved, Should be zero */ + if (data & 0x3c) return 1; vcpu->arch.apf.msr_val = data; @@ -1444,6 +1444,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa)) return 1; + vcpu->arch.apf.send_user_only = !(data & KVM_ASYNC_PF_SEND_ALWAYS); kvm_async_pf_wakeup_all(vcpu); return 0; } -- cgit v1.2.3-70-g09d2 From 49f481720438bbd0138218b0bcb494c3512a454f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 16 Nov 2010 22:30:07 +0100 Subject: KVM: Document device assigment API Adds API documentation for KVM_[DE]ASSIGN_PCI_DEVICE, KVM_[DE]ASSIGN_DEV_IRQ, KVM_SET_GSI_ROUTING, KVM_ASSIGN_SET_MSIX_NR, and KVM_ASSIGN_SET_MSIX_ENTRY. Acked-by: Alex Williamson Acked-by: Michael S. Tsirkin Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- Documentation/kvm/api.txt | 178 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 178 insertions(+) (limited to 'Documentation') diff --git a/Documentation/kvm/api.txt b/Documentation/kvm/api.txt index b336266bea5e..e1a92977833e 100644 --- a/Documentation/kvm/api.txt +++ b/Documentation/kvm/api.txt @@ -1085,6 +1085,184 @@ of 4 instructions that make up a hypercall. If any additional field gets added to this structure later on, a bit for that additional piece of information will be set in the flags bitmap. +4.47 KVM_ASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_ASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Assigns a host PCI device to the VM. + +struct kvm_assigned_pci_dev { + __u32 assigned_dev_id; + __u32 busnr; + __u32 devfn; + __u32 flags; + __u32 segnr; + union { + __u32 reserved[11]; + }; +}; + +The PCI device is specified by the triple segnr, busnr, and devfn. +Identification in succeeding service requests is done via assigned_dev_id. The +following flags are specified: + +/* Depends on KVM_CAP_IOMMU */ +#define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) + +4.48 KVM_DEASSIGN_PCI_DEVICE + +Capability: KVM_CAP_DEVICE_DEASSIGNMENT +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_pci_dev (in) +Returns: 0 on success, -1 on error + +Ends PCI device assignment, releasing all associated resources. + +See KVM_CAP_DEVICE_ASSIGNMENT for the data structure. Only assigned_dev_id is +used in kvm_assigned_pci_dev to identify the device. + +4.49 KVM_ASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Assigns an IRQ to a passed-through device. + +struct kvm_assigned_irq { + __u32 assigned_dev_id; + __u32 host_irq; + __u32 guest_irq; + __u32 flags; + union { + struct { + __u32 addr_lo; + __u32 addr_hi; + __u32 data; + } guest_msi; + __u32 reserved[12]; + }; +}; + +The following flags are defined: + +#define KVM_DEV_IRQ_HOST_INTX (1 << 0) +#define KVM_DEV_IRQ_HOST_MSI (1 << 1) +#define KVM_DEV_IRQ_HOST_MSIX (1 << 2) + +#define KVM_DEV_IRQ_GUEST_INTX (1 << 8) +#define KVM_DEV_IRQ_GUEST_MSI (1 << 9) +#define KVM_DEV_IRQ_GUEST_MSIX (1 << 10) + +It is not valid to specify multiple types per host or guest IRQ. However, the +IRQ type of host and guest can differ or can even be null. + +4.50 KVM_DEASSIGN_DEV_IRQ + +Capability: KVM_CAP_ASSIGN_DEV_IRQ +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_irq (in) +Returns: 0 on success, -1 on error + +Ends an IRQ assignment to a passed-through device. + +See KVM_ASSIGN_DEV_IRQ for the data structure. The target device is specified +by assigned_dev_id, flags must correspond to the IRQ type specified on +KVM_ASSIGN_DEV_IRQ. Partial deassignment of host or guest IRQ is allowed. + +4.51 KVM_SET_GSI_ROUTING + +Capability: KVM_CAP_IRQ_ROUTING +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_irq_routing (in) +Returns: 0 on success, -1 on error + +Sets the GSI routing table entries, overwriting any previously set entries. + +struct kvm_irq_routing { + __u32 nr; + __u32 flags; + struct kvm_irq_routing_entry entries[0]; +}; + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_entry { + __u32 gsi; + __u32 type; + __u32 flags; + __u32 pad; + union { + struct kvm_irq_routing_irqchip irqchip; + struct kvm_irq_routing_msi msi; + __u32 pad[8]; + } u; +}; + +/* gsi routing entry types */ +#define KVM_IRQ_ROUTING_IRQCHIP 1 +#define KVM_IRQ_ROUTING_MSI 2 + +No flags are specified so far, the corresponding field must be set to zero. + +struct kvm_irq_routing_irqchip { + __u32 irqchip; + __u32 pin; +}; + +struct kvm_irq_routing_msi { + __u32 address_lo; + __u32 address_hi; + __u32 data; + __u32 pad; +}; + +4.52 KVM_ASSIGN_SET_MSIX_NR + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_msix_nr (in) +Returns: 0 on success, -1 on error + +Set the number of MSI-X interrupts for an assigned device. This service can +only be called once in the lifetime of an assigned device. + +struct kvm_assigned_msix_nr { + __u32 assigned_dev_id; + __u16 entry_nr; + __u16 padding; +}; + +#define KVM_MAX_MSIX_PER_DEV 256 + +4.53 KVM_ASSIGN_SET_MSIX_ENTRY + +Capability: KVM_CAP_DEVICE_MSIX +Architectures: x86 ia64 +Type: vm ioctl +Parameters: struct kvm_assigned_msix_entry (in) +Returns: 0 on success, -1 on error + +Specifies the routing of an MSI-X assigned device interrupt to a GSI. Setting +the GSI vector to zero means disabling the interrupt. + +struct kvm_assigned_msix_entry { + __u32 assigned_dev_id; + __u32 gsi; + __u16 entry; /* The index of entry in the MSI-X table */ + __u16 padding[3]; +}; + 5. The kvm_run structure Application code obtains a pointer to the kvm_run structure by -- cgit v1.2.3-70-g09d2