diff options
Diffstat (limited to 'drivers/pci/controller/pci-hyperv.c')
| -rw-r--r-- | drivers/pci/controller/pci-hyperv.c | 90 | 
1 files changed, 75 insertions, 15 deletions
diff --git a/drivers/pci/controller/pci-hyperv.c b/drivers/pci/controller/pci-hyperv.c
index ba64284eaf9f..f1ec8931dfbc 100644
--- a/drivers/pci/controller/pci-hyperv.c
+++ b/drivers/pci/controller/pci-hyperv.c
@@ -1613,7 +1613,7 @@ out:
 }
 
 static u32 hv_compose_msi_req_v1(
-	struct pci_create_interrupt *int_pkt, const struct cpumask *affinity,
+	struct pci_create_interrupt *int_pkt,
 	u32 slot, u8 vector, u16 vector_count)
 {
 	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE;
@@ -1632,6 +1632,35 @@ static u32 hv_compose_msi_req_v1(
 }
 
 /*
+ * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
+ * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
+ * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
+ * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
+ * not irrelevant because Hyper-V chooses the physical CPU to handle the
+ * interrupts based on the vCPU specified in message sent to the vPCI VSP in
+ * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
+ * but assigning too many vPCI device interrupts to the same pCPU can cause a
+ * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
+ * to spread out the pCPUs that it selects.
+ *
+ * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
+ * to always return the same dummy vCPU, because a second call to
+ * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
+ * new pCPU for the interrupt. But for the multi-MSI case, the second call to
+ * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
+ * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
+ * the pCPUs are spread out. All interrupts for a multi-MSI device end up using
+ * the same pCPU, even though the vCPUs will be spread out by later calls
+ * to hv_irq_unmask(), but that is the best we can do now.
+ *
+ * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not*
+ * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
+ * enhancement is planned for a future version. With that enhancement, the
+ * dummy vCPU selection won't matter, and interrupts for the same multi-MSI
+ * device will be spread across multiple pCPUs.
+ */
+
+/*
  * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
  * by subsequent retarget in hv_irq_unmask().
  */
@@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
 	return cpumask_first_and(affinity, cpu_online_mask);
 }
 
-static u32 hv_compose_msi_req_v2(
-	struct pci_create_interrupt2 *int_pkt, const struct cpumask *affinity,
-	u32 slot, u8 vector, u16 vector_count)
+/*
+ * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
+ */
+static int hv_compose_multi_msi_req_get_cpu(void)
 {
+	static DEFINE_SPINLOCK(multi_msi_cpu_lock);
+
+	/* -1 means starting with CPU 0 */
+	static int cpu_next = -1;
+
+	unsigned long flags;
 	int cpu;
 
+	spin_lock_irqsave(&multi_msi_cpu_lock, flags);
+
+	cpu_next = cpumask_next_wrap(cpu_next, cpu_online_mask, nr_cpu_ids,
+				     false);
+	cpu = cpu_next;
+
+	spin_unlock_irqrestore(&multi_msi_cpu_lock, flags);
+
+	return cpu;
+}
+
+static u32 hv_compose_msi_req_v2(
+	struct pci_create_interrupt2 *int_pkt, int cpu,
+	u32 slot, u8 vector, u16 vector_count)
+{
 	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE2;
 	int_pkt->wslot.slot = slot;
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.vector_count = vector_count;
 	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
-	cpu = hv_compose_msi_req_get_cpu(affinity);
 	int_pkt->int_desc.processor_array[0] =
 		hv_cpu_number_to_vp_number(cpu);
 	int_pkt->int_desc.processor_count = 1;
@@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2(
 }
 
 static u32 hv_compose_msi_req_v3(
-	struct pci_create_interrupt3 *int_pkt, const struct cpumask *affinity,
+	struct pci_create_interrupt3 *int_pkt, int cpu,
 	u32 slot, u32 vector, u16 vector_count)
 {
-	int cpu;
-
 	int_pkt->message_type.type = PCI_CREATE_INTERRUPT_MESSAGE3;
 	int_pkt->wslot.slot = slot;
 	int_pkt->int_desc.vector = vector;
 	int_pkt->int_desc.reserved = 0;
 	int_pkt->int_desc.vector_count = vector_count;
 	int_pkt->int_desc.delivery_mode = DELIVERY_MODE;
-	cpu = hv_compose_msi_req_get_cpu(affinity);
 	int_pkt->int_desc.processor_array[0] =
 		hv_cpu_number_to_vp_number(cpu);
 	int_pkt->int_desc.processor_count = 1;
@@ -1715,12 +1762,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 			struct pci_create_interrupt3 v3;
 		} int_pkts;
 	} __packed ctxt;
+	bool multi_msi;
 	u64 trans_id;
 	u32 size;
 	int ret;
+	int cpu;
+
+	msi_desc  = irq_data_get_msi_desc(data);
+	multi_msi = !msi_desc->pci.msi_attrib.is_msix &&
+		    msi_desc->nvec_used > 1;
 
 	/* Reuse the previous allocation */
-	if (data->chip_data) {
+	if (data->chip_data && multi_msi) {
 		int_desc = data->chip_data;
 		msg->address_hi = int_desc->address >> 32;
 		msg->address_lo = int_desc->address & 0xffffffff;
@@ -1728,7 +1781,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		return;
 	}
 
-	msi_desc  = irq_data_get_msi_desc(data);
 	pdev = msi_desc_to_pci_dev(msi_desc);
 	dest = irq_data_get_effective_affinity_mask(data);
 	pbus = pdev->bus;
@@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	if (!hpdev)
 		goto return_null_message;
 
+	/* Free any previous message that might have already been composed. */
+	if (data->chip_data && !multi_msi) {
+		int_desc = data->chip_data;
+		data->chip_data = NULL;
+		hv_int_desc_free(hpdev, int_desc);
+	}
+
 	int_desc = kzalloc(sizeof(*int_desc), GFP_ATOMIC);
 	if (!int_desc)
 		goto drop_reference;
 
-	if (!msi_desc->pci.msi_attrib.is_msix && msi_desc->nvec_used > 1) {
+	if (multi_msi) {
 		/*
 		 * If this is not the first MSI of Multi MSI, we already have
 		 * a mapping.  Can exit early.
@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 		 */
 		vector = 32;
 		vector_count = msi_desc->nvec_used;
+		cpu = hv_compose_multi_msi_req_get_cpu();
 	} else {
 		vector = hv_msi_get_int_vector(data);
 		vector_count = 1;
+		cpu = hv_compose_msi_req_get_cpu(dest);
 	}
 
 	/*
@@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	switch (hbus->protocol_version) {
 	case PCI_PROTOCOL_VERSION_1_1:
 		size = hv_compose_msi_req_v1(&ctxt.int_pkts.v1,
-					dest,
 					hpdev->desc.win_slot.slot,
 					(u8)vector,
 					vector_count);
@@ -1794,7 +1854,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 	case PCI_PROTOCOL_VERSION_1_2:
 	case PCI_PROTOCOL_VERSION_1_3:
 		size = hv_compose_msi_req_v2(&ctxt.int_pkts.v2,
-					dest,
+					cpu,
 					hpdev->desc.win_slot.slot,
 					(u8)vector,
 					vector_count);
@@ -1802,7 +1862,7 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
 
 	case PCI_PROTOCOL_VERSION_1_4:
 		size = hv_compose_msi_req_v3(&ctxt.int_pkts.v3,
-					dest,
+					cpu,
 					hpdev->desc.win_slot.slot,
 					vector,
 					vector_count);
