@@ -1613,7 +1613,7 @@ static void hv_pci_compose_compl(void *context, struct pci_response *resp,
16131613}
16141614
16151615static u32 hv_compose_msi_req_v1 (
1616- struct pci_create_interrupt * int_pkt , const struct cpumask * affinity ,
1616+ struct pci_create_interrupt * int_pkt ,
16171617 u32 slot , u8 vector , u16 vector_count )
16181618{
16191619 int_pkt -> message_type .type = PCI_CREATE_INTERRUPT_MESSAGE ;
@@ -1631,6 +1631,35 @@ static u32 hv_compose_msi_req_v1(
16311631 return sizeof (* int_pkt );
16321632}
16331633
1634+ /*
1635+ * The vCPU selected by hv_compose_multi_msi_req_get_cpu() and
1636+ * hv_compose_msi_req_get_cpu() is a "dummy" vCPU because the final vCPU to be
1637+ * interrupted is specified later in hv_irq_unmask() and communicated to Hyper-V
1638+ * via the HVCALL_RETARGET_INTERRUPT hypercall. But the choice of dummy vCPU is
1639+ * not irrelevant because Hyper-V chooses the physical CPU to handle the
1640+ * interrupts based on the vCPU specified in message sent to the vPCI VSP in
1641+ * hv_compose_msi_msg(). Hyper-V's choice of pCPU is not visible to the guest,
1642+ * but assigning too many vPCI device interrupts to the same pCPU can cause a
1643+ * performance bottleneck. So we spread out the dummy vCPUs to influence Hyper-V
1644+ * to spread out the pCPUs that it selects.
1645+ *
1646+ * For the single-MSI and MSI-X cases, it's OK for hv_compose_msi_req_get_cpu()
1647+ * to always return the same dummy vCPU, because a second call to
1648+ * hv_compose_msi_msg() contains the "real" vCPU, causing Hyper-V to choose a
1649+ * new pCPU for the interrupt. But for the multi-MSI case, the second call to
1650+ * hv_compose_msi_msg() exits without sending a message to the vPCI VSP, so the
1651+ * original dummy vCPU is used. This dummy vCPU must be round-robin'ed so that
1652+ * the pCPUs are spread out. All interrupts for a multi-MSI device end up using
1653+ * the same pCPU, even though the vCPUs will be spread out by later calls
1654+ * to hv_irq_unmask(), but that is the best we can do now.
1655+ *
1656+ * With Hyper-V in Nov 2022, the HVCALL_RETARGET_INTERRUPT hypercall does *not*
1657+ * cause Hyper-V to reselect the pCPU based on the specified vCPU. Such an
1658+ * enhancement is planned for a future version. With that enhancement, the
1659+ * dummy vCPU selection won't matter, and interrupts for the same multi-MSI
1660+ * device will be spread across multiple pCPUs.
1661+ */
1662+
16341663/*
16351664 * Create MSI w/ dummy vCPU set targeting just one vCPU, overwritten
16361665 * by subsequent retarget in hv_irq_unmask().
@@ -1640,18 +1669,39 @@ static int hv_compose_msi_req_get_cpu(const struct cpumask *affinity)
16401669 return cpumask_first_and (affinity , cpu_online_mask );
16411670}
16421671
1643- static u32 hv_compose_msi_req_v2 (
1644- struct pci_create_interrupt2 * int_pkt , const struct cpumask * affinity ,
1645- u32 slot , u8 vector , u16 vector_count )
1672+ /*
1673+ * Make sure the dummy vCPU values for multi-MSI don't all point to vCPU0.
1674+ */
1675+ static int hv_compose_multi_msi_req_get_cpu (void )
16461676{
1677+ static DEFINE_SPINLOCK (multi_msi_cpu_lock );
1678+
1679+ /* -1 means starting with CPU 0 */
1680+ static int cpu_next = -1 ;
1681+
1682+ unsigned long flags ;
16471683 int cpu ;
16481684
1685+ spin_lock_irqsave (& multi_msi_cpu_lock , flags );
1686+
1687+ cpu_next = cpumask_next_wrap (cpu_next , cpu_online_mask , nr_cpu_ids ,
1688+ false);
1689+ cpu = cpu_next ;
1690+
1691+ spin_unlock_irqrestore (& multi_msi_cpu_lock , flags );
1692+
1693+ return cpu ;
1694+ }
1695+
1696+ static u32 hv_compose_msi_req_v2 (
1697+ struct pci_create_interrupt2 * int_pkt , int cpu ,
1698+ u32 slot , u8 vector , u16 vector_count )
1699+ {
16491700 int_pkt -> message_type .type = PCI_CREATE_INTERRUPT_MESSAGE2 ;
16501701 int_pkt -> wslot .slot = slot ;
16511702 int_pkt -> int_desc .vector = vector ;
16521703 int_pkt -> int_desc .vector_count = vector_count ;
16531704 int_pkt -> int_desc .delivery_mode = DELIVERY_MODE ;
1654- cpu = hv_compose_msi_req_get_cpu (affinity );
16551705 int_pkt -> int_desc .processor_array [0 ] =
16561706 hv_cpu_number_to_vp_number (cpu );
16571707 int_pkt -> int_desc .processor_count = 1 ;
@@ -1660,18 +1710,15 @@ static u32 hv_compose_msi_req_v2(
16601710}
16611711
16621712static u32 hv_compose_msi_req_v3 (
1663- struct pci_create_interrupt3 * int_pkt , const struct cpumask * affinity ,
1713+ struct pci_create_interrupt3 * int_pkt , int cpu ,
16641714 u32 slot , u32 vector , u16 vector_count )
16651715{
1666- int cpu ;
1667-
16681716 int_pkt -> message_type .type = PCI_CREATE_INTERRUPT_MESSAGE3 ;
16691717 int_pkt -> wslot .slot = slot ;
16701718 int_pkt -> int_desc .vector = vector ;
16711719 int_pkt -> int_desc .reserved = 0 ;
16721720 int_pkt -> int_desc .vector_count = vector_count ;
16731721 int_pkt -> int_desc .delivery_mode = DELIVERY_MODE ;
1674- cpu = hv_compose_msi_req_get_cpu (affinity );
16751722 int_pkt -> int_desc .processor_array [0 ] =
16761723 hv_cpu_number_to_vp_number (cpu );
16771724 int_pkt -> int_desc .processor_count = 1 ;
@@ -1715,20 +1762,25 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17151762 struct pci_create_interrupt3 v3 ;
17161763 } int_pkts ;
17171764 } __packed ctxt ;
1765+ bool multi_msi ;
17181766 u64 trans_id ;
17191767 u32 size ;
17201768 int ret ;
1769+ int cpu ;
1770+
1771+ msi_desc = irq_data_get_msi_desc (data );
1772+ multi_msi = !msi_desc -> pci .msi_attrib .is_msix &&
1773+ msi_desc -> nvec_used > 1 ;
17211774
17221775 /* Reuse the previous allocation */
1723- if (data -> chip_data ) {
1776+ if (data -> chip_data && multi_msi ) {
17241777 int_desc = data -> chip_data ;
17251778 msg -> address_hi = int_desc -> address >> 32 ;
17261779 msg -> address_lo = int_desc -> address & 0xffffffff ;
17271780 msg -> data = int_desc -> data ;
17281781 return ;
17291782 }
17301783
1731- msi_desc = irq_data_get_msi_desc (data );
17321784 pdev = msi_desc_to_pci_dev (msi_desc );
17331785 dest = irq_data_get_effective_affinity_mask (data );
17341786 pbus = pdev -> bus ;
@@ -1738,11 +1790,18 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17381790 if (!hpdev )
17391791 goto return_null_message ;
17401792
1793+ /* Free any previous message that might have already been composed. */
1794+ if (data -> chip_data && !multi_msi ) {
1795+ int_desc = data -> chip_data ;
1796+ data -> chip_data = NULL ;
1797+ hv_int_desc_free (hpdev , int_desc );
1798+ }
1799+
17411800 int_desc = kzalloc (sizeof (* int_desc ), GFP_ATOMIC );
17421801 if (!int_desc )
17431802 goto drop_reference ;
17441803
1745- if (! msi_desc -> pci . msi_attrib . is_msix && msi_desc -> nvec_used > 1 ) {
1804+ if (multi_msi ) {
17461805 /*
17471806 * If this is not the first MSI of Multi MSI, we already have
17481807 * a mapping. Can exit early.
@@ -1767,9 +1826,11 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17671826 */
17681827 vector = 32 ;
17691828 vector_count = msi_desc -> nvec_used ;
1829+ cpu = hv_compose_multi_msi_req_get_cpu ();
17701830 } else {
17711831 vector = hv_msi_get_int_vector (data );
17721832 vector_count = 1 ;
1833+ cpu = hv_compose_msi_req_get_cpu (dest );
17731834 }
17741835
17751836 /*
@@ -1785,7 +1846,6 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17851846 switch (hbus -> protocol_version ) {
17861847 case PCI_PROTOCOL_VERSION_1_1 :
17871848 size = hv_compose_msi_req_v1 (& ctxt .int_pkts .v1 ,
1788- dest ,
17891849 hpdev -> desc .win_slot .slot ,
17901850 (u8 )vector ,
17911851 vector_count );
@@ -1794,15 +1854,15 @@ static void hv_compose_msi_msg(struct irq_data *data, struct msi_msg *msg)
17941854 case PCI_PROTOCOL_VERSION_1_2 :
17951855 case PCI_PROTOCOL_VERSION_1_3 :
17961856 size = hv_compose_msi_req_v2 (& ctxt .int_pkts .v2 ,
1797- dest ,
1857+ cpu ,
17981858 hpdev -> desc .win_slot .slot ,
17991859 (u8 )vector ,
18001860 vector_count );
18011861 break ;
18021862
18031863 case PCI_PROTOCOL_VERSION_1_4 :
18041864 size = hv_compose_msi_req_v3 (& ctxt .int_pkts .v3 ,
1805- dest ,
1865+ cpu ,
18061866 hpdev -> desc .win_slot .slot ,
18071867 vector ,
18081868 vector_count );
0 commit comments