Diffstat (limited to 'drivers/net/ethernet/intel')
86 files changed, 12066 insertions, 1238 deletions
diff --git a/drivers/net/ethernet/intel/Kconfig b/drivers/net/ethernet/intel/Kconfig index ed8ea63bb172..0b274d8fa45b 100644 --- a/drivers/net/ethernet/intel/Kconfig +++ b/drivers/net/ethernet/intel/Kconfig @@ -313,6 +313,20 @@ config ICE To compile this driver as a module, choose M here. The module will be called ice. +config ICE_SWITCHDEV + bool "Switchdev Support" + default y + depends on ICE && NET_SWITCHDEV + help + Switchdev support provides internal SRIOV packet steering and switching. + + To enable it on running kernel use devlink tool: + #devlink dev eswitch set pci/0000:XX:XX.X mode switchdev + + Say Y here if you want to use Switchdev in the driver. + + If unsure, say N. + config FM10K tristate "Intel(R) FM10000 Ethernet Switch Host Interface Support" default n diff --git a/drivers/net/ethernet/intel/e100.c b/drivers/net/ethernet/intel/e100.c index 09ae1939e6db..5039a2536951 100644 --- a/drivers/net/ethernet/intel/e100.c +++ b/drivers/net/ethernet/intel/e100.c @@ -2259,7 +2259,7 @@ static int e100_set_mac_address(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); e100_exec_cb(nic, NULL, e100_setup_iaaddr); return 0; @@ -2921,7 +2921,7 @@ static int e100_probe(struct pci_dev *pdev, const struct pci_device_id *ent) e100_phy_init(nic); - memcpy(netdev->dev_addr, nic->eeprom, ETH_ALEN); + eth_hw_addr_set(netdev, (u8 *)nic->eeprom); if (!is_valid_ether_addr(netdev->dev_addr)) { if (!eeprom_bad_csum_allow) { netif_err(nic, probe, nic->netdev, "Invalid MAC address from EEPROM, aborting\n"); diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c index bed4f040face..669060a2e6aa 100644 --- a/drivers/net/ethernet/intel/e1000/e1000_main.c +++ b/drivers/net/ethernet/intel/e1000/e1000_main.c @@ -1103,7 +1103,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) e_err(probe, "EEPROM Read Error\n"); } /* don't block initialization here due to bad MAC address */ - memcpy(netdev->dev_addr, hw->mac_addr, netdev->addr_len); + eth_hw_addr_set(netdev, hw->mac_addr); if (!is_valid_ether_addr(netdev->dev_addr)) e_err(probe, "Invalid MAC Address\n"); @@ -2209,7 +2209,7 @@ static int e1000_set_mac(struct net_device *netdev, void *p) if (hw->mac_type == e1000_82542_rev2_0) e1000_enter_82542_rst(adapter); - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); memcpy(hw->mac_addr, addr->sa_data, netdev->addr_len); e1000_rar_set(hw, hw->mac_addr, 0); diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h index 3178efd98006..c3def0ee7788 100644 --- a/drivers/net/ethernet/intel/e1000e/e1000.h +++ b/drivers/net/ethernet/intel/e1000e/e1000.h @@ -21,6 +21,7 @@ #include <linux/ptp_classify.h> #include <linux/mii.h> #include <linux/mdio.h> +#include <linux/mutex.h> #include <linux/pm_qos.h> #include "hw.h" diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index ebcb2a30add0..44e2dc8328a2 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -2550,7 +2550,6 @@ static void e1000_set_itr(struct e1000_adapter *adapter) /* for non-gigabit speeds, just fix the interrupt rate at 4000 */ if (adapter->link_speed != SPEED_1000) { - current_itr = 0; new_itr = 4000; goto set_itr_now; } @@ -4787,7 +4786,7 
@@ static int e1000_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); memcpy(adapter->hw.mac.addr, addr->sa_data, netdev->addr_len); hw->mac.ops.rar_set(&adapter->hw, adapter->hw.mac.addr, 0); @@ -7590,7 +7589,7 @@ static int e1000_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "NVM Read Error while reading MAC address\n"); - memcpy(netdev->dev_addr, adapter->hw.mac.addr, netdev->addr_len); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); if (!is_valid_ether_addr(netdev->dev_addr)) { dev_err(&pdev->dev, "Invalid MAC Address: %pM\n", diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c index 2fb52bd6fc0e..2cca9e84e31e 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_netdev.c @@ -990,7 +990,7 @@ static int fm10k_set_mac(struct net_device *dev, void *p) } if (!err) { - ether_addr_copy(dev->dev_addr, addr->sa_data); + eth_hw_addr_set(dev, addr->sa_data); ether_addr_copy(hw->mac.addr, addr->sa_data); dev->addr_assign_type &= ~NET_ADDR_RANDOM; } diff --git a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c index adfa2768f024..b473cb7d7c57 100644 --- a/drivers/net/ethernet/intel/fm10k/fm10k_pci.c +++ b/drivers/net/ethernet/intel/fm10k/fm10k_pci.c @@ -300,7 +300,7 @@ static int fm10k_handle_reset(struct fm10k_intfc *interface) if (is_valid_ether_addr(hw->mac.perm_addr)) { ether_addr_copy(hw->mac.addr, hw->mac.perm_addr); ether_addr_copy(netdev->perm_addr, hw->mac.perm_addr); - ether_addr_copy(netdev->dev_addr, hw->mac.perm_addr); + eth_hw_addr_set(netdev, hw->mac.perm_addr); netdev->addr_assign_type &= ~NET_ADDR_RANDOM; } @@ -2045,7 +2045,7 @@ static int fm10k_sw_init(struct fm10k_intfc *interface, netdev->addr_assign_type |= NET_ADDR_RANDOM; } - ether_addr_copy(netdev->dev_addr, hw->mac.addr); + eth_hw_addr_set(netdev, hw->mac.addr); ether_addr_copy(netdev->perm_addr, hw->mac.addr); if (!is_valid_ether_addr(netdev->perm_addr)) { diff --git a/drivers/net/ethernet/intel/i40e/i40e.h b/drivers/net/ethernet/intel/i40e/i40e.h index 39fb3d57c057..3d528fba754b 100644 --- a/drivers/net/ethernet/intel/i40e/i40e.h +++ b/drivers/net/ethernet/intel/i40e/i40e.h @@ -435,7 +435,7 @@ static inline bool i40e_is_channel_macvlan(struct i40e_channel *ch) return !!ch->fwd; } -static inline u8 *i40e_channel_mac(struct i40e_channel *ch) +static inline const u8 *i40e_channel_mac(struct i40e_channel *ch) { if (i40e_is_channel_macvlan(ch)) return ch->fwd->netdev->dev_addr; diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c index e04b540cedc8..ba862131b9bd 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_main.c +++ b/drivers/net/ethernet/intel/i40e/i40e_main.c @@ -1587,7 +1587,7 @@ static int i40e_set_mac(struct net_device *netdev, void *p) */ spin_lock_bh(&vsi->mac_filter_hash_lock); i40e_del_mac_filter(vsi, netdev->dev_addr); - ether_addr_copy(netdev->dev_addr, addr->sa_data); + eth_hw_addr_set(netdev, addr->sa_data); i40e_add_mac_filter(vsi, netdev->dev_addr); spin_unlock_bh(&vsi->mac_filter_hash_lock); @@ -13425,7 +13425,7 @@ static int i40e_config_netdev(struct i40e_vsi *vsi) i40e_add_mac_filter(vsi, broadcast); spin_unlock_bh(&vsi->mac_filter_hash_lock); - ether_addr_copy(netdev->dev_addr, mac_addr); + 
eth_hw_addr_set(netdev, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); /* i40iw_net_event() reads 16 bytes from neigh->primary_key */ diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index e7e778ca074c..ea06e957393e 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -193,42 +193,40 @@ bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count) { u16 ntu = rx_ring->next_to_use; union i40e_rx_desc *rx_desc; - struct xdp_buff **bi, *xdp; + struct xdp_buff **xdp; + u32 nb_buffs, i; dma_addr_t dma; - bool ok = true; rx_desc = I40E_RX_DESC(rx_ring, ntu); - bi = i40e_rx_bi(rx_ring, ntu); - do { - xdp = xsk_buff_alloc(rx_ring->xsk_pool); - if (!xdp) { - ok = false; - goto no_buffers; - } - *bi = xdp; - dma = xsk_buff_xdp_get_dma(xdp); + xdp = i40e_rx_bi(rx_ring, ntu); + + nb_buffs = min_t(u16, count, rx_ring->count - ntu); + nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); + if (!nb_buffs) + return false; + + i = nb_buffs; + while (i--) { + dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); rx_desc->read.hdr_addr = 0; rx_desc++; - bi++; - ntu++; - - if (unlikely(ntu == rx_ring->count)) { - rx_desc = I40E_RX_DESC(rx_ring, 0); - bi = i40e_rx_bi(rx_ring, 0); - ntu = 0; - } - } while (--count); + xdp++; + } -no_buffers: - if (rx_ring->next_to_use != ntu) { - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.qword1.status_error_len = 0; - i40e_release_rx_desc(rx_ring, ntu); + ntu += nb_buffs; + if (ntu == rx_ring->count) { + rx_desc = I40E_RX_DESC(rx_ring, 0); + xdp = i40e_rx_bi(rx_ring, 0); + ntu = 0; } - return ok; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.qword1.status_error_len = 0; + i40e_release_rx_desc(rx_ring, ntu); + + return count == nb_buffs; } /** @@ -365,7 +363,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget) break; bi = *i40e_rx_bi(rx_ring, next_to_clean); - bi->data_end = bi->data + size; + xsk_buff_set_size(bi, size); xsk_buff_dma_sync_for_cpu(bi, rx_ring->xsk_pool); xdp_res = i40e_run_xdp_zc(rx_ring, bi); diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 68c80f04113c..e6e7c1da47fb 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -177,6 +177,7 @@ enum iavf_state_t { __IAVF_INIT_VERSION_CHECK, /* aq msg sent, awaiting reply */ __IAVF_INIT_GET_RESOURCES, /* aq msg sent, awaiting reply */ __IAVF_INIT_SW, /* got resources, setting up structs */ + __IAVF_INIT_FAILED, /* init failed, restarting procedure */ __IAVF_RESETTING, /* in reset */ __IAVF_COMM_FAILED, /* communication with PF failed */ /* Below here, watchdog is running */ @@ -225,7 +226,6 @@ struct iavf_adapter { struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work client_task; - struct delayed_work init_task; wait_queue_head_t down_waitqueue; struct iavf_q_vector *q_vectors; struct list_head vlan_filter_list; @@ -312,6 +312,7 @@ struct iavf_adapter { struct iavf_hw hw; /* defined in iavf_type.h */ enum iavf_state_t state; + enum iavf_state_t last_state; unsigned long crit_section; struct delayed_work watchdog_task; @@ -393,6 +394,51 @@ struct iavf_device { extern char iavf_driver_name[]; extern struct workqueue_struct *iavf_wq; +static inline const char *iavf_state_str(enum iavf_state_t state) +{ + switch (state) { + case __IAVF_STARTUP: + return 
"__IAVF_STARTUP"; + case __IAVF_REMOVE: + return "__IAVF_REMOVE"; + case __IAVF_INIT_VERSION_CHECK: + return "__IAVF_INIT_VERSION_CHECK"; + case __IAVF_INIT_GET_RESOURCES: + return "__IAVF_INIT_GET_RESOURCES"; + case __IAVF_INIT_SW: + return "__IAVF_INIT_SW"; + case __IAVF_INIT_FAILED: + return "__IAVF_INIT_FAILED"; + case __IAVF_RESETTING: + return "__IAVF_RESETTING"; + case __IAVF_COMM_FAILED: + return "__IAVF_COMM_FAILED"; + case __IAVF_DOWN: + return "__IAVF_DOWN"; + case __IAVF_DOWN_PENDING: + return "__IAVF_DOWN_PENDING"; + case __IAVF_TESTING: + return "__IAVF_TESTING"; + case __IAVF_RUNNING: + return "__IAVF_RUNNING"; + default: + return "__IAVF_UNKNOWN_STATE"; + } +} + +static inline void iavf_change_state(struct iavf_adapter *adapter, + enum iavf_state_t state) +{ + if (adapter->state != state) { + adapter->last_state = adapter->state; + adapter->state = state; + } + dev_dbg(&adapter->pdev->dev, + "state transition from:%s to:%s\n", + iavf_state_str(adapter->last_state), + iavf_state_str(adapter->state)); +} + int iavf_up(struct iavf_adapter *adapter); void iavf_down(struct iavf_adapter *adapter); int iavf_process_config(struct iavf_adapter *adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index cada4e0e40b4..847d67e32a54 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -14,7 +14,7 @@ static int iavf_setup_all_tx_resources(struct iavf_adapter *adapter); static int iavf_setup_all_rx_resources(struct iavf_adapter *adapter); static int iavf_close(struct net_device *netdev); -static int iavf_init_get_resources(struct iavf_adapter *adapter); +static void iavf_init_get_resources(struct iavf_adapter *adapter); static int iavf_check_reset_complete(struct iavf_hw *hw); char iavf_driver_name[] = "iavf"; @@ -52,6 +52,15 @@ static const struct net_device_ops iavf_netdev_ops; struct workqueue_struct *iavf_wq; /** + * iavf_pdev_to_adapter - go from pci_dev to adapter + * @pdev: pci_dev pointer + */ +static struct iavf_adapter *iavf_pdev_to_adapter(struct pci_dev *pdev) +{ + return netdev_priv(pci_get_drvdata(pdev)); +} + +/** * iavf_allocate_dma_mem_d - OS specific memory alloc for shared code * @hw: pointer to the HW structure * @mem: ptr to mem struct to fill out @@ -960,7 +969,7 @@ static void iavf_configure(struct iavf_adapter *adapter) **/ static void iavf_up_complete(struct iavf_adapter *adapter) { - adapter->state = __IAVF_RUNNING; + iavf_change_state(adapter, __IAVF_RUNNING); clear_bit(__IAVF_VSI_DOWN, adapter->vsi.state); iavf_napi_enable_all(adapter); @@ -1688,9 +1697,9 @@ static int iavf_process_aq_command(struct iavf_adapter *adapter) * * Function process __IAVF_STARTUP driver state. * When success the state is changed to __IAVF_INIT_VERSION_CHECK - * when fails it returns -EAGAIN + * when fails the state is changed to __IAVF_INIT_FAILED **/ -static int iavf_startup(struct iavf_adapter *adapter) +static void iavf_startup(struct iavf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; @@ -1729,9 +1738,10 @@ static int iavf_startup(struct iavf_adapter *adapter) iavf_shutdown_adminq(hw); goto err; } - adapter->state = __IAVF_INIT_VERSION_CHECK; + iavf_change_state(adapter, __IAVF_INIT_VERSION_CHECK); + return; err: - return err; + iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** @@ -1740,9 +1750,9 @@ err: * * Function process __IAVF_INIT_VERSION_CHECK driver state. 
* When success the state is changed to __IAVF_INIT_GET_RESOURCES - * when fails it returns -EAGAIN + * when fails the state is changed to __IAVF_INIT_FAILED **/ -static int iavf_init_version_check(struct iavf_adapter *adapter) +static void iavf_init_version_check(struct iavf_adapter *adapter) { struct pci_dev *pdev = adapter->pdev; struct iavf_hw *hw = &adapter->hw; @@ -1753,7 +1763,7 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) if (!iavf_asq_done(hw)) { dev_err(&pdev->dev, "Admin queue command never completed\n"); iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); goto err; } @@ -1776,10 +1786,10 @@ static int iavf_init_version_check(struct iavf_adapter *adapter) err); goto err; } - adapter->state = __IAVF_INIT_GET_RESOURCES; - + iavf_change_state(adapter, __IAVF_INIT_GET_RESOURCES); + return; err: - return err; + iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** @@ -1789,9 +1799,9 @@ err: * Function process __IAVF_INIT_GET_RESOURCES driver state and * finishes driver initialization procedure. * When success the state is changed to __IAVF_DOWN - * when fails it returns -EAGAIN + * when fails the state is changed to __IAVF_INIT_FAILED **/ -static int iavf_init_get_resources(struct iavf_adapter *adapter) +static void iavf_init_get_resources(struct iavf_adapter *adapter) { struct net_device *netdev = adapter->netdev; struct pci_dev *pdev = adapter->pdev; @@ -1819,7 +1829,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) */ iavf_shutdown_adminq(hw); dev_err(&pdev->dev, "Unable to get VF config due to PF error condition, not retrying\n"); - return 0; + return; } if (err) { dev_err(&pdev->dev, "Unable to get VF config (%d)\n", err); @@ -1847,7 +1857,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) eth_hw_addr_random(netdev); ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); } else { - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } @@ -1893,7 +1903,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) if (netdev->features & NETIF_F_GRO) dev_info(&pdev->dev, "GRO is enabled\n"); - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); set_bit(__IAVF_VSI_DOWN, adapter->vsi.state); rtnl_unlock(); @@ -1911,7 +1921,7 @@ static int iavf_init_get_resources(struct iavf_adapter *adapter) else iavf_init_rss(adapter); - return err; + return; err_mem: iavf_free_rss(adapter); err_register: @@ -1922,7 +1932,7 @@ err_alloc: kfree(adapter->vf_res); adapter->vf_res = NULL; err: - return err; + iavf_change_state(adapter, __IAVF_INIT_FAILED); } /** @@ -1941,9 +1951,50 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) - adapter->state = __IAVF_COMM_FAILED; + iavf_change_state(adapter, __IAVF_COMM_FAILED); + + if (adapter->flags & IAVF_FLAG_RESET_NEEDED && + adapter->state != __IAVF_RESETTING) { + iavf_change_state(adapter, __IAVF_RESETTING); + adapter->aq_required = 0; + adapter->current_op = VIRTCHNL_OP_UNKNOWN; + } switch (adapter->state) { + case __IAVF_STARTUP: + iavf_startup(adapter); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, &adapter->watchdog_task, + msecs_to_jiffies(30)); + return; + case __IAVF_INIT_VERSION_CHECK: + iavf_init_version_check(adapter); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, 
&adapter->watchdog_task, + msecs_to_jiffies(30)); + return; + case __IAVF_INIT_GET_RESOURCES: + iavf_init_get_resources(adapter); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, &adapter->watchdog_task, + msecs_to_jiffies(1)); + return; + case __IAVF_INIT_FAILED: + if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { + dev_err(&adapter->pdev->dev, + "Failed to communicate with PF; waiting before retry\n"); + adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; + iavf_shutdown_adminq(hw); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, + &adapter->watchdog_task, (5 * HZ)); + return; + } + /* Try again from failed step*/ + iavf_change_state(adapter, adapter->last_state); + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ); + return; case __IAVF_COMM_FAILED: reg_val = rd32(hw, IAVF_VFGEN_RSTAT) & IAVF_VFGEN_RSTAT_VFR_STATE_MASK; @@ -1952,23 +2003,19 @@ static void iavf_watchdog_task(struct work_struct *work) /* A chance for redemption! */ dev_err(&adapter->pdev->dev, "Hardware came out of reset. Attempting reinit.\n"); - adapter->state = __IAVF_STARTUP; - adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; - queue_delayed_work(iavf_wq, &adapter->init_task, 10); - mutex_unlock(&adapter->crit_lock); - /* Don't reschedule the watchdog, since we've restarted - * the init task. When init_task contacts the PF and + /* When init task contacts the PF and * gets everything set up again, it'll restart the * watchdog for us. Down, boy. Sit. Stay. Woof. */ - return; + iavf_change_state(adapter, __IAVF_STARTUP); + adapter->flags &= ~IAVF_FLAG_PF_COMMS_FAILED; } adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(10)); - goto watchdog_done; + return; case __IAVF_RESETTING: mutex_unlock(&adapter->crit_lock); queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); @@ -1991,38 +2038,40 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->state == __IAVF_RUNNING) iavf_request_stats(adapter); } + if (adapter->state == __IAVF_RUNNING) + iavf_detect_recover_hung(&adapter->vsi); break; case __IAVF_REMOVE: mutex_unlock(&adapter->crit_lock); return; default: - goto restart_watchdog; + return; } - /* check for hw reset */ + /* check for hw reset */ reg_val = rd32(hw, IAVF_VF_ARQLEN1) & IAVF_VF_ARQLEN1_ARQENABLE_MASK; if (!reg_val) { + iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags |= IAVF_FLAG_RESET_PENDING; adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); queue_work(iavf_wq, &adapter->reset_task); - goto watchdog_done; + mutex_unlock(&adapter->crit_lock); + queue_delayed_work(iavf_wq, + &adapter->watchdog_task, HZ * 2); + return; } schedule_delayed_work(&adapter->client_task, msecs_to_jiffies(5)); -watchdog_done: - if (adapter->state == __IAVF_RUNNING || - adapter->state == __IAVF_COMM_FAILED) - iavf_detect_recover_hung(&adapter->vsi); mutex_unlock(&adapter->crit_lock); restart_watchdog: + queue_work(iavf_wq, &adapter->adminq_task); if (adapter->aq_required) queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(20)); else queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); - queue_work(iavf_wq, &adapter->adminq_task); } static void iavf_disable_vf(struct iavf_adapter *adapter) @@ -2081,7 +2130,7 @@ static void iavf_disable_vf(struct iavf_adapter *adapter) adapter->netdev->flags &= ~IFF_UP; mutex_unlock(&adapter->crit_lock); 
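The net effect of the watchdog rework above: the early init states are now driven from iavf_watchdog_task() itself, and a failed step parks the adapter in __IAVF_INIT_FAILED so the next tick can retry from adapter->last_state (or give up after IAVF_AQ_MAX_ERR attempts, flag PF comms as failed and shut the admin queue down). A condensed, illustrative shape of that loop, not a verbatim copy of the driver code:

static void foo_watchdog_step(struct iavf_adapter *adapter)
{
	switch (adapter->state) {
	case __IAVF_STARTUP:
		iavf_startup(adapter);			/* -> INIT_VERSION_CHECK or INIT_FAILED */
		break;
	case __IAVF_INIT_VERSION_CHECK:
		iavf_init_version_check(adapter);	/* -> INIT_GET_RESOURCES or INIT_FAILED */
		break;
	case __IAVF_INIT_GET_RESOURCES:
		iavf_init_get_resources(adapter);	/* -> __IAVF_DOWN or INIT_FAILED */
		break;
	case __IAVF_INIT_FAILED:
		/* after too many failures: mark PF comms broken and back off;
		 * otherwise retry the step recorded in last_state
		 */
		iavf_change_state(adapter, adapter->last_state);
		break;
	default:
		break;
	}
	/* the real task also takes crit_lock and re-queues itself */
}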
adapter->flags &= ~IAVF_FLAG_RESET_PENDING; - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); dev_info(&adapter->pdev->dev, "Reset task did not complete, VF disabled\n"); } @@ -2191,7 +2240,7 @@ continue_reset: } iavf_irq_disable(adapter); - adapter->state = __IAVF_RESETTING; + iavf_change_state(adapter, __IAVF_RESETTING); adapter->flags &= ~IAVF_FLAG_RESET_PENDING; /* free the Tx/Rx rings and descriptors, might be better to just @@ -2291,11 +2340,14 @@ continue_reset: iavf_configure(adapter); + /* iavf_up_complete() will switch device back + * to __IAVF_RUNNING + */ iavf_up_complete(adapter); iavf_irq_enable(adapter, true); } else { - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); } mutex_unlock(&adapter->client_lock); @@ -2305,6 +2357,8 @@ continue_reset: reset_err: mutex_unlock(&adapter->client_lock); mutex_unlock(&adapter->crit_lock); + if (running) + iavf_change_state(adapter, __IAVF_RUNNING); dev_err(&adapter->pdev->dev, "failed to allocate resources during reinit\n"); iavf_close(netdev); } @@ -3226,6 +3280,13 @@ static int iavf_open(struct net_device *netdev) goto err_unlock; } + if (adapter->state == __IAVF_RUNNING && + !test_bit(__IAVF_VSI_DOWN, adapter->vsi.state)) { + dev_dbg(&adapter->pdev->dev, "VF is already open.\n"); + err = 0; + goto err_unlock; + } + /* allocate transmit descriptors */ err = iavf_setup_all_tx_resources(adapter); if (err) @@ -3297,7 +3358,7 @@ static int iavf_close(struct net_device *netdev) adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_CLOSE; iavf_down(adapter); - adapter->state = __IAVF_DOWN_PENDING; + iavf_change_state(adapter, __IAVF_DOWN_PENDING); iavf_free_traffic_irqs(adapter); mutex_unlock(&adapter->crit_lock); @@ -3631,71 +3692,13 @@ int iavf_process_config(struct iavf_adapter *adapter) } /** - * iavf_init_task - worker thread to perform delayed initialization - * @work: pointer to work_struct containing our data - * - * This task completes the work that was begun in probe. Due to the nature - * of VF-PF communications, we may need to wait tens of milliseconds to get - * responses back from the PF. Rather than busy-wait in probe and bog down the - * whole system, we'll do it in a task so we can sleep. - * This task only runs during driver init. Once we've established - * communications with the PF driver and set up our netdev, the watchdog - * takes over. 
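Returning to the i40e AF_XDP hunk earlier in this diff: i40e_alloc_rx_buffers_zc() now fills the ring with one xsk_buff_alloc_batch() call capped at the ring wrap point instead of looping over xsk_buff_alloc(). A self-contained sketch of that pattern; the helpers from <net/xdp_sock_drv.h> are real, while foo_fill_rx_zc() and its descriptor plumbing are illustrative only:

#include <linux/types.h>
#include <net/xdp_sock_drv.h>

/* Fill up to @space buffers from @pool; the caller advances next_to_use
 * by the return value and handles the ring wrap itself.
 */
static u32 foo_fill_rx_zc(struct xsk_buff_pool *pool, struct xdp_buff **bufs,
			  dma_addr_t *dma_out, u32 space)
{
	u32 nb, i;

	nb = xsk_buff_alloc_batch(pool, bufs, space);	/* may return fewer than space */
	for (i = 0; i < nb; i++)
		/* the real code writes cpu_to_le64(dma) into the Rx descriptor */
		dma_out[i] = xsk_buff_xdp_get_dma(bufs[i]);

	return nb;
}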
- **/ -static void iavf_init_task(struct work_struct *work) -{ - struct iavf_adapter *adapter = container_of(work, - struct iavf_adapter, - init_task.work); - struct iavf_hw *hw = &adapter->hw; - - if (iavf_lock_timeout(&adapter->crit_lock, 5000)) { - dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); - return; - } - switch (adapter->state) { - case __IAVF_STARTUP: - if (iavf_startup(adapter) < 0) - goto init_failed; - break; - case __IAVF_INIT_VERSION_CHECK: - if (iavf_init_version_check(adapter) < 0) - goto init_failed; - break; - case __IAVF_INIT_GET_RESOURCES: - if (iavf_init_get_resources(adapter) < 0) - goto init_failed; - goto out; - default: - goto init_failed; - } - - queue_delayed_work(iavf_wq, &adapter->init_task, - msecs_to_jiffies(30)); - goto out; -init_failed: - if (++adapter->aq_wait_count > IAVF_AQ_MAX_ERR) { - dev_err(&adapter->pdev->dev, - "Failed to communicate with PF; waiting before retry\n"); - adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; - iavf_shutdown_adminq(hw); - adapter->state = __IAVF_STARTUP; - queue_delayed_work(iavf_wq, &adapter->init_task, HZ * 5); - goto out; - } - queue_delayed_work(iavf_wq, &adapter->init_task, HZ); -out: - mutex_unlock(&adapter->crit_lock); -} - -/** * iavf_shutdown - Shutdown the device in preparation for a reboot * @pdev: pci device structure **/ static void iavf_shutdown(struct pci_dev *pdev) { - struct net_device *netdev = pci_get_drvdata(pdev); - struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); + struct net_device *netdev = adapter->netdev; netif_device_detach(netdev); @@ -3705,7 +3708,7 @@ static void iavf_shutdown(struct pci_dev *pdev) if (iavf_lock_timeout(&adapter->crit_lock, 5000)) dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); /* Prevent the watchdog from running. */ - adapter->state = __IAVF_REMOVE; + iavf_change_state(adapter, __IAVF_REMOVE); adapter->aq_required = 0; mutex_unlock(&adapter->crit_lock); @@ -3778,7 +3781,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw->back = adapter; adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1; - adapter->state = __IAVF_STARTUP; + iavf_change_state(adapter, __IAVF_STARTUP); /* Call save state here because it relies on the adapter struct. 
*/ pci_save_state(pdev); @@ -3822,8 +3825,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->adminq_task, iavf_adminq_task); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); - INIT_DELAYED_WORK(&adapter->init_task, iavf_init_task); - queue_delayed_work(iavf_wq, &adapter->init_task, + queue_delayed_work(iavf_wq, &adapter->watchdog_task, msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ @@ -3880,10 +3882,11 @@ static int __maybe_unused iavf_suspend(struct device *dev_d) static int __maybe_unused iavf_resume(struct device *dev_d) { struct pci_dev *pdev = to_pci_dev(dev_d); - struct net_device *netdev = pci_get_drvdata(pdev); - struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_adapter *adapter; u32 err; + adapter = iavf_pdev_to_adapter(pdev); + pci_set_master(pdev); rtnl_lock(); @@ -3902,7 +3905,7 @@ static int __maybe_unused iavf_resume(struct device *dev_d) queue_work(iavf_wq, &adapter->reset_task); - netif_device_attach(netdev); + netif_device_attach(adapter->netdev); return err; } @@ -3918,8 +3921,9 @@ static int __maybe_unused iavf_resume(struct device *dev_d) **/ static void iavf_remove(struct pci_dev *pdev) { - struct net_device *netdev = pci_get_drvdata(pdev); - struct iavf_adapter *adapter = netdev_priv(netdev); + struct iavf_adapter *adapter = iavf_pdev_to_adapter(pdev); + enum iavf_state_t prev_state = adapter->last_state; + struct net_device *netdev = adapter->netdev; struct iavf_fdir_fltr *fdir, *fdirtmp; struct iavf_vlan_filter *vlf, *vlftmp; struct iavf_adv_rss *rss, *rsstmp; @@ -3929,8 +3933,8 @@ static void iavf_remove(struct pci_dev *pdev) int err; /* Indicate we are in remove and not to run reset_task */ mutex_lock(&adapter->remove_lock); - cancel_delayed_work_sync(&adapter->init_task); cancel_work_sync(&adapter->reset_task); + cancel_delayed_work_sync(&adapter->watchdog_task); cancel_delayed_work_sync(&adapter->client_task); if (adapter->netdev_registered) { unregister_netdev(netdev); @@ -3954,13 +3958,25 @@ static void iavf_remove(struct pci_dev *pdev) dev_warn(&adapter->pdev->dev, "failed to acquire crit_lock in %s\n", __FUNCTION__); /* Shut down all the garbage mashers on the detention level */ - adapter->state = __IAVF_REMOVE; + iavf_change_state(adapter, __IAVF_REMOVE); adapter->aq_required = 0; adapter->flags &= ~IAVF_FLAG_REINIT_ITR_NEEDED; + iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); iavf_misc_irq_disable(adapter); iavf_free_misc_irq(adapter); + + /* In case we enter iavf_remove from erroneous state, free traffic irqs + * here, so as to not cause a kernel crash, when calling + * iavf_reset_interrupt_capability. 
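One detail worth noting in the probe hunk above: with the init task gone, the first watchdog run is deliberately staggered per PCI function so a burst of VFs probed together does not hit the PF at the same instant. The call below is quoted from the patch; only the comment is added here:

	/* devfn & 0x07 is the PCI function number (0-7), so the initial
	 * watchdog kick is skewed by 0-35 ms across VFs of one device
	 */
	queue_delayed_work(iavf_wq, &adapter->watchdog_task,
			   msecs_to_jiffies(5 * (pdev->devfn & 0x07)));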
+ */ + if ((adapter->last_state == __IAVF_RESETTING && + prev_state != __IAVF_DOWN) || + (adapter->last_state == __IAVF_RUNNING && + !(netdev->flags & IFF_UP))) + iavf_free_traffic_irqs(adapter); + iavf_reset_interrupt_capability(adapter); iavf_free_q_vectors(adapter); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 3c735968e1b8..8c3f0f77cb57 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1685,7 +1685,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!v_retval) iavf_mac_add_ok(adapter); if (!ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr)) - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); break; case VIRTCHNL_OP_GET_STATS: { struct iavf_eth_stats *stats = @@ -1716,7 +1716,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, ether_addr_copy(adapter->hw.mac.addr, netdev->dev_addr); } else { /* refresh current mac address if changed */ - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } @@ -1735,7 +1735,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_free_all_tx_resources(adapter); iavf_free_all_rx_resources(adapter); if (adapter->state == __IAVF_DOWN_PENDING) { - adapter->state = __IAVF_DOWN; + iavf_change_state(adapter, __IAVF_DOWN); wake_up(&adapter->down_waitqueue); } break; diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile index 4f538cdf42c1..c36faa7d1471 100644 --- a/drivers/net/ethernet/intel/ice/Makefile +++ b/drivers/net/ethernet/intel/ice/Makefile @@ -26,10 +26,13 @@ ice-y := ice_main.o \ ice_devlink.o \ ice_fw_update.o \ ice_lag.o \ - ice_ethtool.o + ice_ethtool.o \ + ice_repr.o \ + ice_tc_lib.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_allowlist.o ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o ice_virtchnl_fdir.o ice-$(CONFIG_PTP_1588_CLOCK) += ice_ptp.o ice_ptp_hw.o ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o +ice-$(CONFIG_ICE_SWITCHDEV) += ice_eswitch.o diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h index 3c4f08d20414..bf4ecd9a517c 100644 --- a/drivers/net/ethernet/intel/ice/ice.h +++ b/drivers/net/ethernet/intel/ice/ice.h @@ -34,10 +34,15 @@ #include <linux/if_bridge.h> #include <linux/ctype.h> #include <linux/bpf.h> +#include <linux/btf.h> #include <linux/auxiliary_bus.h> #include <linux/avf/virtchnl.h> #include <linux/cpu_rmap.h> #include <linux/dim.h> +#include <net/pkt_cls.h> +#include <net/tc_act/tc_mirred.h> +#include <net/tc_act/tc_gact.h> +#include <net/ip.h> #include <net/devlink.h> #include <net/ipv6.h> #include <net/xdp_sock.h> @@ -55,6 +60,7 @@ #include "ice_dcb.h" #include "ice_switch.h" #include "ice_common.h" +#include "ice_flow.h" #include "ice_sched.h" #include "ice_idc_int.h" #include "ice_virtchnl_pf.h" @@ -63,6 +69,8 @@ #include "ice_fdir.h" #include "ice_xsk.h" #include "ice_arfs.h" +#include "ice_repr.h" +#include "ice_eswitch.h" #include "ice_lag.h" #define ICE_BAR0 0 @@ -84,6 +92,7 @@ #define ICE_FDIR_MSIX 2 #define ICE_RDMA_NUM_AEQ_MSIX 4 #define ICE_MIN_RDMA_MSIX 2 +#define ICE_ESWITCH_MSIX 1 #define ICE_NO_VSI 0xffff #define ICE_VSI_MAP_CONTIG 0 #define ICE_VSI_MAP_SCATTER 1 @@ -101,6 +110,10 @@ #define 
ICE_INVAL_VFID 256 #define ICE_MAX_RXQS_PER_TC 256 /* Used when setting VSI context per TC Rx queues */ + +#define ICE_CHNL_START_TC 1 +#define ICE_CHNL_MAX_TC 16 + #define ICE_MAX_RESET_WAIT 20 #define ICE_VSIQF_HKEY_ARRAY_SIZE ((VSIQF_HKEY_MAX_INDEX + 1) * 4) @@ -118,14 +131,24 @@ #define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i])) #define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i])) +/* Minimum BW limit is 500 Kbps for any scheduler node */ +#define ICE_MIN_BW_LIMIT 500 +/* User can specify BW in either Kbit/Mbit/Gbit and OS converts it in bytes. + * use it to convert user specified BW limit into Kbps + */ +#define ICE_BW_KBPS_DIVISOR 125 + /* Macro for each VSI in a PF */ #define ice_for_each_vsi(pf, i) \ for ((i) = 0; (i) < (pf)->num_alloc_vsi; (i)++) -/* Macros for each Tx/Rx ring in a VSI */ +/* Macros for each Tx/Xdp/Rx ring in a VSI */ #define ice_for_each_txq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_txq; (i)++) +#define ice_for_each_xdp_txq(vsi, i) \ + for ((i) = 0; (i) < (vsi)->num_xdp_txq; (i)++) + #define ice_for_each_rxq(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_rxq; (i)++) @@ -139,6 +162,9 @@ #define ice_for_each_q_vector(vsi, i) \ for ((i) = 0; (i) < (vsi)->num_q_vectors; (i)++) +#define ice_for_each_chnl_tc(i) \ + for ((i) = ICE_CHNL_START_TC; (i) < ICE_CHNL_MAX_TC; (i)++) + #define ICE_UCAST_PROMISC_BITS (ICE_PROMISC_UCAST_TX | ICE_PROMISC_MCAST_TX | \ ICE_PROMISC_UCAST_RX | ICE_PROMISC_MCAST_RX) @@ -158,6 +184,29 @@ #define ice_pf_to_dev(pf) (&((pf)->pdev->dev)) +enum ice_feature { + ICE_F_DSCP, + ICE_F_SMA_CTRL, + ICE_F_MAX +}; + +DECLARE_STATIC_KEY_FALSE(ice_xdp_locking_key); + +struct ice_channel { + struct list_head list; + u8 type; + u16 sw_id; + u16 base_q; + u16 num_rxq; + u16 num_txq; + u16 vsi_num; + u8 ena_tc; + struct ice_aqc_vsi_props info; + u64 max_tx_rate; + u64 min_tx_rate; + struct ice_vsi *ch_vsi; +}; + struct ice_txq_meta { u32 q_teid; /* Tx-scheduler element identifier */ u16 q_id; /* Entry in VSI's txq_map bitmap */ @@ -175,7 +224,7 @@ struct ice_tc_info { struct ice_tc_cfg { u8 numtc; /* Total number of enabled TCs */ - u8 ena_tc; /* Tx map */ + u16 ena_tc; /* Tx map */ struct ice_tc_info tc_info[ICE_MAX_TRAFFIC_CLASS]; }; @@ -266,8 +315,8 @@ struct ice_vsi { struct ice_sw *vsw; /* switch this VSI is on */ struct ice_pf *back; /* back pointer to PF */ struct ice_port_info *port_info; /* back pointer to port_info */ - struct ice_ring **rx_rings; /* Rx ring array */ - struct ice_ring **tx_rings; /* Tx ring array */ + struct ice_rx_ring **rx_rings; /* Rx ring array */ + struct ice_tx_ring **tx_rings; /* Tx ring array */ struct ice_q_vector **q_vectors; /* q_vector array */ irqreturn_t (*irq_handler)(int irq, void *data); @@ -306,10 +355,6 @@ struct ice_vsi { spinlock_t arfs_lock; /* protects aRFS hash table and filter state */ atomic_t *arfs_last_fltr_id; - /* devlink port data */ - struct devlink_port devlink_port; - bool devlink_port_registered; - u16 max_frame; u16 rx_buf_len; @@ -344,11 +389,42 @@ struct ice_vsi { u16 qset_handle[ICE_MAX_TRAFFIC_CLASS]; struct ice_tc_cfg tc_cfg; struct bpf_prog *xdp_prog; - struct ice_ring **xdp_rings; /* XDP ring array */ + struct ice_tx_ring **xdp_rings; /* XDP ring array */ unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled qps */ u16 num_xdp_txq; /* Used XDP queues */ u8 xdp_mapping_mode; /* ICE_MAP_MODE_[CONTIG|SCATTER] */ + struct net_device **target_netdevs; + + struct tc_mqprio_qopt_offload mqprio_qopt; /* queue parameters */ + + /* Channel Specific 
Fields */ + struct ice_vsi *tc_map_vsi[ICE_CHNL_MAX_TC]; + u16 cnt_q_avail; + u16 next_base_q; /* next queue to be used for channel setup */ + struct list_head ch_list; + u16 num_chnl_rxq; + u16 num_chnl_txq; + u16 ch_rss_size; + u16 num_chnl_fltr; + /* store away rss size info before configuring ADQ channels so that, + * it can be used after tc-qdisc delete, to get back RSS setting as + * they were before + */ + u16 orig_rss_size; + /* this keeps tracks of all enabled TC with and without DCB + * and inclusive of ADQ, vsi->mqprio_opt keeps track of queue + * information + */ + u8 all_numtc; + u16 all_enatc; + + /* store away TC info, to be used for rebuild logic */ + u8 old_numtc; + u16 old_ena_tc; + + struct ice_channel *ch; + /* setup back reference, to which aggregator node this VSI * corresponds to */ @@ -377,6 +453,8 @@ struct ice_q_vector { cpumask_t affinity_mask; struct irq_affinity_notify affinity_notify; + struct ice_channel *ch; + char name[ICE_INT_NAME_STR_LEN]; u16 total_events; /* net_dim(): number of interrupts processed */ @@ -395,11 +473,14 @@ enum ice_pf_flags { ICE_FLAG_PTP, /* PTP is enabled by software */ ICE_FLAG_AUX_ENA, ICE_FLAG_ADV_FEATURES, + ICE_FLAG_TC_MQPRIO, /* support for Multi queue TC */ + ICE_FLAG_CLS_FLOWER, ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, ICE_FLAG_TOTAL_PORT_SHUTDOWN_ENA, ICE_FLAG_NO_MEDIA, ICE_FLAG_FW_LLDP_AGENT, ICE_FLAG_MOD_POWER_UNSUPPORTED, + ICE_FLAG_PHY_FW_LOAD_FAILED, ICE_FLAG_ETHTOOL_CTXT, /* set when ethtool holds RTNL lock */ ICE_FLAG_LEGACY_RX, ICE_FLAG_VF_TRUE_PROMISC_ENA, @@ -408,6 +489,12 @@ enum ice_pf_flags { ICE_PF_FLAGS_NBITS /* must be last */ }; +struct ice_switchdev_info { + struct ice_vsi *control_vsi; + struct ice_vsi *uplink_vsi; + bool is_running; +}; + struct ice_agg_node { u32 agg_id; #define ICE_MAX_VSIS_IN_AGG_NODE 64 @@ -421,6 +508,9 @@ struct ice_pf { struct devlink_region *nvm_region; struct devlink_region *devcaps_region; + /* devlink port data */ + struct devlink_port devlink_port; + /* OS reserved IRQ details */ struct msix_entry *msix_entries; struct ice_res_tracker *irq_tracker; @@ -434,6 +524,7 @@ struct ice_pf { struct ice_vsi **vsi; /* VSIs created by the driver */ struct ice_sw *first_sw; /* first switch created by firmware */ + u16 eswitch_mode; /* current mode of eswitch */ /* Virtchnl/SR-IOV config info */ struct ice_vf *vf; u16 num_alloc_vfs; /* actual number of VFs allocated */ @@ -443,6 +534,7 @@ struct ice_pf { /* used to ratelimit the MDD event logging */ unsigned long last_printed_mdd_jiffies; DECLARE_BITMAP(malvfs, ICE_MAX_VF_COUNT); + DECLARE_BITMAP(features, ICE_F_MAX); DECLARE_BITMAP(state, ICE_STATE_NBITS); DECLARE_BITMAP(flags, ICE_PF_FLAGS_NBITS); unsigned long *avail_txqs; /* bitmap to track PF Tx queue usage */ @@ -495,12 +587,19 @@ struct ice_pf { struct auxiliary_device *adev; int aux_idx; u32 sw_int_count; + /* count of tc_flower filters specific to channel (aka where filter + * action is "hw_tc <tc_num>") + */ + u16 num_dmac_chnl_fltrs; + struct hlist_head tc_flower_fltr_list; __le64 nvm_phy_type_lo; /* NVM PHY type low */ __le64 nvm_phy_type_hi; /* NVM PHY type high */ struct ice_link_default_override_tlv link_dflt_override; struct ice_lag *lag; /* Link Aggregation information */ + struct ice_switchdev_info switchdev; + #define ICE_INVALID_AGG_NODE_ID 0 #define ICE_PF_AGG_NODE_ID_START 1 #define ICE_MAX_PF_AGG_NODES 32 @@ -512,9 +611,28 @@ struct ice_pf { struct ice_netdev_priv { struct ice_vsi *vsi; + struct ice_repr *repr; + /* indirect block callbacks on registered higher level devices 
+ * (e.g. tunnel devices) + * + * tc_indr_block_cb_priv_list is used to look up indirect callback + * private data + */ + struct list_head tc_indr_block_priv_list; }; /** + * ice_vector_ch_enabled + * @qv: pointer to q_vector, can be NULL + * + * This function returns true if vector is channel enabled otherwise false + */ +static inline bool ice_vector_ch_enabled(struct ice_q_vector *qv) +{ + return !!qv->ch; /* Enable it to run with TC */ +} + +/** * ice_irq_dynamic_ena - Enable default interrupt generation settings * @hw: pointer to HW struct * @vsi: pointer to VSI struct, can be NULL @@ -556,25 +674,42 @@ static inline bool ice_is_xdp_ena_vsi(struct ice_vsi *vsi) return !!vsi->xdp_prog; } -static inline void ice_set_ring_xdp(struct ice_ring *ring) +static inline void ice_set_ring_xdp(struct ice_tx_ring *ring) { ring->flags |= ICE_TX_FLAGS_RING_XDP; } /** * ice_xsk_pool - get XSK buffer pool bound to a ring - * @ring: ring to use + * @ring: Rx ring to use * * Returns a pointer to xdp_umem structure if there is a buffer pool present, * NULL otherwise. */ -static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_ring *ring) +static inline struct xsk_buff_pool *ice_xsk_pool(struct ice_rx_ring *ring) { struct ice_vsi *vsi = ring->vsi; u16 qid = ring->q_index; - if (ice_ring_is_xdp(ring)) - qid -= vsi->num_xdp_txq; + if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) + return NULL; + + return xsk_get_pool_from_qid(vsi->netdev, qid); +} + +/** + * ice_tx_xsk_pool - get XSK buffer pool bound to a ring + * @ring: Tx ring to use + * + * Returns a pointer to xdp_umem structure if there is a buffer pool present, + * NULL otherwise. Tx equivalent of ice_xsk_pool. + */ +static inline struct xsk_buff_pool *ice_tx_xsk_pool(struct ice_tx_ring *ring) +{ + struct ice_vsi *vsi = ring->vsi; + u16 qid; + + qid = ring->q_index - vsi->num_xdp_txq; if (!ice_is_xdp_ena_vsi(vsi) || !test_bit(qid, vsi->af_xdp_zc_qps)) return NULL; @@ -597,6 +732,19 @@ static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf) } /** + * ice_get_netdev_priv_vsi - return VSI associated with netdev priv. + * @np: private netdev structure + */ +static inline struct ice_vsi *ice_get_netdev_priv_vsi(struct ice_netdev_priv *np) +{ + /* In case of port representor return source port VSI. */ + if (np->repr) + return np->repr->src_vsi; + else + return np->vsi; +} + +/** * ice_get_ctrl_vsi - Get the control VSI * @pf: PF instance */ @@ -610,6 +758,18 @@ static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf) } /** + * ice_is_switchdev_running - check if switchdev is configured + * @pf: pointer to PF structure + * + * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV + * and switchdev is configured, false otherwise. 
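With port representors in the picture, ice netdev callbacks can no longer assume np->vsi is the VSI to act on, which is what the new ice_get_netdev_priv_vsi() hides. A hedged sketch of a callback using it; foo_ndo_example() is illustrative and not part of this patch:

static int foo_ndo_example(struct net_device *netdev)
{
	struct ice_netdev_priv *np = netdev_priv(netdev);
	struct ice_vsi *vsi = ice_get_netdev_priv_vsi(np);

	/* for a representor this is the source VF's VSI, otherwise np->vsi */
	if (!vsi)
		return -ENODEV;

	return 0;
}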
+ */ +static inline bool ice_is_switchdev_running(struct ice_pf *pf) +{ + return pf->switchdev.is_running; +} + +/** * ice_set_sriov_cap - enable SRIOV in PF flags * @pf: PF struct */ @@ -633,11 +793,37 @@ static inline void ice_clear_sriov_cap(struct ice_pf *pf) ((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT) #define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx) +/** + * ice_is_adq_active - any active ADQs + * @pf: pointer to PF + * + * This function returns true if there are any ADQs configured (which is + * determined by looking at VSI type (which should be VSI_PF), numtc, and + * TC_MQPRIO flag) otherwise return false + */ +static inline bool ice_is_adq_active(struct ice_pf *pf) +{ + struct ice_vsi *vsi; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return false; + + /* is ADQ configured */ + if (vsi->tc_cfg.numtc > ICE_CHNL_START_TC && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + return true; + + return false; +} + bool netif_is_ice(struct net_device *dev); int ice_vsi_setup_tx_rings(struct ice_vsi *vsi); int ice_vsi_setup_rx_rings(struct ice_vsi *vsi); int ice_vsi_open_ctrl(struct ice_vsi *vsi); +int ice_vsi_open(struct ice_vsi *vsi); void ice_set_ethtool_ops(struct net_device *netdev); +void ice_set_ethtool_repr_ops(struct net_device *netdev); void ice_set_ethtool_safe_mode_ops(struct net_device *netdev); u16 ice_get_avail_txq_count(struct ice_pf *pf); u16 ice_get_avail_rxq_count(struct ice_pf *pf); @@ -648,6 +834,7 @@ int ice_up(struct ice_vsi *vsi); int ice_down(struct ice_vsi *vsi); int ice_vsi_cfg(struct ice_vsi *vsi); struct ice_vsi *ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi); +int ice_vsi_determine_xdp_res(struct ice_vsi *vsi); int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog); int ice_destroy_xdp_rings(struct ice_vsi *vsi); int diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h index 21b4c7cd6f05..4eef3488d86f 100644 --- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h +++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h @@ -233,6 +233,7 @@ struct ice_aqc_get_sw_cfg_resp_elem { */ #define ICE_AQC_RES_TYPE_VSI_LIST_REP 0x03 #define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE 0x04 +#define ICE_AQC_RES_TYPE_RECIPE 0x05 #define ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK 0x21 #define ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES 0x22 #define ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES 0x23 @@ -241,6 +242,7 @@ struct ice_aqc_get_sw_cfg_resp_elem { #define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID 0x60 #define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM 0x61 +#define ICE_AQC_RES_TYPE_FLAG_SHARED BIT(7) #define ICE_AQC_RES_TYPE_FLAG_SCAN_BOTTOM BIT(12) #define ICE_AQC_RES_TYPE_FLAG_IGNORE_INDEX BIT(13) @@ -474,6 +476,53 @@ struct ice_aqc_vsi_props { #define ICE_MAX_NUM_RECIPES 64 +/* Add/Get Recipe (indirect 0x0290/0x0292) */ +struct ice_aqc_add_get_recipe { + __le16 num_sub_recipes; /* Input in Add cmd, Output in Get cmd */ + __le16 return_index; /* Input, used for Get cmd only */ + u8 reserved[4]; + __le32 addr_high; + __le32 addr_low; +}; + +struct ice_aqc_recipe_content { + u8 rid; +#define ICE_AQ_RECIPE_ID_IS_ROOT BIT(7) +#define ICE_AQ_SW_ID_LKUP_IDX 0 + u8 lkup_indx[5]; +#define ICE_AQ_RECIPE_LKUP_IGNORE BIT(7) +#define ICE_AQ_SW_ID_LKUP_MASK 0x00FF + __le16 mask[5]; + u8 result_indx; +#define ICE_AQ_RECIPE_RESULT_DATA_S 0 +#define ICE_AQ_RECIPE_RESULT_DATA_M (0x3F << ICE_AQ_RECIPE_RESULT_DATA_S) +#define ICE_AQ_RECIPE_RESULT_EN BIT(7) + u8 rsvd0[3]; + u8 act_ctrl_join_priority; + u8 act_ctrl_fwd_priority; + u8 
act_ctrl; +#define ICE_AQ_RECIPE_ACT_INV_ACT BIT(2) + u8 rsvd1; + __le32 dflt_act; +}; + +struct ice_aqc_recipe_data_elem { + u8 recipe_indx; + u8 resp_bits; + u8 rsvd0[2]; + u8 recipe_bitmap[8]; + u8 rsvd1[4]; + struct ice_aqc_recipe_content content; + u8 rsvd2[20]; +}; + +/* Set/Get Recipes to Profile Association (direct 0x0291/0x0293) */ +struct ice_aqc_recipe_to_profile { + __le16 profile_id; + u8 rsvd[6]; + DECLARE_BITMAP(recipe_assoc, ICE_MAX_NUM_RECIPES); +}; + /* Add/Update/Remove/Get switch rules (indirect 0x02A0, 0x02A1, 0x02A2, 0x02A3) */ struct ice_aqc_sw_rules { @@ -671,6 +720,16 @@ struct ice_aqc_sw_rules_elem { } __packed pdata; }; +/* Query PFC Mode (direct 0x0302) + * Set PFC Mode (direct 0x0303) + */ +struct ice_aqc_set_query_pfc_mode { + u8 pfc_mode; +/* For Query Command response, reserved in all other cases */ +#define ICE_AQC_PFC_VLAN_BASED_PFC 1 +#define ICE_AQC_PFC_DSCP_BASED_PFC 2 + u8 rsvd[15]; +}; /* Get Default Topology (indirect 0x0400) */ struct ice_aqc_get_topo { u8 port_num; @@ -1126,6 +1185,7 @@ struct ice_aqc_get_link_status_data { #define ICE_AQ_LINK_TOPO_UNSUPP_MEDIA BIT(7) u8 link_cfg_err; #define ICE_AQ_LINK_MODULE_POWER_UNSUPPORTED BIT(5) +#define ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE BIT(6) #define ICE_AQ_LINK_INVAL_MAX_POWER_LIMIT BIT(7) u8 link_info; #define ICE_AQ_LINK_UP BIT(0) /* Link Status */ @@ -1209,6 +1269,7 @@ struct ice_aqc_set_event_mask { #define ICE_AQ_LINK_EVENT_AN_COMPLETED BIT(7) #define ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL BIT(8) #define ICE_AQ_LINK_EVENT_PORT_TX_SUSPENDED BIT(9) +#define ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL BIT(12) u8 reserved1[6]; }; @@ -1220,7 +1281,7 @@ struct ice_aqc_set_mac_lb { u8 reserved[15]; }; -struct ice_aqc_link_topo_addr { +struct ice_aqc_link_topo_params { u8 lport_num; u8 lport_num_valid; #define ICE_AQC_LINK_TOPO_PORT_NUM_VALID BIT(0) @@ -1246,6 +1307,10 @@ struct ice_aqc_link_topo_addr { #define ICE_AQC_LINK_TOPO_NODE_CTX_PROVIDED 4 #define ICE_AQC_LINK_TOPO_NODE_CTX_OVERRIDE 5 u8 index; +}; + +struct ice_aqc_link_topo_addr { + struct ice_aqc_link_topo_params topo_params; __le16 handle; #define ICE_AQC_LINK_TOPO_HANDLE_S 0 #define ICE_AQC_LINK_TOPO_HANDLE_M (0x3FF << ICE_AQC_LINK_TOPO_HANDLE_S) @@ -1268,6 +1333,7 @@ struct ice_aqc_link_topo_addr { struct ice_aqc_get_link_topo { struct ice_aqc_link_topo_addr addr; u8 node_part_num; +#define ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575 0x21 u8 rsvd[9]; }; @@ -1281,6 +1347,16 @@ struct ice_aqc_set_port_id_led { u8 rsvd[13]; }; +/* Set/Get GPIO (direct, 0x06EC/0x06ED) */ +struct ice_aqc_gpio { + __le16 gpio_ctrl_handle; +#define ICE_AQC_GPIO_HANDLE_S 0 +#define ICE_AQC_GPIO_HANDLE_M (0x3FF << ICE_AQC_GPIO_HANDLE_S) + u8 gpio_num; + u8 gpio_val; + u8 rsvd[12]; +}; + /* Read/Write SFF EEPROM command (indirect 0x06EE) */ struct ice_aqc_sff_eeprom { u8 lport_num; @@ -1922,10 +1998,13 @@ struct ice_aq_desc { struct ice_aqc_get_phy_caps get_phy; struct ice_aqc_set_phy_cfg set_phy; struct ice_aqc_restart_an restart_an; + struct ice_aqc_gpio read_write_gpio; struct ice_aqc_sff_eeprom read_write_sff_param; struct ice_aqc_set_port_id_led set_port_id_led; struct ice_aqc_get_sw_cfg get_sw_conf; struct ice_aqc_sw_rules sw_rules; + struct ice_aqc_add_get_recipe add_get_recipe; + struct ice_aqc_recipe_to_profile recipe_to_profile; struct ice_aqc_get_topo get_topo; struct ice_aqc_sched_elem_cmd sched_elem_cmd; struct ice_aqc_query_txsched_res query_sched_res; @@ -1936,6 +2015,7 @@ struct ice_aq_desc { struct ice_aqc_nvm_pkg_data pkg_data; struct ice_aqc_nvm_pass_comp_tbl 
pass_comp_tbl; struct ice_aqc_pf_vf_msg virt; + struct ice_aqc_set_query_pfc_mode set_query_pfc_mode; struct ice_aqc_lldp_get_mib lldp_get_mib; struct ice_aqc_lldp_set_mib_change lldp_set_event; struct ice_aqc_lldp_stop lldp_stop; @@ -2033,6 +2113,12 @@ enum ice_adminq_opc { ice_aqc_opc_update_vsi = 0x0211, ice_aqc_opc_free_vsi = 0x0213, + /* recipe commands */ + ice_aqc_opc_add_recipe = 0x0290, + ice_aqc_opc_recipe_to_profile = 0x0291, + ice_aqc_opc_get_recipe = 0x0292, + ice_aqc_opc_get_recipe_to_profile = 0x0293, + /* switch rules population commands */ ice_aqc_opc_add_sw_rules = 0x02A0, ice_aqc_opc_update_sw_rules = 0x02A1, @@ -2040,6 +2126,10 @@ enum ice_adminq_opc { ice_aqc_opc_clear_pf_cfg = 0x02A4, + /* DCB commands */ + ice_aqc_opc_query_pfc_mode = 0x0302, + ice_aqc_opc_set_pfc_mode = 0x0303, + /* transmit scheduler commands */ ice_aqc_opc_get_dflt_topo = 0x0400, ice_aqc_opc_add_sched_elems = 0x0401, @@ -2064,6 +2154,8 @@ enum ice_adminq_opc { ice_aqc_opc_set_mac_lb = 0x0620, ice_aqc_opc_get_link_topo = 0x06E0, ice_aqc_opc_set_port_id_led = 0x06E9, + ice_aqc_opc_set_gpio = 0x06EC, + ice_aqc_opc_get_gpio = 0x06ED, ice_aqc_opc_sff_eeprom = 0x06EE, /* NVM commands */ diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c index 88d98c9e5f91..5daade32ea62 100644 --- a/drivers/net/ethernet/intel/ice/ice_arfs.c +++ b/drivers/net/ethernet/intel/ice/ice_arfs.c @@ -513,7 +513,7 @@ void ice_init_arfs(struct ice_vsi *vsi) if (!vsi || vsi->type != ICE_VSI_PF) return; - arfs_fltr_list = kzalloc(sizeof(*arfs_fltr_list) * ICE_MAX_ARFS_LIST, + arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list), GFP_KERNEL); if (!arfs_fltr_list) return; @@ -614,7 +614,7 @@ int ice_set_cpu_rx_rmap(struct ice_vsi *vsi) return -EINVAL; base_idx = vsi->base_vector; - for (i = 0; i < vsi->num_q_vectors; i++) + ice_for_each_q_vector(vsi, i) if (irq_cpu_rmap_add(netdev->rx_cpu_rmap, pf->msix_entries[base_idx + i].vector)) { ice_free_cpu_rx_rmap(vsi); diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c index c36057efc7ae..fa6cd63cbf1f 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.c +++ b/drivers/net/ethernet/intel/ice/ice_base.c @@ -115,6 +115,8 @@ static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx) q_vector->rx.itr_setting = ICE_DFLT_RX_ITR; q_vector->tx.itr_mode = ITR_DYNAMIC; q_vector->rx.itr_mode = ITR_DYNAMIC; + q_vector->tx.type = ICE_TX_CONTAINER; + q_vector->rx.type = ICE_RX_CONTAINER; if (vsi->type == ICE_VSI_VF) goto out; @@ -146,7 +148,8 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) { struct ice_q_vector *q_vector; struct ice_pf *pf = vsi->back; - struct ice_ring *ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; struct device *dev; dev = ice_pf_to_dev(pf); @@ -156,10 +159,10 @@ static void ice_free_q_vector(struct ice_vsi *vsi, int v_idx) } q_vector = vsi->q_vectors[v_idx]; - ice_for_each_ring(ring, q_vector->tx) - ring->q_vector = NULL; - ice_for_each_ring(ring, q_vector->rx) - ring->q_vector = NULL; + ice_for_each_tx_ring(tx_ring, q_vector->tx) + tx_ring->q_vector = NULL; + ice_for_each_rx_ring(rx_ring, q_vector->rx) + rx_ring->q_vector = NULL; /* only VSI with an associated netdev is set up with NAPI */ if (vsi->netdev) @@ -201,15 +204,18 @@ static void ice_cfg_itr_gran(struct ice_hw *hw) } /** - * ice_calc_q_handle - calculate the queue handle + * ice_calc_txq_handle - calculate the queue handle * @vsi: VSI that ring belongs to * @ring: ring to 
get the absolute queue index * @tc: traffic class number */ -static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) +static u16 ice_calc_txq_handle(struct ice_vsi *vsi, struct ice_tx_ring *ring, u8 tc) { WARN_ONCE(ice_ring_is_xdp(ring) && tc, "XDP ring can't belong to TC other than 0\n"); + if (ring->ch) + return ring->q_index - ring->ch->base_q; + /* Idea here for calculation is that we subtract the number of queue * count from TC that ring belongs to from it's absolute queue index * and as a result we get the queue's index within TC. @@ -218,13 +224,37 @@ static u16 ice_calc_q_handle(struct ice_vsi *vsi, struct ice_ring *ring, u8 tc) } /** + * ice_eswitch_calc_txq_handle + * @ring: pointer to ring which unique index is needed + * + * To correctly work with many netdevs ring->q_index of Tx rings on switchdev + * VSI can repeat. Hardware ring setup requires unique q_index. Calculate it + * here by finding index in vsi->tx_rings of this ring. + * + * Return ICE_INVAL_Q_INDEX when index wasn't found. Should never happen, + * because VSI is get from ring->vsi, so it has to be present in this VSI. + */ +static u16 ice_eswitch_calc_txq_handle(struct ice_tx_ring *ring) +{ + struct ice_vsi *vsi = ring->vsi; + int i; + + ice_for_each_txq(vsi, i) { + if (vsi->tx_rings[i] == ring) + return i; + } + + return ICE_INVAL_Q_INDEX; +} + +/** * ice_cfg_xps_tx_ring - Configure XPS for a Tx ring * @ring: The Tx ring to configure * * This enables/disables XPS for a given Tx descriptor ring * based on the TCs enabled for the VSI that ring belongs to. */ -static void ice_cfg_xps_tx_ring(struct ice_ring *ring) +static void ice_cfg_xps_tx_ring(struct ice_tx_ring *ring) { if (!ring->q_vector || !ring->netdev) return; @@ -246,7 +276,7 @@ static void ice_cfg_xps_tx_ring(struct ice_ring *ring) * Configure the Tx descriptor ring in TLAN context. */ static void -ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) +ice_setup_tx_ctx(struct ice_tx_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) { struct ice_vsi *vsi = ring->vsi; struct ice_hw *hw = &vsi->back->hw; @@ -258,7 +288,7 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) /* Transmit Queue Length */ tlan_ctx->qlen = ring->count; - ice_set_cgd_num(tlan_ctx, ring); + ice_set_cgd_num(tlan_ctx, ring->dcb_tc); /* PF number */ tlan_ctx->pf_num = hw->pf_id; @@ -273,19 +303,28 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) case ICE_VSI_LB: case ICE_VSI_CTRL: case ICE_VSI_PF: - tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; + if (ring->ch) + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; + else + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF; break; case ICE_VSI_VF: /* Firmware expects vmvf_num to be absolute VF ID */ tlan_ctx->vmvf_num = hw->func_caps.vf_base_id + vsi->vf_id; tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VF; break; + case ICE_VSI_SWITCHDEV_CTRL: + tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_VMQ; + break; default: return; } /* make sure the context is associated with the right VSI */ - tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); + if (ring->ch) + tlan_ctx->src_vsi = ring->ch->vsi_num; + else + tlan_ctx->src_vsi = ice_get_hw_vsi_num(hw, vsi->idx); /* Restrict Tx timestamps to the PF VSI */ switch (vsi->type) { @@ -312,7 +351,7 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q) * * Returns the offset value for ring into the data buffer. 
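A quick worked example of the channel branch added above (the numbers are illustrative): a channel carved out at base_q = 16 gives the ring with q_index = 18 a per-channel handle of 18 - 16 = 2, and the channel path added to ice_vsi_cfg_txq() further below then enables that handle against the channel VSI (ch->ch_vsi->idx) with TC 0 instead of against the parent VSI and its TC.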
*/ -static unsigned int ice_rx_offset(struct ice_ring *rx_ring) +static unsigned int ice_rx_offset(struct ice_rx_ring *rx_ring) { if (ice_ring_uses_build_skb(rx_ring)) return ICE_SKB_PAD; @@ -328,7 +367,7 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring) * * Configure the Rx descriptor ring in RLAN context. */ -static int ice_setup_rx_ctx(struct ice_ring *ring) +static int ice_setup_rx_ctx(struct ice_rx_ring *ring) { int chain_len = ICE_MAX_CHAINED_RX_BUFS; struct ice_vsi *vsi = ring->vsi; @@ -439,7 +478,7 @@ static int ice_setup_rx_ctx(struct ice_ring *ring) * * Return 0 on success and a negative value on error. */ -int ice_vsi_cfg_rxq(struct ice_ring *ring) +int ice_vsi_cfg_rxq(struct ice_rx_ring *ring) { struct device *dev = ice_pf_to_dev(ring->vsi->back); u16 num_bufs = ICE_DESC_UNUSED(ring); @@ -660,16 +699,16 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) tx_rings_per_v = (u8)DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id); q_vector->num_ring_tx = tx_rings_per_v; - q_vector->tx.ring = NULL; + q_vector->tx.tx_ring = NULL; q_vector->tx.itr_idx = ICE_TX_ITR; q_base = vsi->num_txq - tx_rings_rem; for (q_id = q_base; q_id < (q_base + tx_rings_per_v); q_id++) { - struct ice_ring *tx_ring = vsi->tx_rings[q_id]; + struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id]; tx_ring->q_vector = q_vector; - tx_ring->next = q_vector->tx.ring; - q_vector->tx.ring = tx_ring; + tx_ring->next = q_vector->tx.tx_ring; + q_vector->tx.tx_ring = tx_ring; } tx_rings_rem -= tx_rings_per_v; @@ -677,16 +716,16 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi) rx_rings_per_v = (u8)DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id); q_vector->num_ring_rx = rx_rings_per_v; - q_vector->rx.ring = NULL; + q_vector->rx.rx_ring = NULL; q_vector->rx.itr_idx = ICE_RX_ITR; q_base = vsi->num_rxq - rx_rings_rem; for (q_id = q_base; q_id < (q_base + rx_rings_per_v); q_id++) { - struct ice_ring *rx_ring = vsi->rx_rings[q_id]; + struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id]; rx_ring->q_vector = q_vector; - rx_ring->next = q_vector->rx.ring; - q_vector->rx.ring = rx_ring; + rx_ring->next = q_vector->rx.rx_ring; + q_vector->rx.rx_ring = rx_ring; } rx_rings_rem -= rx_rings_per_v; } @@ -711,12 +750,13 @@ void ice_vsi_free_q_vectors(struct ice_vsi *vsi) * @qg_buf: queue group buffer */ int -ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_aqc_add_tx_qgrp *qg_buf) { u8 buf_len = struct_size(qg_buf, txqs, 1); struct ice_tlan_ctx tlan_ctx = { 0 }; struct ice_aqc_add_txqs_perq *txq; + struct ice_channel *ch = ring->ch; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; enum ice_status status; @@ -746,10 +786,23 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, /* Add unique software queue handle of the Tx queue per * TC into the VSI Tx ring */ - ring->q_handle = ice_calc_q_handle(vsi, ring, tc); + if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { + ring->q_handle = ice_eswitch_calc_txq_handle(ring); - status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle, - 1, qg_buf, buf_len, NULL); + if (ring->q_handle == ICE_INVAL_Q_INDEX) + return -ENODEV; + } else { + ring->q_handle = ice_calc_txq_handle(vsi, ring, tc); + } + + if (ch) + status = ice_ena_vsi_txq(vsi->port_info, ch->ch_vsi->idx, 0, + ring->q_handle, 1, qg_buf, buf_len, + NULL); + else + status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, + ring->q_handle, 1, qg_buf, buf_len, + NULL); if (status) { dev_err(ice_pf_to_dev(pf), "Failed to set LAN 
Tx queue context, error: %s\n", ice_stat_str(status)); @@ -870,7 +923,7 @@ void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector) */ int ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring *ring, + u16 rel_vmvf_num, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta) { struct ice_pf *pf = vsi->back; @@ -927,9 +980,10 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, * are needed for stopping Tx queue */ void -ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta) { + struct ice_channel *ch = ring->ch; u8 tc; if (IS_ENABLED(CONFIG_DCB)) @@ -940,6 +994,11 @@ ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, txq_meta->q_id = ring->reg_idx; txq_meta->q_teid = ring->txq_teid; txq_meta->q_handle = ring->q_handle; - txq_meta->vsi_idx = vsi->idx; - txq_meta->tc = tc; + if (ch) { + txq_meta->vsi_idx = ch->ch_vsi->idx; + txq_meta->tc = 0; + } else { + txq_meta->vsi_idx = vsi->idx; + txq_meta->tc = tc; + } } diff --git a/drivers/net/ethernet/intel/ice/ice_base.h b/drivers/net/ethernet/intel/ice/ice_base.h index 20e1c29aa68a..b67dca417acb 100644 --- a/drivers/net/ethernet/intel/ice/ice_base.h +++ b/drivers/net/ethernet/intel/ice/ice_base.h @@ -6,7 +6,7 @@ #include "ice.h" -int ice_vsi_cfg_rxq(struct ice_ring *ring); +int ice_vsi_cfg_rxq(struct ice_rx_ring *ring); int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg); int ice_vsi_ctrl_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx, bool wait); @@ -15,7 +15,7 @@ int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi); void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi); void ice_vsi_free_q_vectors(struct ice_vsi *vsi); int -ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring, +ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_aqc_add_tx_qgrp *qg_buf); void ice_cfg_itr(struct ice_hw *hw, struct ice_q_vector *q_vector); void @@ -25,9 +25,9 @@ ice_cfg_rxq_interrupt(struct ice_vsi *vsi, u16 rxq, u16 msix_idx, u16 itr_idx); void ice_trigger_sw_intr(struct ice_hw *hw, struct ice_q_vector *q_vector); int ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring *ring, + u16 rel_vmvf_num, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta); void -ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_ring *ring, +ice_fill_txq_meta(struct ice_vsi *vsi, struct ice_tx_ring *ring, struct ice_txq_meta *txq_meta); #endif /* _ICE_BASE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c index df5ad4de1f00..b3066d0fea8b 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.c +++ b/drivers/net/ethernet/intel/ice/ice_common.c @@ -72,6 +72,27 @@ bool ice_is_e810(struct ice_hw *hw) } /** + * ice_is_e810t + * @hw: pointer to the hardware structure + * + * returns true if the device is E810T based, false if not. 
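The E810T check added here matches on the PCI subsystem device ID in addition to the device ID. Below is a minimal user-space sketch of that match, reusing the ID values this series adds to ice_devids.h; the dev_ids struct is a stand-in for the driver's ice_hw, not a real driver type.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define ICE_DEV_ID_E810C_SFP	0x1593
#define ICE_SUBDEV_ID_E810T	0x000E
#define ICE_SUBDEV_ID_E810T2	0x000F

/* stand-in for the device/subsystem IDs the driver reads from PCI config */
struct dev_ids {
	uint16_t device_id;
	uint16_t subsystem_device_id;
};

/* mirrors the match: E810-C SFP device ID plus one of the E810T subsystem IDs */
static bool is_e810t(const struct dev_ids *ids)
{
	if (ids->device_id != ICE_DEV_ID_E810C_SFP)
		return false;
	return ids->subsystem_device_id == ICE_SUBDEV_ID_E810T ||
	       ids->subsystem_device_id == ICE_SUBDEV_ID_E810T2;
}

int main(void)
{
	struct dev_ids a = { ICE_DEV_ID_E810C_SFP, ICE_SUBDEV_ID_E810T };
	struct dev_ids b = { ICE_DEV_ID_E810C_SFP, 0x0001 };

	printf("a: %s, b: %s\n", is_e810t(&a) ? "E810T" : "not E810T",
	       is_e810t(&b) ? "E810T" : "not E810T");
	return 0;
}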
+ */ +bool ice_is_e810t(struct ice_hw *hw) +{ + switch (hw->device_id) { + case ICE_DEV_ID_E810C_SFP: + if (hw->subsystem_device_id == ICE_SUBDEV_ID_E810T || + hw->subsystem_device_id == ICE_SUBDEV_ID_E810T2) + return true; + break; + default: + break; + } + + return false; +} + +/** * ice_clear_pf_cfg - Clear PF configuration * @hw: pointer to the hardware structure * @@ -242,11 +263,13 @@ ice_aq_get_link_topo_handle(struct ice_port_info *pi, u8 node_type, ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); - cmd->addr.node_type_ctx = (ICE_AQC_LINK_TOPO_NODE_CTX_PORT << - ICE_AQC_LINK_TOPO_NODE_CTX_S); + cmd->addr.topo_params.node_type_ctx = + (ICE_AQC_LINK_TOPO_NODE_CTX_PORT << + ICE_AQC_LINK_TOPO_NODE_CTX_S); /* set node type */ - cmd->addr.node_type_ctx |= (ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type); + cmd->addr.topo_params.node_type_ctx |= + (ICE_AQC_LINK_TOPO_NODE_TYPE_M & node_type); return ice_aq_send_cmd(pi->hw, &desc, NULL, 0, cd); } @@ -570,6 +593,7 @@ static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw) return ICE_ERR_NO_MEMORY; INIT_LIST_HEAD(&sw->vsi_list_map_head); + sw->prof_res_bm_init = 0; status = ice_init_def_sw_recp(hw); if (status) { @@ -596,17 +620,42 @@ static void ice_cleanup_fltr_mgmt_struct(struct ice_hw *hw) list_del(&v_pos_map->list_entry); devm_kfree(ice_hw_to_dev(hw), v_pos_map); } - recps = hw->switch_info->recp_list; - for (i = 0; i < ICE_SW_LKUP_LAST; i++) { - struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry; + recps = sw->recp_list; + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { + struct ice_recp_grp_entry *rg_entry, *tmprg_entry; recps[i].root_rid = i; - mutex_destroy(&recps[i].filt_rule_lock); - list_for_each_entry_safe(lst_itr, tmp_entry, - &recps[i].filt_rules, list_entry) { - list_del(&lst_itr->list_entry); - devm_kfree(ice_hw_to_dev(hw), lst_itr); + list_for_each_entry_safe(rg_entry, tmprg_entry, + &recps[i].rg_list, l_entry) { + list_del(&rg_entry->l_entry); + devm_kfree(ice_hw_to_dev(hw), rg_entry); + } + + if (recps[i].adv_rule) { + struct ice_adv_fltr_mgmt_list_entry *tmp_entry; + struct ice_adv_fltr_mgmt_list_entry *lst_itr; + + mutex_destroy(&recps[i].filt_rule_lock); + list_for_each_entry_safe(lst_itr, tmp_entry, + &recps[i].filt_rules, + list_entry) { + list_del(&lst_itr->list_entry); + devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups); + devm_kfree(ice_hw_to_dev(hw), lst_itr); + } + } else { + struct ice_fltr_mgmt_list_entry *lst_itr, *tmp_entry; + + mutex_destroy(&recps[i].filt_rule_lock); + list_for_each_entry_safe(lst_itr, tmp_entry, + &recps[i].filt_rules, + list_entry) { + list_del(&lst_itr->list_entry); + devm_kfree(ice_hw_to_dev(hw), lst_itr); + } } + if (recps[i].root_buf) + devm_kfree(ice_hw_to_dev(hw), recps[i].root_buf); } ice_rm_all_sw_replay_rule_info(hw); devm_kfree(ice_hw_to_dev(hw), sw->recp_list); @@ -4769,6 +4818,64 @@ ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, } /** + * ice_aq_set_gpio + * @hw: pointer to the hw struct + * @gpio_ctrl_handle: GPIO controller node handle + * @pin_idx: IO Number of the GPIO that needs to be set + * @value: SW provide IO value to set in the LSB + * @cd: pointer to command details structure or NULL + * + * Sends 0x06EC AQ command to set the GPIO pin state that's part of the topology + */ +int +ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value, + struct ice_sq_cd *cd) +{ + struct ice_aqc_gpio *cmd; + struct ice_aq_desc desc; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_gpio); + cmd = 
&desc.params.read_write_gpio; + cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle); + cmd->gpio_num = pin_idx; + cmd->gpio_val = value ? 1 : 0; + + return ice_status_to_errno(ice_aq_send_cmd(hw, &desc, NULL, 0, cd)); +} + +/** + * ice_aq_get_gpio + * @hw: pointer to the hw struct + * @gpio_ctrl_handle: GPIO controller node handle + * @pin_idx: IO Number of the GPIO that needs to be set + * @value: IO value read + * @cd: pointer to command details structure or NULL + * + * Sends 0x06ED AQ command to get the value of a GPIO signal which is part of + * the topology + */ +int +ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool *value, struct ice_sq_cd *cd) +{ + struct ice_aqc_gpio *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_gpio); + cmd = &desc.params.read_write_gpio; + cmd->gpio_ctrl_handle = cpu_to_le16(gpio_ctrl_handle); + cmd->gpio_num = pin_idx; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (status) + return ice_status_to_errno(status); + + *value = !!cmd->gpio_val; + return 0; +} + +/** * ice_fw_supports_link_override * @hw: pointer to the hardware structure * diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index fb16070f02e2..65c1b3244264 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -183,6 +183,7 @@ ice_stat_update40(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, void ice_stat_update32(struct ice_hw *hw, u32 reg, bool prev_stat_loaded, u64 *prev_stat, u64 *cur_stat); +bool ice_is_e810t(struct ice_hw *hw); enum ice_status ice_sched_query_elem(struct ice_hw *hw, u32 node_teid, struct ice_aqc_txsched_elem_data *buf); @@ -192,6 +193,12 @@ ice_aq_set_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, int ice_aq_get_driver_param(struct ice_hw *hw, enum ice_aqc_driver_params idx, u32 *value, struct ice_sq_cd *cd); +int +ice_aq_set_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, bool value, + struct ice_sq_cd *cd); +int +ice_aq_get_gpio(struct ice_hw *hw, u16 gpio_ctrl_handle, u8 pin_idx, + bool *value, struct ice_sq_cd *cd); enum ice_status ice_aq_set_lldp_mib(struct ice_hw *hw, u8 mib_type, void *buf, u16 buf_size, struct ice_sq_cd *cd); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.c b/drivers/net/ethernet/intel/ice/ice_dcb.c index 849fcf605479..241427cd9bc0 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb.c @@ -2,6 +2,7 @@ /* Copyright (c) 2019, Intel Corporation. 
*/ #include "ice_common.h" +#include "ice_lib.h" #include "ice_sched.h" #include "ice_dcb.h" @@ -736,6 +737,45 @@ ice_aq_get_cee_dcb_cfg(struct ice_hw *hw, } /** + * ice_aq_set_pfc_mode - Set PFC mode + * @hw: pointer to the HW struct + * @pfc_mode: value of PFC mode to set + * @cd: pointer to command details structure or NULL + * + * This AQ call configures the PFC mode to DSCP-based PFC mode or + * VLAN-based PFC (0x0303) + */ +int ice_aq_set_pfc_mode(struct ice_hw *hw, u8 pfc_mode, struct ice_sq_cd *cd) +{ + struct ice_aqc_set_query_pfc_mode *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + if (pfc_mode > ICE_AQC_PFC_DSCP_BASED_PFC) + return -EINVAL; + + cmd = &desc.params.set_query_pfc_mode; + + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_pfc_mode); + + cmd->pfc_mode = pfc_mode; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (status) + return ice_status_to_errno(status); + + /* FW will write the PFC mode set back into cmd->pfc_mode, but if DCB is + * disabled, FW will write back 0 to cmd->pfc_mode. After the AQ has + * been executed, check if cmd->pfc_mode is what was requested. If not, + * return an error. + */ + if (cmd->pfc_mode != pfc_mode) + return -EOPNOTSUPP; + + return 0; +} + +/** * ice_cee_to_dcb_cfg * @cee_cfg: pointer to CEE configuration struct * @pi: port information structure @@ -1207,7 +1247,140 @@ ice_add_ieee_app_pri_tlv(struct ice_lldp_org_tlv *tlv, } /** - * ice_add_dcb_tlv - Add all IEEE TLVs + * ice_add_dscp_up_tlv - Prepare DSCP to UP TLV + * @tlv: location to build the TLV data + * @dcbcfg: location of data to convert to TLV + */ +static void +ice_add_dscp_up_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) +{ + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + u16 typelen; + int i; + + typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | + ICE_DSCP_UP_TLV_LEN); + tlv->typelen = htons(typelen); + + ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) | + ICE_DSCP_SUBTYPE_DSCP2UP); + tlv->ouisubtype = htonl(ouisubtype); + + /* bytes 0 - 63 - IPv4 DSCP2UP LUT */ + for (i = 0; i < ICE_DSCP_NUM_VAL; i++) { + /* IPv4 mapping */ + buf[i] = dcbcfg->dscp_map[i]; + /* IPv6 mapping */ + buf[i + ICE_DSCP_IPV6_OFFSET] = dcbcfg->dscp_map[i]; + } + + /* byte 64 - IPv4 untagged traffic */ + buf[i] = 0; + + /* byte 144 - IPv6 untagged traffic */ + buf[i + ICE_DSCP_IPV6_OFFSET] = 0; +} + +#define ICE_BYTES_PER_TC 8 +/** + * ice_add_dscp_enf_tlv - Prepare DSCP Enforcement TLV + * @tlv: location to build the TLV data + */ +static void +ice_add_dscp_enf_tlv(struct ice_lldp_org_tlv *tlv) +{ + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + u16 typelen; + + typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | + ICE_DSCP_ENF_TLV_LEN); + tlv->typelen = htons(typelen); + + ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) | + ICE_DSCP_SUBTYPE_ENFORCE); + tlv->ouisubtype = htonl(ouisubtype); + + /* Allow all DSCP values to be valid for all TC's (IPv4 and IPv6) */ + memset(buf, 0, 2 * (ICE_MAX_TRAFFIC_CLASS * ICE_BYTES_PER_TC)); +} + +/** + * ice_add_dscp_tc_bw_tlv - Prepare DSCP BW for TC TLV + * @tlv: location to build the TLV data + * @dcbcfg: location of the data to convert to TLV + */ +static void +ice_add_dscp_tc_bw_tlv(struct ice_lldp_org_tlv *tlv, + struct ice_dcbx_cfg *dcbcfg) +{ + struct ice_dcb_ets_cfg *etscfg; + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + u8 offset = 0; + u16 typelen; + int i; + + typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | + ICE_DSCP_TC_BW_TLV_LEN); + tlv->typelen = htons(typelen); + + ouisubtype = 
(u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) | + ICE_DSCP_SUBTYPE_TCBW); + tlv->ouisubtype = htonl(ouisubtype); + + /* First Octect after subtype + * ---------------------------- + * | RSV | CBS | RSV | Max TCs | + * | 1b | 1b | 3b | 3b | + * ---------------------------- + */ + etscfg = &dcbcfg->etscfg; + buf[0] = etscfg->maxtcs & ICE_IEEE_ETS_MAXTC_M; + + /* bytes 1 - 4 reserved */ + offset = 5; + + /* TC BW table + * bytes 0 - 7 for TC 0 - 7 + * + * TSA Assignment table + * bytes 8 - 15 for TC 0 - 7 + */ + for (i = 0; i < ICE_MAX_TRAFFIC_CLASS; i++) { + buf[offset] = etscfg->tcbwtable[i]; + buf[offset + ICE_MAX_TRAFFIC_CLASS] = etscfg->tsatable[i]; + offset++; + } +} + +/** + * ice_add_dscp_pfc_tlv - Prepare DSCP PFC TLV + * @tlv: Fill PFC TLV in IEEE format + * @dcbcfg: Local store which holds the PFC CFG data + */ +static void +ice_add_dscp_pfc_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg) +{ + u8 *buf = tlv->tlvinfo; + u32 ouisubtype; + u16 typelen; + + typelen = ((ICE_TLV_TYPE_ORG << ICE_LLDP_TLV_TYPE_S) | + ICE_DSCP_PFC_TLV_LEN); + tlv->typelen = htons(typelen); + + ouisubtype = (u32)((ICE_DSCP_OUI << ICE_LLDP_TLV_OUI_S) | + ICE_DSCP_SUBTYPE_PFC); + tlv->ouisubtype = htonl(ouisubtype); + + buf[0] = dcbcfg->pfc.pfccap & 0xF; + buf[1] = dcbcfg->pfc.pfcena & 0xF; +} + +/** + * ice_add_dcb_tlv - Add all IEEE or DSCP TLVs * @tlv: Fill TLV data in IEEE format * @dcbcfg: Local store which holds the DCB Config * @tlvid: Type of IEEE TLV @@ -1218,21 +1391,41 @@ static void ice_add_dcb_tlv(struct ice_lldp_org_tlv *tlv, struct ice_dcbx_cfg *dcbcfg, u16 tlvid) { - switch (tlvid) { - case ICE_IEEE_TLV_ID_ETS_CFG: - ice_add_ieee_ets_tlv(tlv, dcbcfg); - break; - case ICE_IEEE_TLV_ID_ETS_REC: - ice_add_ieee_etsrec_tlv(tlv, dcbcfg); - break; - case ICE_IEEE_TLV_ID_PFC_CFG: - ice_add_ieee_pfc_tlv(tlv, dcbcfg); - break; - case ICE_IEEE_TLV_ID_APP_PRI: - ice_add_ieee_app_pri_tlv(tlv, dcbcfg); - break; - default: - break; + if (dcbcfg->pfc_mode == ICE_QOS_MODE_VLAN) { + switch (tlvid) { + case ICE_IEEE_TLV_ID_ETS_CFG: + ice_add_ieee_ets_tlv(tlv, dcbcfg); + break; + case ICE_IEEE_TLV_ID_ETS_REC: + ice_add_ieee_etsrec_tlv(tlv, dcbcfg); + break; + case ICE_IEEE_TLV_ID_PFC_CFG: + ice_add_ieee_pfc_tlv(tlv, dcbcfg); + break; + case ICE_IEEE_TLV_ID_APP_PRI: + ice_add_ieee_app_pri_tlv(tlv, dcbcfg); + break; + default: + break; + } + } else { + /* pfc_mode == ICE_QOS_MODE_DSCP */ + switch (tlvid) { + case ICE_TLV_ID_DSCP_UP: + ice_add_dscp_up_tlv(tlv, dcbcfg); + break; + case ICE_TLV_ID_DSCP_ENF: + ice_add_dscp_enf_tlv(tlv); + break; + case ICE_TLV_ID_DSCP_TC_BW: + ice_add_dscp_tc_bw_tlv(tlv, dcbcfg); + break; + case ICE_TLV_ID_DSCP_TO_PFC: + ice_add_dscp_pfc_tlv(tlv, dcbcfg); + break; + default: + break; + } } } diff --git a/drivers/net/ethernet/intel/ice/ice_dcb.h b/drivers/net/ethernet/intel/ice/ice_dcb.h index d7e5e6178a21..9b6f87a889a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb.h @@ -22,6 +22,14 @@ #define ICE_CEE_DCBX_OUI 0x001B21 #define ICE_CEE_DCBX_TYPE 2 + +#define ICE_DSCP_OUI 0xFFFFFF +#define ICE_DSCP_SUBTYPE_DSCP2UP 0x41 +#define ICE_DSCP_SUBTYPE_ENFORCE 0x42 +#define ICE_DSCP_SUBTYPE_TCBW 0x43 +#define ICE_DSCP_SUBTYPE_PFC 0x44 +#define ICE_DSCP_IPV6_OFFSET 80 + #define ICE_CEE_SUBTYPE_PG_CFG 2 #define ICE_CEE_SUBTYPE_PFC_CFG 3 #define ICE_CEE_SUBTYPE_APP_PRI 4 @@ -78,11 +86,20 @@ #define ICE_IEEE_TLV_ID_APP_PRI 6 #define ICE_TLV_ID_END_OF_LLDPPDU 7 #define ICE_TLV_ID_START ICE_IEEE_TLV_ID_ETS_CFG +#define ICE_TLV_ID_DSCP_UP 3 
+#define ICE_TLV_ID_DSCP_ENF 4 +#define ICE_TLV_ID_DSCP_TC_BW 5 +#define ICE_TLV_ID_DSCP_TO_PFC 6 #define ICE_IEEE_ETS_TLV_LEN 25 #define ICE_IEEE_PFC_TLV_LEN 6 #define ICE_IEEE_APP_TLV_LEN 11 +#define ICE_DSCP_UP_TLV_LEN 148 +#define ICE_DSCP_ENF_TLV_LEN 132 +#define ICE_DSCP_TC_BW_TLV_LEN 25 +#define ICE_DSCP_PFC_TLV_LEN 6 + /* IEEE 802.1AB LLDP Organization specific TLV */ struct ice_lldp_org_tlv { __be16 typelen; @@ -120,6 +137,7 @@ struct ice_cee_app_prio { u8 prio_map; } __packed; +int ice_aq_set_pfc_mode(struct ice_hw *hw, u8 pfc_mode, struct ice_sq_cd *cd); enum ice_status ice_aq_get_dcb_cfg(struct ice_hw *hw, u8 mib_type, u8 bridgetype, struct ice_dcbx_cfg *dcbcfg); diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c index 926cf748c5ec..a72e18320a22 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c @@ -5,52 +5,10 @@ #include "ice_dcb_nl.h" /** - * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration - * @vsi: the VSI being configured - * @ena_tc: TC map to be enabled - */ -void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) -{ - struct net_device *netdev = vsi->netdev; - struct ice_pf *pf = vsi->back; - struct ice_dcbx_cfg *dcbcfg; - u8 netdev_tc; - int i; - - if (!netdev) - return; - - if (!ena_tc) { - netdev_reset_tc(netdev); - return; - } - - if (netdev_set_num_tc(netdev, vsi->tc_cfg.numtc)) - return; - - dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; - - ice_for_each_traffic_class(i) - if (vsi->tc_cfg.ena_tc & BIT(i)) - netdev_set_tc_queue(netdev, - vsi->tc_cfg.tc_info[i].netdev_tc, - vsi->tc_cfg.tc_info[i].qcount_tx, - vsi->tc_cfg.tc_info[i].qoffset); - - for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { - u8 ets_tc = dcbcfg->etscfg.prio_table[i]; - - /* Get the mapped netdev TC# for the UP */ - netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; - netdev_set_prio_tc_map(netdev, i, netdev_tc); - } -} - -/** * ice_dcb_get_ena_tc - return bitmap of enabled TCs * @dcbcfg: DCB config to evaluate for enabled TCs */ -u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg) +static u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg) { u8 i, num_tc, ena_tc = 1; @@ -179,6 +137,67 @@ u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg) } /** + * ice_get_first_droptc - returns number of first droptc + * @vsi: used to find the first droptc + * + * This function returns the value of first_droptc. + * When DCB is enabled, first droptc information is derived from enabled_tc + * and PFC enabled bits. 
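As a standalone illustration of the selection described in this comment, the sketch below scans an enabled-TC bitmap for the first TC whose bit is not also set in the PFC-enabled bitmap and falls back to TC 0; the bitmaps and TC count are plain function parameters rather than the driver's DCB configuration.

#include <stdint.h>
#include <stdio.h>

/*
 * Model of the choice ice_get_first_droptc() makes: return the first enabled
 * TC that does not have PFC enabled, or 0 if every enabled TC is protected.
 */
static uint8_t first_drop_tc(uint8_t num_tc, uint8_t ena_tc_map, uint8_t pfc_ena_map)
{
	for (uint8_t i = 0; i < num_tc; i++)
		if ((ena_tc_map & (1u << i)) && !(pfc_ena_map & (1u << i)))
			return i;
	return 0;
}

int main(void)
{
	/* TCs 0-2 enabled, PFC on TC0 and TC1 -> first drop TC is 2 */
	printf("first drop tc = %u\n", first_drop_tc(3, 0x07, 0x03));
	return 0;
}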
otherwise this function returns 0 as there is one + * TC without DCB (tc0) + */ +static u8 ice_get_first_droptc(struct ice_vsi *vsi) +{ + struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + struct device *dev = ice_pf_to_dev(vsi->back); + u8 num_tc, ena_tc_map, pfc_ena_map; + u8 i; + + num_tc = ice_dcb_get_num_tc(cfg); + + /* get bitmap of enabled TCs */ + ena_tc_map = ice_dcb_get_ena_tc(cfg); + + /* get bitmap of PFC enabled TCs */ + pfc_ena_map = cfg->pfc.pfcena; + + /* get first TC that is not PFC enabled */ + for (i = 0; i < num_tc; i++) { + if ((ena_tc_map & BIT(i)) && (!(pfc_ena_map & BIT(i)))) { + dev_dbg(dev, "first drop tc = %d\n", i); + return i; + } + } + + dev_dbg(dev, "first drop tc = 0\n"); + return 0; +} + +/** + * ice_vsi_set_dcb_tc_cfg - Set VSI's TC based on DCB configuration + * @vsi: pointer to the VSI instance + */ +void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi) +{ + struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + + switch (vsi->type) { + case ICE_VSI_PF: + vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg); + vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); + break; + case ICE_VSI_CHNL: + vsi->tc_cfg.ena_tc = BIT(ice_get_first_droptc(vsi)); + vsi->tc_cfg.numtc = 1; + break; + case ICE_VSI_CTRL: + case ICE_VSI_LB: + default: + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; + } +} + +/** * ice_dcb_get_tc - Get the TC associated with the queue * @vsi: ptr to the VSI * @queue_index: queue number associated with VSI @@ -194,17 +213,18 @@ u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index) */ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { - struct ice_ring *tx_ring, *rx_ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; u16 qoffset, qcount; int i, n; if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) { /* Reset the TC information */ - for (i = 0; i < vsi->num_txq; i++) { + ice_for_each_txq(vsi, i) { tx_ring = vsi->tx_rings[i]; tx_ring->dcb_tc = 0; } - for (i = 0; i < vsi->num_rxq; i++) { + ice_for_each_rxq(vsi, i) { rx_ring = vsi->rx_rings[i]; rx_ring->dcb_tc = 0; } @@ -217,11 +237,68 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) qoffset = vsi->tc_cfg.tc_info[n].qoffset; qcount = vsi->tc_cfg.tc_info[n].qcount_tx; - for (i = qoffset; i < (qoffset + qcount); i++) { - tx_ring = vsi->tx_rings[i]; - rx_ring = vsi->rx_rings[i]; - tx_ring->dcb_tc = n; - rx_ring->dcb_tc = n; + for (i = qoffset; i < (qoffset + qcount); i++) + vsi->tx_rings[i]->dcb_tc = n; + + qcount = vsi->tc_cfg.tc_info[n].qcount_rx; + for (i = qoffset; i < (qoffset + qcount); i++) + vsi->rx_rings[i]->dcb_tc = n; + } + /* applicable only if "all_enatc" is set, which will be set from + * setup_tc method as part of configuring channels + */ + if (vsi->all_enatc) { + u8 first_droptc = ice_get_first_droptc(vsi); + + /* When DCB is configured, TC for ADQ queues (which are really + * PF queues) should be the first drop TC of the main VSI + */ + ice_for_each_chnl_tc(n) { + if (!(vsi->all_enatc & BIT(n))) + break; + + qoffset = vsi->mqprio_qopt.qopt.offset[n]; + qcount = vsi->mqprio_qopt.qopt.count[n]; + for (i = qoffset; i < (qoffset + qcount); i++) { + vsi->tx_rings[i]->dcb_tc = first_droptc; + vsi->rx_rings[i]->dcb_tc = first_droptc; + } + } + } +} + +/** + * ice_dcb_ena_dis_vsi - disable certain VSIs for DCB config/reconfig + * @pf: pointer to the PF instance + * @ena: true to enable VSIs, false to disable + * @locked: true if caller holds RTNL lock, false otherwise + * + * Before a new DCB configuration can be applied, VSIs of type PF, 
SWITCHDEV + * and CHNL need to be brought down. Following completion of DCB configuration + * the VSIs that were downed need to be brought up again. This helper function + * does both. + */ +static void ice_dcb_ena_dis_vsi(struct ice_pf *pf, bool ena, bool locked) +{ + int i; + + ice_for_each_vsi(pf, i) { + struct ice_vsi *vsi = pf->vsi[i]; + + if (!vsi) + continue; + + switch (vsi->type) { + case ICE_VSI_CHNL: + case ICE_VSI_SWITCHDEV_CTRL: + case ICE_VSI_PF: + if (ena) + ice_ena_vsi(vsi, locked); + else + ice_dis_vsi(vsi, locked); + break; + default: + continue; } } } @@ -330,7 +407,9 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) */ if (!locked) rtnl_lock(); - ice_dis_vsi(pf_vsi, true); + + /* disable VSIs affected by DCB changes */ + ice_dcb_ena_dis_vsi(pf, false, true); memcpy(curr_cfg, new_cfg, sizeof(*curr_cfg)); memcpy(&curr_cfg->etsrec, &curr_cfg->etscfg, sizeof(curr_cfg->etsrec)); @@ -358,7 +437,8 @@ int ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked) ice_pf_dcb_recfg(pf); out: - ice_ena_vsi(pf_vsi, true); + /* enable previously downed VSIs */ + ice_dcb_ena_dis_vsi(pf, true, true); if (!locked) rtnl_unlock(); free_cfg: @@ -544,7 +624,7 @@ static int ice_dcb_init_cfg(struct ice_pf *pf, bool locked) * @ets_willing: configure ETS willing * @locked: was this function called with RTNL held */ -static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) +int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked) { struct ice_aqc_port_ets_elem buf = { 0 }; struct ice_dcbx_cfg *dcbcfg; @@ -673,6 +753,8 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) tc_map = ICE_DFLT_TRAFFIC_CLASS; ice_dcb_noncontig_cfg(pf); } + } else if (vsi->type == ICE_VSI_CHNL) { + tc_map = BIT(ice_get_first_droptc(vsi)); } else { tc_map = ICE_DFLT_TRAFFIC_CLASS; } @@ -683,6 +765,12 @@ void ice_pf_dcb_recfg(struct ice_pf *pf) vsi->idx); continue; } + /* no need to proceed with remaining cfg if it is CHNL + * or switchdev VSI + */ + if (vsi->type == ICE_VSI_CHNL || + vsi->type == ICE_VSI_SWITCHDEV_CTRL) + continue; ice_vsi_map_rings_to_vectors(vsi); if (vsi->type == ICE_VSI_PF) @@ -726,6 +814,11 @@ int ice_init_pf_dcb(struct ice_pf *pf, bool locked) /* FW LLDP is disabled, activate SW DCBX/LLDP mode */ dev_info(dev, "FW LLDP is disabled, DCBx/LLDP in SW mode.\n"); clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); + err = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_VLAN_BASED_PFC, + NULL); + if (err) + dev_info(dev, "Failed to set VLAN PFC mode\n"); + err = ice_dcb_sw_dflt_cfg(pf, true, locked); if (err) { dev_err(dev, "Failed to set local DCB config %d\n", @@ -814,7 +907,7 @@ void ice_update_dcb_stats(struct ice_pf *pf) * tag will already be configured with the correct ID and priority bits */ void -ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, +ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first) { struct sk_buff *skb = first->skb; @@ -851,7 +944,6 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_dcbx_cfg tmp_dcbx_cfg; bool need_reconfig = false; struct ice_port_info *pi; - struct ice_vsi *pf_vsi; u8 mib_type; int ret; @@ -927,14 +1019,9 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, clear_bit(ICE_FLAG_DCB_ENA, pf->flags); } - pf_vsi = ice_get_main_vsi(pf); - if (!pf_vsi) { - dev_dbg(dev, "PF VSI doesn't exist\n"); - goto out; - } - rtnl_lock(); - ice_dis_vsi(pf_vsi, true); + /* disable VSIs affected by DCB changes */ + ice_dcb_ena_dis_vsi(pf, false, 
true); ret = ice_query_port_ets(pf->hw.port_info, &buf, sizeof(buf), NULL); if (ret) { @@ -945,7 +1032,8 @@ ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, /* changes in configuration update VSI */ ice_pf_dcb_recfg(pf); - ice_ena_vsi(pf_vsi, true); + /* enable previously downed VSIs */ + ice_dcb_ena_dis_vsi(pf, true, true); unlock_rtnl: rtnl_unlock(); out: diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h index 261b6e2ed7bc..4c421c842a13 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h @@ -15,7 +15,7 @@ #define ICE_DCB_HW_CHG 2 /* DCB configuration changed, no reset */ void ice_dcb_rebuild(struct ice_pf *pf); -u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg); +int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked); u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg); void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi); bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue); @@ -28,13 +28,11 @@ void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi); int ice_init_pf_dcb(struct ice_pf *pf, bool locked); void ice_update_dcb_stats(struct ice_pf *pf); void -ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring, +ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first); void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event); -void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); - /** * ice_find_q_in_range * @low: start of queue range for a TC i.e. offset of TC @@ -49,9 +47,9 @@ static inline bool ice_find_q_in_range(u16 low, u16 high, unsigned int tx_q) } static inline void -ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) +ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { - tlan_ctx->cgd_num = ring->dcb_tc; + tlan_ctx->cgd_num = dcb_tc; } static inline bool ice_is_dcb_active(struct ice_pf *pf) @@ -59,9 +57,21 @@ static inline bool ice_is_dcb_active(struct ice_pf *pf) return (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags) || test_bit(ICE_FLAG_DCB_ENA, pf->flags)); } + +static inline u8 ice_get_pfc_mode(struct ice_pf *pf) +{ + return pf->hw.port_info->qos_cfg.local_dcbx_cfg.pfc_mode; +} + #else static inline void ice_dcb_rebuild(struct ice_pf *pf) { } +static inline void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi) +{ + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; +} + static inline u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg __always_unused *dcbcfg) { return ICE_DFLT_TRAFFIC_CLASS; @@ -95,7 +105,7 @@ ice_pf_dcb_cfg(struct ice_pf __always_unused *pf, } static inline int -ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring, +ice_tx_prepare_vlan_flags_dcb(struct ice_tx_ring __always_unused *tx_ring, struct ice_tx_buf __always_unused *first) { return 0; @@ -113,12 +123,16 @@ ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf, return false; } +static inline u8 ice_get_pfc_mode(struct ice_pf *pf) +{ + return 0; +} + static inline void ice_pf_dcb_recfg(struct ice_pf *pf) { } static inline void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi) { } static inline void ice_update_dcb_stats(struct ice_pf *pf) { } static inline void ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf, struct ice_rq_event_info *event) { } -static inline void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) { } -static inline void ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring) { } +static inline void 
ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, u8 dcb_tc) { } #endif /* CONFIG_DCB */ #endif /* _ICE_DCB_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c index 4180f1f35fb8..7fdeb411b6df 100644 --- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c +++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c @@ -64,7 +64,7 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_dcbx_cfg *new_cfg; int bwcfg = 0, bwrec = 0; - int err, i, max_tc = 0; + int err, i; if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) @@ -80,13 +80,14 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) new_cfg->etscfg.tcbwtable[i] = ets->tc_tx_bw[i]; bwcfg += ets->tc_tx_bw[i]; new_cfg->etscfg.tsatable[i] = ets->tc_tsa[i]; - new_cfg->etscfg.prio_table[i] = ets->prio_tc[i]; - if (ets->prio_tc[i] > max_tc) - max_tc = ets->prio_tc[i]; + if (new_cfg->pfc_mode == ICE_QOS_MODE_VLAN) { + /* in DSCP mode up->tc mapping cannot change */ + new_cfg->etscfg.prio_table[i] = ets->prio_tc[i]; + new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i]; + } new_cfg->etsrec.tcbwtable[i] = ets->tc_reco_bw[i]; bwrec += ets->tc_reco_bw[i]; new_cfg->etsrec.tsatable[i] = ets->tc_reco_tsa[i]; - new_cfg->etsrec.prio_table[i] = ets->reco_prio_tc[i]; } if (ice_dcb_bwchk(pf, new_cfg)) { @@ -94,12 +95,7 @@ static int ice_dcbnl_setets(struct net_device *netdev, struct ieee_ets *ets) goto ets_out; } - max_tc = pf->hw.func_caps.common_cap.maxtc; - - new_cfg->etscfg.maxtcs = max_tc; - - if (!bwcfg) - new_cfg->etscfg.tcbwtable[0] = 100; + new_cfg->etscfg.maxtcs = pf->hw.func_caps.common_cap.maxtc; if (!bwrec) new_cfg->etsrec.tcbwtable[0] = 100; @@ -173,10 +169,13 @@ static u8 ice_dcbnl_setdcbx(struct net_device *netdev, u8 mode) pf->dcbx_cap = mode; qos_cfg = &pf->hw.port_info->qos_cfg; - if (mode & DCB_CAP_DCBX_VER_CEE) + if (mode & DCB_CAP_DCBX_VER_CEE) { + if (qos_cfg->local_dcbx_cfg.pfc_mode == ICE_QOS_MODE_DSCP) + return ICE_DCB_NO_HW_CHG; qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_CEE; - else + } else { qos_cfg->local_dcbx_cfg.dcbx_mode = ICE_DCBX_MODE_IEEE; + } dev_info(ice_pf_to_dev(pf), "DCBx mode = 0x%x\n", mode); return ICE_DCB_HW_CHG_RST; @@ -683,6 +682,8 @@ ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg, return false; } +#define ICE_BYTES_PER_DSCP_VAL 8 + /** * ice_dcbnl_setapp - set local IEEE App config * @netdev: relevant netdev struct @@ -693,42 +694,117 @@ static int ice_dcbnl_setapp(struct net_device *netdev, struct dcb_app *app) struct ice_pf *pf = ice_netdev_to_pf(netdev); struct ice_dcb_app_priority_table new_app; struct ice_dcbx_cfg *old_cfg, *new_cfg; + u8 max_tc; int ret; - if ((pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) || - !(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + /* ONLY DSCP APP TLVs have operational significance */ + if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP) return -EINVAL; - mutex_lock(&pf->tc_mutex); + /* only allow APP TLVs in SW Mode */ + if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) { + netdev_err(netdev, "can't do DSCP QoS when FW DCB agent active\n"); + return -EINVAL; + } - new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; + if (!(pf->dcbx_cap & DCB_CAP_DCBX_VER_IEEE)) + return -EINVAL; - old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + if (!ice_is_feature_supported(pf, ICE_F_DSCP)) + return -EOPNOTSUPP; - if (old_cfg->numapps == ICE_DCBX_MAX_APPS) { - ret = -EINVAL; - goto setapp_out; + if 
(app->protocol >= ICE_DSCP_NUM_VAL) { + netdev_err(netdev, "DSCP value 0x%04X out of range\n", + app->protocol); + return -EINVAL; + } + + max_tc = pf->hw.func_caps.common_cap.maxtc; + if (app->priority >= max_tc) { + netdev_err(netdev, "TC %d out of range, max TC %d\n", + app->priority, max_tc); + return -EINVAL; } + /* grab TC mutex */ + mutex_lock(&pf->tc_mutex); + + new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; + old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + ret = dcb_ieee_setapp(netdev, app); if (ret) goto setapp_out; + if (test_and_set_bit(app->protocol, new_cfg->dscp_mapped)) { + netdev_err(netdev, "DSCP value 0x%04X already user mapped\n", + app->protocol); + ret = dcb_ieee_delapp(netdev, app); + if (ret) + netdev_err(netdev, "Failed to delete re-mapping TLV\n"); + ret = -EINVAL; + goto setapp_out; + } + new_app.selector = app->selector; new_app.prot_id = app->protocol; new_app.priority = app->priority; - if (ice_dcbnl_find_app(old_cfg, &new_app)) { - ret = 0; - goto setapp_out; - } + /* If port is not in DSCP mode, need to set */ + if (old_cfg->pfc_mode == ICE_QOS_MODE_VLAN) { + int i, j; + + /* set DSCP mode */ + ret = ice_aq_set_pfc_mode(&pf->hw, ICE_AQC_PFC_DSCP_BASED_PFC, + NULL); + if (ret) { + netdev_err(netdev, "Failed to set DSCP PFC mode %d\n", + ret); + goto setapp_out; + } + netdev_info(netdev, "Switched QoS to L3 DSCP mode\n"); + + new_cfg->pfc_mode = ICE_QOS_MODE_DSCP; + + /* set default DSCP QoS values */ + new_cfg->etscfg.willing = 0; + new_cfg->pfc.pfccap = max_tc; + new_cfg->pfc.willing = 0; + + for (i = 0; i < max_tc; i++) + for (j = 0; j < ICE_BYTES_PER_DSCP_VAL; j++) { + int dscp, offset; + + dscp = (i * max_tc) + j; + offset = max_tc * ICE_BYTES_PER_DSCP_VAL; + + new_cfg->dscp_map[dscp] = i; + /* if less that 8 TCs supported */ + if (max_tc < ICE_MAX_TRAFFIC_CLASS) + new_cfg->dscp_map[dscp + offset] = i; + } + + new_cfg->etscfg.tcbwtable[0] = 100; + new_cfg->etscfg.tsatable[0] = ICE_IEEE_TSA_ETS; + new_cfg->etscfg.prio_table[0] = 0; + + for (i = 1; i < max_tc; i++) { + new_cfg->etscfg.tcbwtable[i] = 0; + new_cfg->etscfg.tsatable[i] = ICE_IEEE_TSA_ETS; + new_cfg->etscfg.prio_table[i] = i; + } + } /* end of switching to DSCP mode */ + + /* apply new mapping for this DSCP value */ + new_cfg->dscp_map[app->protocol] = app->priority; new_cfg->app[new_cfg->numapps++] = new_app; + ret = ice_pf_dcb_cfg(pf, new_cfg, true); /* return of zero indicates new cfg applied */ if (ret == ICE_DCB_HW_CHG_RST) ice_dcbnl_devreset(netdev); - if (ret == ICE_DCB_NO_HW_CHG) - ret = ICE_DCB_HW_CHG_RST; + else + ret = ICE_DCB_NO_HW_CHG; setapp_out: mutex_unlock(&pf->tc_mutex); @@ -749,22 +825,21 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) unsigned int i, j; int ret = 0; - if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) + if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED) { + netdev_err(netdev, "can't delete DSCP netlink app when FW DCB agent is active\n"); return -EINVAL; + } mutex_lock(&pf->tc_mutex); old_cfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; - if (old_cfg->numapps <= 1) - goto delapp_out; - ret = dcb_ieee_delapp(netdev, app); if (ret) goto delapp_out; new_cfg = &pf->hw.port_info->qos_cfg.desired_dcbx_cfg; - for (i = 1; i < new_cfg->numapps; i++) { + for (i = 0; i < new_cfg->numapps; i++) { if (app->selector == new_cfg->app[i].selector && app->protocol == new_cfg->app[i].prot_id && app->priority == new_cfg->app[i].priority) { @@ -784,17 +859,58 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app) 
new_cfg->numapps--; for (j = i; j < new_cfg->numapps; j++) { - new_cfg->app[i].selector = old_cfg->app[j + 1].selector; - new_cfg->app[i].prot_id = old_cfg->app[j + 1].prot_id; - new_cfg->app[i].priority = old_cfg->app[j + 1].priority; + new_cfg->app[j].selector = old_cfg->app[j + 1].selector; + new_cfg->app[j].prot_id = old_cfg->app[j + 1].prot_id; + new_cfg->app[j].priority = old_cfg->app[j + 1].priority; } - ret = ice_pf_dcb_cfg(pf, new_cfg, true); - /* return of zero indicates new cfg applied */ + /* if not a DSCP APP TLV or DSCP is not supported, we are done */ + if (app->selector != IEEE_8021QAZ_APP_SEL_DSCP || + !ice_is_feature_supported(pf, ICE_F_DSCP)) { + ret = ICE_DCB_HW_CHG; + goto delapp_out; + } + + /* if DSCP TLV, then need to address change in mapping */ + clear_bit(app->protocol, new_cfg->dscp_mapped); + /* remap this DSCP value to default value */ + new_cfg->dscp_map[app->protocol] = app->protocol % + ICE_BYTES_PER_DSCP_VAL; + + /* if the last DSCP mapping just got deleted, need to switch + * to L2 VLAN QoS mode + */ + if (bitmap_empty(new_cfg->dscp_mapped, ICE_DSCP_NUM_VAL) && + new_cfg->pfc_mode == ICE_QOS_MODE_DSCP) { + ret = ice_aq_set_pfc_mode(&pf->hw, + ICE_AQC_PFC_VLAN_BASED_PFC, + NULL); + if (ret) { + netdev_info(netdev, "Failed to set VLAN PFC mode %d\n", + ret); + goto delapp_out; + } + netdev_info(netdev, "Switched QoS to L2 VLAN mode\n"); + + new_cfg->pfc_mode = ICE_QOS_MODE_VLAN; + + ret = ice_dcb_sw_dflt_cfg(pf, true, true); + } else { + ret = ice_pf_dcb_cfg(pf, new_cfg, true); + } + + /* return of ICE_DCB_HW_CHG_RST indicates new cfg applied + * and reset needs to be performed + */ if (ret == ICE_DCB_HW_CHG_RST) ice_dcbnl_devreset(netdev); + + /* if the change was not siginificant enough to actually call + * the reconfiguration flow, we still need to tell caller that + * their request was successfully handled + */ if (ret == ICE_DCB_NO_HW_CHG) - ret = ICE_DCB_HW_CHG_RST; + ret = ICE_DCB_HW_CHG; delapp_out: mutex_unlock(&pf->tc_mutex); diff --git a/drivers/net/ethernet/intel/ice/ice_devids.h b/drivers/net/ethernet/intel/ice/ice_devids.h index ef4392e6e244..61dd2f18dee8 100644 --- a/drivers/net/ethernet/intel/ice/ice_devids.h +++ b/drivers/net/ethernet/intel/ice/ice_devids.h @@ -21,6 +21,8 @@ #define ICE_DEV_ID_E810C_QSFP 0x1592 /* Intel(R) Ethernet Controller E810-C for SFP */ #define ICE_DEV_ID_E810C_SFP 0x1593 +#define ICE_SUBDEV_ID_E810T 0x000E +#define ICE_SUBDEV_ID_E810T2 0x000F /* Intel(R) Ethernet Controller E810-XXV for backplane */ #define ICE_DEV_ID_E810_XXV_BACKPLANE 0x1599 /* Intel(R) Ethernet Controller E810-XXV for QSFP */ diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c index da7288bdc9a3..b9bd9f9472f6 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.c +++ b/drivers/net/ethernet/intel/ice/ice_devlink.c @@ -4,6 +4,7 @@ #include "ice.h" #include "ice_lib.h" #include "ice_devlink.h" +#include "ice_eswitch.h" #include "ice_fw_update.h" /* context for devlink info version reporting */ @@ -22,7 +23,7 @@ struct ice_info_ctx { * * If a version does not exist, for example when attempting to get the * inactive version of flash when there is no pending update, the function - * should leave the buffer in the ctx structure empty and return 0. + * should leave the buffer in the ctx structure empty. 
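The convention spelled out here (an empty buffer means "nothing to report", with an optional fallback getter and silent skipping of empty entries) can be modelled in a few lines of user-space C; the getter names and version strings below are illustrative only, not the driver's actual callbacks.

#include <stdio.h>

/*
 * Sketch of the getter/fallback convention: a getter writes into a fixed
 * buffer and simply leaves it empty when it has nothing to report; the caller
 * then tries an optional fallback and skips the entry if still empty.
 */
struct info_ctx {
	char buf[32];
};

typedef void (*info_getter)(struct info_ctx *ctx);

static void get_pending_fw(struct info_ctx *ctx)
{
	/* no pending update: leave ctx->buf empty instead of returning an error */
	(void)ctx;
}

static void get_running_fw(struct info_ctx *ctx)
{
	snprintf(ctx->buf, sizeof(ctx->buf), "1.2.3");
}

static const struct {
	const char *key;
	info_getter getter;
	info_getter fallback;
} versions[] = {
	{ "fw.mgmt", get_running_fw, NULL },
	{ "fw.mgmt.stored", get_pending_fw, get_running_fw },
};

int main(void)
{
	for (size_t i = 0; i < sizeof(versions) / sizeof(versions[0]); i++) {
		struct info_ctx ctx = { .buf = "" };

		versions[i].getter(&ctx);
		if (ctx.buf[0] == '\0' && versions[i].fallback)
			versions[i].fallback(&ctx);
		if (ctx.buf[0] == '\0')
			continue; /* do not report missing versions */
		printf("%s = %s\n", versions[i].key, ctx.buf);
	}
	return 0;
}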
*/ static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx) @@ -35,7 +36,7 @@ static void ice_info_get_dsn(struct ice_pf *pf, struct ice_info_ctx *ctx) snprintf(ctx->buf, sizeof(ctx->buf), "%8phD", dsn); } -static int ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; enum ice_status status; @@ -45,149 +46,127 @@ static int ice_info_pba(struct ice_pf *pf, struct ice_info_ctx *ctx) /* We failed to locate the PBA, so just skip this entry */ dev_dbg(ice_pf_to_dev(pf), "Failed to read Product Board Assembly string, status %s\n", ice_stat_str(status)); - - return 0; } -static int ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_fw_mgmt(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->fw_maj_ver, hw->fw_min_ver, - hw->fw_patch); - - return 0; + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + hw->fw_maj_ver, hw->fw_min_ver, hw->fw_patch); } -static int ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_fw_api(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", hw->api_maj_ver, hw->api_min_ver, hw->api_patch); - - return 0; } -static int ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_fw_build(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", hw->fw_build); - - return 0; } -static int ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_orom_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_orom_info *orom = &pf->hw.flash.orom; - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", orom->major, orom->build, orom->patch); - - return 0; + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", + orom->major, orom->build, orom->patch); } -static int -ice_info_pending_orom_ver(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +static void +ice_info_pending_orom_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) { struct ice_orom_info *orom = &ctx->pending_orom; if (ctx->dev_caps.common_cap.nvm_update_pending_orom) snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u", orom->major, orom->build, orom->patch); - - return 0; } -static int ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_nvm_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &pf->hw.flash.nvm; snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); - - return 0; } -static int -ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +static void +ice_info_pending_nvm_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &ctx->pending_nvm; if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) - snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", nvm->major, nvm->minor); - - return 0; + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%02x", + nvm->major, nvm->minor); } -static int ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &pf->hw.flash.nvm; snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); - - return 0; } -static int -ice_info_pending_eetrack(struct ice_pf 
__always_unused *pf, struct ice_info_ctx *ctx) +static void +ice_info_pending_eetrack(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_nvm_info *nvm = &ctx->pending_nvm; if (ctx->dev_caps.common_cap.nvm_update_pending_nvm) snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", nvm->eetrack); - - return 0; } -static int ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_ddp_pkg_name(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_hw *hw = &pf->hw; snprintf(ctx->buf, sizeof(ctx->buf), "%s", hw->active_pkg_name); - - return 0; } -static int ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void +ice_info_ddp_pkg_version(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_pkg_ver *pkg = &pf->hw.active_pkg_ver; - snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u", pkg->major, pkg->minor, pkg->update, - pkg->draft); - - return 0; + snprintf(ctx->buf, sizeof(ctx->buf), "%u.%u.%u.%u", + pkg->major, pkg->minor, pkg->update, pkg->draft); } -static int ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void +ice_info_ddp_pkg_bundle_id(struct ice_pf *pf, struct ice_info_ctx *ctx) { snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", pf->hw.active_track_id); - - return 0; } -static int ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_netlist_ver(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_netlist_info *netlist = &pf->hw.flash.netlist; /* The netlist version fields are BCD formatted */ - snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor, - netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev, - netlist->cust_ver); - - return 0; + snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", + netlist->major, netlist->minor, + netlist->type >> 16, netlist->type & 0xFFFF, + netlist->rev, netlist->cust_ver); } -static int ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) +static void ice_info_netlist_build(struct ice_pf *pf, struct ice_info_ctx *ctx) { struct ice_netlist_info *netlist = &pf->hw.flash.netlist; snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); - - return 0; } -static int -ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +static void +ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) { struct ice_netlist_info *netlist = &ctx->pending_netlist; @@ -195,21 +174,18 @@ ice_info_pending_netlist_ver(struct ice_pf __always_unused *pf, struct ice_info_ if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) snprintf(ctx->buf, sizeof(ctx->buf), "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor, - netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev, - netlist->cust_ver); - - return 0; + netlist->type >> 16, netlist->type & 0xFFFF, + netlist->rev, netlist->cust_ver); } -static int -ice_info_pending_netlist_build(struct ice_pf __always_unused *pf, struct ice_info_ctx *ctx) +static void +ice_info_pending_netlist_build(struct ice_pf __always_unused *pf, + struct ice_info_ctx *ctx) { struct ice_netlist_info *netlist = &ctx->pending_netlist; if (ctx->dev_caps.common_cap.nvm_update_pending_netlist) snprintf(ctx->buf, sizeof(ctx->buf), "0x%08x", netlist->hash); - - return 0; } #define fixed(key, getter) { ICE_VERSION_FIXED, key, getter, NULL } @@ -239,8 +215,8 @@ enum ice_version_type { static const struct ice_devlink_version { enum ice_version_type type; const char *key; - int (*getter)(struct 
ice_pf *pf, struct ice_info_ctx *ctx); - int (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx); + void (*getter)(struct ice_pf *pf, struct ice_info_ctx *ctx); + void (*fallback)(struct ice_pf *pf, struct ice_info_ctx *ctx); } ice_devlink_versions[] = { fixed(DEVLINK_INFO_VERSION_GENERIC_BOARD_ID, ice_info_pba), running(DEVLINK_INFO_VERSION_GENERIC_FW_MGMT, ice_info_fw_mgmt), @@ -352,24 +328,15 @@ static int ice_devlink_info_get(struct devlink *devlink, memset(ctx->buf, 0, sizeof(ctx->buf)); - err = ice_devlink_versions[i].getter(pf, ctx); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to obtain version info"); - goto out_free_ctx; - } + ice_devlink_versions[i].getter(pf, ctx); /* If the default getter doesn't report a version, use the * fallback function. This is primarily useful in the case of * "stored" versions that want to report the same value as the * running version in the normal case of no pending update. */ - if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback) { - err = ice_devlink_versions[i].fallback(pf, ctx); - if (err) { - NL_SET_ERR_MSG_MOD(extack, "Unable to obtain version info"); - goto out_free_ctx; - } - } + if (ctx->buf[0] == '\0' && ice_devlink_versions[i].fallback) + ice_devlink_versions[i].fallback(pf, ctx); /* Do not report missing versions */ if (ctx->buf[0] == '\0') @@ -457,6 +424,8 @@ ice_devlink_flash_update(struct devlink *devlink, static const struct devlink_ops ice_devlink_ops = { .supported_flash_update_params = DEVLINK_SUPPORT_FLASH_UPDATE_OVERWRITE_MASK, + .eswitch_mode_get = ice_eswitch_mode_get, + .eswitch_mode_set = ice_eswitch_mode_set, .info_get = ice_devlink_info_get, .flash_update = ice_devlink_flash_update, }; @@ -483,10 +452,8 @@ struct ice_pf *ice_allocate_pf(struct device *dev) return NULL; /* Add an action to teardown the devlink when unwinding the driver */ - if (devm_add_action(dev, ice_devlink_free, devlink)) { - devlink_free(devlink); + if (devm_add_action_or_reset(dev, ice_devlink_free, devlink)) return NULL; - } return devlink_priv(devlink); } @@ -499,15 +466,58 @@ struct ice_pf *ice_allocate_pf(struct device *dev) * * Return: zero on success or an error code on failure. */ -int ice_devlink_register(struct ice_pf *pf) +void ice_devlink_register(struct ice_pf *pf) { struct devlink *devlink = priv_to_devlink(pf); - struct device *dev = ice_pf_to_dev(pf); + + devlink_register(devlink); +} + +/** + * ice_devlink_unregister - Unregister devlink resources for this PF. + * @pf: the PF structure to cleanup + * + * Releases resources used by devlink and cleans up associated memory. + */ +void ice_devlink_unregister(struct ice_pf *pf) +{ + devlink_unregister(priv_to_devlink(pf)); +} + +/** + * ice_devlink_create_pf_port - Create a devlink port for this PF + * @pf: the PF to create a devlink port for + * + * Create and register a devlink_port for this PF. + * + * Return: zero on success or an error code on failure. 
+ */ +int ice_devlink_create_pf_port(struct ice_pf *pf) +{ + struct devlink_port_attrs attrs = {}; + struct devlink_port *devlink_port; + struct devlink *devlink; + struct ice_vsi *vsi; + struct device *dev; int err; - err = devlink_register(devlink); + dev = ice_pf_to_dev(pf); + + devlink_port = &pf->devlink_port; + + vsi = ice_get_main_vsi(pf); + if (!vsi) + return -EIO; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + attrs.phys.port_number = pf->hw.bus.func; + devlink_port_attrs_set(devlink_port, &attrs); + devlink = priv_to_devlink(pf); + + err = devlink_port_register(devlink, devlink_port, vsi->idx); if (err) { - dev_err(dev, "devlink registration failed: %d\n", err); + dev_err(dev, "Failed to create devlink port for PF %d, error %d\n", + pf->hw.pf_id, err); return err; } @@ -515,71 +525,75 @@ int ice_devlink_register(struct ice_pf *pf) } /** - * ice_devlink_unregister - Unregister devlink resources for this PF. - * @pf: the PF structure to cleanup + * ice_devlink_destroy_pf_port - Destroy the devlink_port for this PF + * @pf: the PF to cleanup * - * Releases resources used by devlink and cleans up associated memory. + * Unregisters the devlink_port structure associated with this PF. */ -void ice_devlink_unregister(struct ice_pf *pf) +void ice_devlink_destroy_pf_port(struct ice_pf *pf) { - devlink_unregister(priv_to_devlink(pf)); + struct devlink_port *devlink_port; + + devlink_port = &pf->devlink_port; + + devlink_port_type_clear(devlink_port); + devlink_port_unregister(devlink_port); } /** - * ice_devlink_create_port - Create a devlink port for this VSI - * @vsi: the VSI to create a port for + * ice_devlink_create_vf_port - Create a devlink port for this VF + * @vf: the VF to create a port for * - * Create and register a devlink_port for this VSI. + * Create and register a devlink_port for this VF. * * Return: zero on success or an error code on failure. */ -int ice_devlink_create_port(struct ice_vsi *vsi) +int ice_devlink_create_vf_port(struct ice_vf *vf) { struct devlink_port_attrs attrs = {}; - struct ice_port_info *pi; + struct devlink_port *devlink_port; struct devlink *devlink; + struct ice_vsi *vsi; struct device *dev; struct ice_pf *pf; int err; - /* Currently we only create devlink_port instances for PF VSIs */ - if (vsi->type != ICE_VSI_PF) - return -EINVAL; + pf = vf->pf; + dev = ice_pf_to_dev(pf); + vsi = ice_get_vf_vsi(vf); + devlink_port = &vf->devlink_port; + + attrs.flavour = DEVLINK_PORT_FLAVOUR_PCI_VF; + attrs.pci_vf.pf = pf->hw.bus.func; + attrs.pci_vf.vf = vf->vf_id; - pf = vsi->back; + devlink_port_attrs_set(devlink_port, &attrs); devlink = priv_to_devlink(pf); - dev = ice_pf_to_dev(pf); - pi = pf->hw.port_info; - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; - attrs.phys.port_number = pi->lport; - devlink_port_attrs_set(&vsi->devlink_port, &attrs); - err = devlink_port_register(devlink, &vsi->devlink_port, vsi->idx); + err = devlink_port_register(devlink, devlink_port, vsi->idx); if (err) { - dev_err(dev, "devlink_port_register failed: %d\n", err); + dev_err(dev, "Failed to create devlink port for VF %d, error %d\n", + vf->vf_id, err); return err; } - vsi->devlink_port_registered = true; - return 0; } /** - * ice_devlink_destroy_port - Destroy the devlink_port for this VSI - * @vsi: the VSI to cleanup + * ice_devlink_destroy_vf_port - Destroy the devlink_port for this VF + * @vf: the VF to cleanup * - * Unregisters the devlink_port structure associated with this VSI. + * Unregisters the devlink_port structure associated with this VF. 
*/ -void ice_devlink_destroy_port(struct ice_vsi *vsi) +void ice_devlink_destroy_vf_port(struct ice_vf *vf) { - if (!vsi->devlink_port_registered) - return; + struct devlink_port *devlink_port; - devlink_port_type_clear(&vsi->devlink_port); - devlink_port_unregister(&vsi->devlink_port); + devlink_port = &vf->devlink_port; - vsi->devlink_port_registered = false; + devlink_port_type_clear(devlink_port); + devlink_port_unregister(devlink_port); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.h b/drivers/net/ethernet/intel/ice/ice_devlink.h index e07e74426bde..b7f9551e4fc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_devlink.h +++ b/drivers/net/ethernet/intel/ice/ice_devlink.h @@ -6,10 +6,12 @@ struct ice_pf *ice_allocate_pf(struct device *dev); -int ice_devlink_register(struct ice_pf *pf); +void ice_devlink_register(struct ice_pf *pf); void ice_devlink_unregister(struct ice_pf *pf); -int ice_devlink_create_port(struct ice_vsi *vsi); -void ice_devlink_destroy_port(struct ice_vsi *vsi); +int ice_devlink_create_pf_port(struct ice_pf *pf); +void ice_devlink_destroy_pf_port(struct ice_pf *pf); +int ice_devlink_create_vf_port(struct ice_vf *vf); +void ice_devlink_destroy_vf_port(struct ice_vf *vf); void ice_devlink_init_regions(struct ice_pf *pf); void ice_devlink_destroy_regions(struct ice_pf *pf); diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.c b/drivers/net/ethernet/intel/ice/ice_eswitch.c new file mode 100644 index 000000000000..d1d7389b0bff --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.c @@ -0,0 +1,655 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice.h" +#include "ice_lib.h" +#include "ice_eswitch.h" +#include "ice_fltr.h" +#include "ice_repr.h" +#include "ice_devlink.h" +#include "ice_tc_lib.h" + +/** + * ice_eswitch_setup_env - configure switchdev HW filters + * @pf: pointer to PF struct + * + * This function adds HW filters configuration specific for switchdev + * mode. 
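ice_eswitch_setup_env() below leans on the usual goto-based unwind: every setup step has a matching teardown, and a failure jumps to the label that undoes only the steps that already succeeded, in reverse order. A generic sketch of that pattern follows, with placeholder step_a/step_b/step_c helpers rather than the driver's filter and VSI calls.

#include <stdbool.h>
#include <stdio.h>

/* placeholders standing in for individual setup/teardown steps */
static bool step_a(void) { puts("step a"); return true; }
static bool step_b(void) { puts("step b"); return true; }
static bool step_c(void) { puts("step c"); return false; /* simulate failure */ }

static void undo_a(void) { puts("undo a"); }
static void undo_b(void) { puts("undo b"); }

static int setup_env(void)
{
	if (!step_a())
		goto err_a;
	if (!step_b())
		goto err_b;
	if (!step_c())
		goto err_c;
	return 0;

err_c:
	undo_b();	/* step c failed: unwind b, then fall through to a */
err_b:
	undo_a();
err_a:
	return -1;
}

int main(void)
{
	return setup_env() ? 1 : 0;
}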
+ */ +static int ice_eswitch_setup_env(struct ice_pf *pf) +{ + struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi; + struct net_device *uplink_netdev = uplink_vsi->netdev; + struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + struct ice_port_info *pi = pf->hw.port_info; + bool rule_added = false; + + ice_vsi_manage_vlan_stripping(ctrl_vsi, false); + + ice_remove_vsi_fltr(&pf->hw, uplink_vsi->idx); + + netif_addr_lock_bh(uplink_netdev); + __dev_uc_unsync(uplink_netdev, NULL); + __dev_mc_unsync(uplink_netdev, NULL); + netif_addr_unlock_bh(uplink_netdev); + + if (ice_vsi_add_vlan(uplink_vsi, 0, ICE_FWD_TO_VSI)) + goto err_def_rx; + + if (!ice_is_dflt_vsi_in_use(uplink_vsi->vsw)) { + if (ice_set_dflt_vsi(uplink_vsi->vsw, uplink_vsi)) + goto err_def_rx; + rule_added = true; + } + + if (ice_cfg_dflt_vsi(pi->hw, ctrl_vsi->idx, true, ICE_FLTR_TX)) + goto err_def_tx; + + if (ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_set_allow_override)) + goto err_override_uplink; + + if (ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_set_allow_override)) + goto err_override_control; + + if (ice_fltr_update_flags_dflt_rule(ctrl_vsi, pi->dflt_tx_vsi_rule_id, + ICE_FLTR_TX, + ICE_SINGLE_ACT_LB_ENABLE)) + goto err_update_action; + + return 0; + +err_update_action: + ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override); +err_override_control: + ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); +err_override_uplink: + ice_cfg_dflt_vsi(pi->hw, ctrl_vsi->idx, false, ICE_FLTR_TX); +err_def_tx: + if (rule_added) + ice_clear_dflt_vsi(uplink_vsi->vsw); +err_def_rx: + ice_fltr_add_mac_and_broadcast(uplink_vsi, + uplink_vsi->port_info->mac.perm_addr, + ICE_FWD_TO_VSI); + return -ENODEV; +} + +/** + * ice_eswitch_remap_rings_to_vectors - reconfigure rings of switchdev ctrl VSI + * @pf: pointer to PF struct + * + * In switchdev number of allocated Tx/Rx rings is equal. + * + * This function fills q_vectors structures associated with representor and + * move each ring pairs to port representor netdevs. Each port representor + * will have dedicated 1 Tx/Rx ring pair, so number of rings pair is equal to + * number of VFs. + */ +static void ice_eswitch_remap_rings_to_vectors(struct ice_pf *pf) +{ + struct ice_vsi *vsi = pf->switchdev.control_vsi; + int q_id; + + ice_for_each_txq(vsi, q_id) { + struct ice_repr *repr = pf->vf[q_id].repr; + struct ice_q_vector *q_vector = repr->q_vector; + struct ice_tx_ring *tx_ring = vsi->tx_rings[q_id]; + struct ice_rx_ring *rx_ring = vsi->rx_rings[q_id]; + + q_vector->vsi = vsi; + q_vector->reg_idx = vsi->q_vectors[0]->reg_idx; + + q_vector->num_ring_tx = 1; + q_vector->tx.tx_ring = tx_ring; + tx_ring->q_vector = q_vector; + tx_ring->next = NULL; + tx_ring->netdev = repr->netdev; + /* In switchdev mode, from OS stack perspective, there is only + * one queue for given netdev, so it needs to be indexed as 0. 
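A toy model of the remapping this comment refers to: ring pair i of the switchdev control VSI is handed to the representor of VF i, and because each representor netdev exposes a single queue, the ring's queue index is forced to 0. The struct ring and struct repr types here are simplified stand-ins, not driver structures.

#include <stdio.h>

#define NUM_VFS 2

struct ring {
	int q_index;
	int owner_vf;	/* which representor this ring now serves */
};

struct repr {
	int vf_id;
	struct ring *tx;
	struct ring *rx;
};

int main(void)
{
	struct ring tx_rings[NUM_VFS], rx_rings[NUM_VFS];
	struct repr reprs[NUM_VFS];

	for (int q_id = 0; q_id < NUM_VFS; q_id++) {
		reprs[q_id].vf_id = q_id;
		reprs[q_id].tx = &tx_rings[q_id];
		reprs[q_id].rx = &rx_rings[q_id];

		tx_rings[q_id].owner_vf = q_id;
		rx_rings[q_id].owner_vf = q_id;
		/* single queue per representor netdev, so index it as 0 */
		tx_rings[q_id].q_index = 0;
		rx_rings[q_id].q_index = 0;
	}

	for (int i = 0; i < NUM_VFS; i++)
		printf("VF %d repr: tx q_index=%d rx q_index=%d\n",
		       reprs[i].vf_id, reprs[i].tx->q_index, reprs[i].rx->q_index);
	return 0;
}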
+ */ + tx_ring->q_index = 0; + + q_vector->num_ring_rx = 1; + q_vector->rx.rx_ring = rx_ring; + rx_ring->q_vector = q_vector; + rx_ring->next = NULL; + rx_ring->netdev = repr->netdev; + } +} + +/** + * ice_eswitch_setup_reprs - configure port reprs to run in switchdev mode + * @pf: pointer to PF struct + */ +static int ice_eswitch_setup_reprs(struct ice_pf *pf) +{ + struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + int max_vsi_num = 0; + int i; + + ice_for_each_vf(pf, i) { + struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; + struct ice_vf *vf = &pf->vf[i]; + + ice_remove_vsi_fltr(&pf->hw, vsi->idx); + vf->repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, + GFP_KERNEL); + if (!vf->repr->dst) { + ice_fltr_add_mac_and_broadcast(vsi, + vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + goto err; + } + + if (ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof)) { + ice_fltr_add_mac_and_broadcast(vsi, + vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + metadata_dst_free(vf->repr->dst); + goto err; + } + + if (ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI)) { + ice_fltr_add_mac_and_broadcast(vsi, + vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + metadata_dst_free(vf->repr->dst); + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); + goto err; + } + + if (max_vsi_num < vsi->vsi_num) + max_vsi_num = vsi->vsi_num; + + netif_napi_add(vf->repr->netdev, &vf->repr->q_vector->napi, ice_napi_poll, + NAPI_POLL_WEIGHT); + + netif_keep_dst(vf->repr->netdev); + } + + kfree(ctrl_vsi->target_netdevs); + + ctrl_vsi->target_netdevs = kcalloc(max_vsi_num + 1, + sizeof(*ctrl_vsi->target_netdevs), + GFP_KERNEL); + if (!ctrl_vsi->target_netdevs) + goto err; + + ice_for_each_vf(pf, i) { + struct ice_repr *repr = pf->vf[i].repr; + struct ice_vsi *vsi = repr->src_vsi; + struct metadata_dst *dst; + + ctrl_vsi->target_netdevs[vsi->vsi_num] = repr->netdev; + + dst = repr->dst; + dst->u.port_info.port_id = vsi->vsi_num; + dst->u.port_info.lower_dev = repr->netdev; + ice_repr_set_traffic_vsi(repr, ctrl_vsi); + } + + return 0; + +err: + for (i = i - 1; i >= 0; i--) { + struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; + struct ice_vf *vf = &pf->vf[i]; + + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); + metadata_dst_free(vf->repr->dst); + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + } + + return -ENODEV; +} + +/** + * ice_eswitch_release_reprs - clear PR VSIs configuration + * @pf: poiner to PF struct + * @ctrl_vsi: pointer to switchdev control VSI + */ +static void +ice_eswitch_release_reprs(struct ice_pf *pf, struct ice_vsi *ctrl_vsi) +{ + int i; + + kfree(ctrl_vsi->target_netdevs); + ice_for_each_vf(pf, i) { + struct ice_vsi *vsi = pf->vf[i].repr->src_vsi; + struct ice_vf *vf = &pf->vf[i]; + + ice_vsi_update_security(vsi, ice_vsi_ctx_set_antispoof); + metadata_dst_free(vf->repr->dst); + ice_fltr_add_mac_and_broadcast(vsi, vf->hw_lan_addr.addr, + ICE_FWD_TO_VSI); + + netif_napi_del(&vf->repr->q_vector->napi); + } +} + +/** + * ice_eswitch_update_repr - reconfigure VF port representor + * @vsi: VF VSI for which port representor is configured + */ +void ice_eswitch_update_repr(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_repr *repr; + struct ice_vf *vf; + int ret; + + if (!ice_is_switchdev_running(pf)) + return; + + vf = &pf->vf[vsi->vf_id]; + repr = vf->repr; + repr->src_vsi = vsi; + repr->dst->u.port_info.port_id = vsi->vsi_num; + + ret = ice_vsi_update_security(vsi, ice_vsi_ctx_clear_antispoof); + if (ret) { + ice_fltr_add_mac_and_broadcast(vsi, 
vf->hw_lan_addr.addr, ICE_FWD_TO_VSI); + dev_err(ice_pf_to_dev(pf), "Failed to update VF %d port representor", vsi->vf_id); + } +} + +/** + * ice_eswitch_port_start_xmit - callback for packets transmit + * @skb: send buffer + * @netdev: network interface device structure + * + * Returns NETDEV_TX_OK if sent, else an error code + */ +netdev_tx_t +ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + struct ice_netdev_priv *np; + struct ice_repr *repr; + struct ice_vsi *vsi; + + np = netdev_priv(netdev); + vsi = np->vsi; + + if (ice_is_reset_in_progress(vsi->back->state)) + return NETDEV_TX_BUSY; + + repr = ice_netdev_to_repr(netdev); + skb_dst_drop(skb); + dst_hold((struct dst_entry *)repr->dst); + skb_dst_set(skb, (struct dst_entry *)repr->dst); + skb->queue_mapping = repr->vf->vf_id; + + return ice_start_xmit(skb, netdev); +} + +/** + * ice_eswitch_set_target_vsi - set switchdev context in Tx context descriptor + * @skb: pointer to send buffer + * @off: pointer to offload struct + */ +void +ice_eswitch_set_target_vsi(struct sk_buff *skb, + struct ice_tx_offload_params *off) +{ + struct metadata_dst *dst = skb_metadata_dst(skb); + u64 cd_cmd, dst_vsi; + + if (!dst) { + cd_cmd = ICE_TX_CTX_DESC_SWTCH_UPLINK << ICE_TXD_CTX_QW1_CMD_S; + off->cd_qw1 |= (cd_cmd | ICE_TX_DESC_DTYPE_CTX); + } else { + cd_cmd = ICE_TX_CTX_DESC_SWTCH_VSI << ICE_TXD_CTX_QW1_CMD_S; + dst_vsi = ((u64)dst->u.port_info.port_id << + ICE_TXD_CTX_QW1_VSI_S) & ICE_TXD_CTX_QW1_VSI_M; + off->cd_qw1 = cd_cmd | dst_vsi | ICE_TX_DESC_DTYPE_CTX; + } +} + +/** + * ice_eswitch_release_env - clear switchdev HW filters + * @pf: pointer to PF struct + * + * This function removes HW filters configuration specific for switchdev + * mode and restores default legacy mode settings. 
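ice_eswitch_set_target_vsi above steers traffic from a representor by writing the destination VSI number into quad-word 1 of the Tx context descriptor; the field layout (shift 50, 10-bit mask) is the one added to ice_lan_tx_rx.h later in this patch. A standalone sketch of that pack/unpack arithmetic — the accompanying command bits are left out because their values are not shown in this patch:

#include <stdio.h>
#include <stdint.h>

#define QW1_VSI_S 50
#define QW1_VSI_M (0x3FFULL << QW1_VSI_S)   /* 10-bit destination VSI field */

static uint64_t pack_dst_vsi(uint64_t qw1, uint16_t vsi_num)
{
	/* clear the field, then merge the new VSI number into bits 50..59 */
	return (qw1 & ~QW1_VSI_M) | (((uint64_t)vsi_num << QW1_VSI_S) & QW1_VSI_M);
}

static uint16_t unpack_dst_vsi(uint64_t qw1)
{
	return (uint16_t)((qw1 & QW1_VSI_M) >> QW1_VSI_S);
}

int main(void)
{
	uint64_t qw1 = 0;

	qw1 = pack_dst_vsi(qw1, 37);            /* e.g. target VSI 37 */
	printf("qw1 = 0x%016llx, vsi = %u\n",
	       (unsigned long long)qw1, unpack_dst_vsi(qw1));
	return 0;
}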
+ */ +static void ice_eswitch_release_env(struct ice_pf *pf) +{ + struct ice_vsi *uplink_vsi = pf->switchdev.uplink_vsi; + struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + + ice_vsi_update_security(ctrl_vsi, ice_vsi_ctx_clear_allow_override); + ice_vsi_update_security(uplink_vsi, ice_vsi_ctx_clear_allow_override); + ice_cfg_dflt_vsi(&pf->hw, ctrl_vsi->idx, false, ICE_FLTR_TX); + ice_clear_dflt_vsi(uplink_vsi->vsw); + ice_fltr_add_mac_and_broadcast(uplink_vsi, + uplink_vsi->port_info->mac.perm_addr, + ICE_FWD_TO_VSI); +} + +/** + * ice_eswitch_vsi_setup - configure switchdev control VSI + * @pf: pointer to PF structure + * @pi: pointer to port_info structure + */ +static struct ice_vsi * +ice_eswitch_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) +{ + return ice_vsi_setup(pf, pi, ICE_VSI_SWITCHDEV_CTRL, ICE_INVAL_VFID, NULL); +} + +/** + * ice_eswitch_napi_del - remove NAPI handle for all port representors + * @pf: pointer to PF structure + */ +static void ice_eswitch_napi_del(struct ice_pf *pf) +{ + int i; + + ice_for_each_vf(pf, i) + netif_napi_del(&pf->vf[i].repr->q_vector->napi); +} + +/** + * ice_eswitch_napi_enable - enable NAPI for all port representors + * @pf: pointer to PF structure + */ +static void ice_eswitch_napi_enable(struct ice_pf *pf) +{ + int i; + + ice_for_each_vf(pf, i) + napi_enable(&pf->vf[i].repr->q_vector->napi); +} + +/** + * ice_eswitch_napi_disable - disable NAPI for all port representors + * @pf: pointer to PF structure + */ +static void ice_eswitch_napi_disable(struct ice_pf *pf) +{ + int i; + + ice_for_each_vf(pf, i) + napi_disable(&pf->vf[i].repr->q_vector->napi); +} + +/** + * ice_eswitch_set_rxdid - configure rxdid on all Rx queues from VSI + * @vsi: VSI to setup rxdid on + * @rxdid: flex descriptor id + */ +static void ice_eswitch_set_rxdid(struct ice_vsi *vsi, u32 rxdid) +{ + struct ice_hw *hw = &vsi->back->hw; + int i; + + ice_for_each_rxq(vsi, i) { + struct ice_rx_ring *ring = vsi->rx_rings[i]; + u16 pf_q = vsi->rxq_map[ring->q_index]; + + ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3, true); + } +} + +/** + * ice_eswitch_enable_switchdev - configure eswitch in switchdev mode + * @pf: pointer to PF structure + */ +static int ice_eswitch_enable_switchdev(struct ice_pf *pf) +{ + struct ice_vsi *ctrl_vsi; + + pf->switchdev.control_vsi = ice_eswitch_vsi_setup(pf, pf->hw.port_info); + if (!pf->switchdev.control_vsi) + return -ENODEV; + + ctrl_vsi = pf->switchdev.control_vsi; + pf->switchdev.uplink_vsi = ice_get_main_vsi(pf); + if (!pf->switchdev.uplink_vsi) + goto err_vsi; + + if (ice_eswitch_setup_env(pf)) + goto err_vsi; + + if (ice_repr_add_for_all_vfs(pf)) + goto err_repr_add; + + if (ice_eswitch_setup_reprs(pf)) + goto err_setup_reprs; + + ice_eswitch_remap_rings_to_vectors(pf); + + if (ice_vsi_open(ctrl_vsi)) + goto err_setup_reprs; + + ice_eswitch_napi_enable(pf); + + ice_eswitch_set_rxdid(ctrl_vsi, ICE_RXDID_FLEX_NIC_2); + + return 0; + +err_setup_reprs: + ice_repr_rem_from_all_vfs(pf); +err_repr_add: + ice_eswitch_release_env(pf); +err_vsi: + ice_vsi_release(ctrl_vsi); + return -ENODEV; +} + +/** + * ice_eswitch_disable_switchdev - disable switchdev resources + * @pf: pointer to PF structure + */ +static void ice_eswitch_disable_switchdev(struct ice_pf *pf) +{ + struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + + ice_eswitch_napi_disable(pf); + ice_eswitch_release_env(pf); + ice_eswitch_release_reprs(pf, ctrl_vsi); + ice_vsi_release(ctrl_vsi); + ice_repr_rem_from_all_vfs(pf); +} + +/** + * ice_eswitch_mode_set - set new eswitch 
mode + * @devlink: pointer to devlink structure + * @mode: eswitch mode to switch to + * @extack: pointer to extack structure + */ +int +ice_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + struct ice_pf *pf = devlink_priv(devlink); + + if (pf->eswitch_mode == mode) + return 0; + + if (pf->num_alloc_vfs) { + dev_info(ice_pf_to_dev(pf), "Changing eswitch mode is allowed only if there is no VFs created"); + NL_SET_ERR_MSG_MOD(extack, "Changing eswitch mode is allowed only if there is no VFs created"); + return -EOPNOTSUPP; + } + + switch (mode) { + case DEVLINK_ESWITCH_MODE_LEGACY: + dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to legacy", + pf->hw.pf_id); + NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to legacy"); + break; + case DEVLINK_ESWITCH_MODE_SWITCHDEV: + { + dev_info(ice_pf_to_dev(pf), "PF %d changed eswitch mode to switchdev", + pf->hw.pf_id); + NL_SET_ERR_MSG_MOD(extack, "Changed eswitch mode to switchdev"); + break; + } + default: + NL_SET_ERR_MSG_MOD(extack, "Unknown eswitch mode"); + return -EINVAL; + } + + pf->eswitch_mode = mode; + return 0; +} + +/** + * ice_eswitch_get_target_netdev - return port representor netdev + * @rx_ring: pointer to Rx ring + * @rx_desc: pointer to Rx descriptor + * + * When working in switchdev mode context (when control VSI is used), this + * function returns netdev of appropriate port representor. For non-switchdev + * context, regular netdev associated with Rx ring is returned. + */ +struct net_device * +ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) +{ + struct ice_32b_rx_flex_desc_nic_2 *desc; + struct ice_vsi *vsi = rx_ring->vsi; + struct ice_vsi *control_vsi; + u16 target_vsi_id; + + control_vsi = vsi->back->switchdev.control_vsi; + if (vsi != control_vsi) + return rx_ring->netdev; + + desc = (struct ice_32b_rx_flex_desc_nic_2 *)rx_desc; + target_vsi_id = le16_to_cpu(desc->src_vsi); + + return vsi->target_netdevs[target_vsi_id]; +} + +/** + * ice_eswitch_mode_get - get current eswitch mode + * @devlink: pointer to devlink structure + * @mode: output parameter for current eswitch mode + */ +int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + struct ice_pf *pf = devlink_priv(devlink); + + *mode = pf->eswitch_mode; + return 0; +} + +/** + * ice_is_eswitch_mode_switchdev - check if eswitch mode is set to switchdev + * @pf: pointer to PF structure + * + * Returns true if eswitch mode is set to DEVLINK_ESWITCH_MODE_SWITCHDEV, + * false otherwise. 
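ice_eswitch_mode_set above only honors a mode change while no VFs are allocated and accepts exactly the legacy and switchdev modes. A userspace-style sketch of that decision flow, with locally defined constants standing in for the kernel's devlink mode and errno values (legacy/switchdev assumed to be 0/1 here):

#include <stdio.h>

#define ESWITCH_MODE_LEGACY    0   /* stand-ins for DEVLINK_ESWITCH_MODE_* */
#define ESWITCH_MODE_SWITCHDEV 1
#define EOPNOTSUPP 95
#define EINVAL     22

struct pf_state { int eswitch_mode; int num_alloc_vfs; };

static int eswitch_mode_set(struct pf_state *pf, int mode)
{
	if (pf->eswitch_mode == mode)
		return 0;                   /* nothing to do */
	if (pf->num_alloc_vfs)
		return -EOPNOTSUPP;         /* only allowed with no VFs created */
	if (mode != ESWITCH_MODE_LEGACY && mode != ESWITCH_MODE_SWITCHDEV)
		return -EINVAL;             /* unknown eswitch mode */

	pf->eswitch_mode = mode;
	return 0;
}

int main(void)
{
	struct pf_state pf = { ESWITCH_MODE_LEGACY, 0 };

	printf("to switchdev: %d\n", eswitch_mode_set(&pf, ESWITCH_MODE_SWITCHDEV));
	pf.num_alloc_vfs = 2;
	printf("back to legacy with VFs: %d\n",
	       eswitch_mode_set(&pf, ESWITCH_MODE_LEGACY));
	return 0;
}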
+ */ +bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf) +{ + return pf->eswitch_mode == DEVLINK_ESWITCH_MODE_SWITCHDEV; +} + +/** + * ice_eswitch_release - cleanup eswitch + * @pf: pointer to PF structure + */ +void ice_eswitch_release(struct ice_pf *pf) +{ + if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY) + return; + + ice_eswitch_disable_switchdev(pf); + pf->switchdev.is_running = false; +} + +/** + * ice_eswitch_configure - configure eswitch + * @pf: pointer to PF structure + */ +int ice_eswitch_configure(struct ice_pf *pf) +{ + int status; + + if (pf->eswitch_mode == DEVLINK_ESWITCH_MODE_LEGACY || pf->switchdev.is_running) + return 0; + + status = ice_eswitch_enable_switchdev(pf); + if (status) + return status; + + pf->switchdev.is_running = true; + return 0; +} + +/** + * ice_eswitch_start_all_tx_queues - start Tx queues of all port representors + * @pf: pointer to PF structure + */ +static void ice_eswitch_start_all_tx_queues(struct ice_pf *pf) +{ + struct ice_repr *repr; + int i; + + if (test_bit(ICE_DOWN, pf->state)) + return; + + ice_for_each_vf(pf, i) { + repr = pf->vf[i].repr; + if (repr) + ice_repr_start_tx_queues(repr); + } +} + +/** + * ice_eswitch_stop_all_tx_queues - stop Tx queues of all port representors + * @pf: pointer to PF structure + */ +void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) +{ + struct ice_repr *repr; + int i; + + if (test_bit(ICE_DOWN, pf->state)) + return; + + ice_for_each_vf(pf, i) { + repr = pf->vf[i].repr; + if (repr) + ice_repr_stop_tx_queues(repr); + } +} + +/** + * ice_eswitch_rebuild - rebuild eswitch + * @pf: pointer to PF structure + */ +int ice_eswitch_rebuild(struct ice_pf *pf) +{ + struct ice_vsi *ctrl_vsi = pf->switchdev.control_vsi; + int status; + + ice_eswitch_napi_disable(pf); + ice_eswitch_napi_del(pf); + + status = ice_eswitch_setup_env(pf); + if (status) + return status; + + status = ice_eswitch_setup_reprs(pf); + if (status) + return status; + + ice_eswitch_remap_rings_to_vectors(pf); + + ice_replay_tc_fltrs(pf); + + status = ice_vsi_open(ctrl_vsi); + if (status) + return status; + + ice_eswitch_napi_enable(pf); + ice_eswitch_set_rxdid(ctrl_vsi, ICE_RXDID_FLEX_NIC_2); + ice_eswitch_start_all_tx_queues(pf); + + return 0; +} diff --git a/drivers/net/ethernet/intel/ice/ice_eswitch.h b/drivers/net/ethernet/intel/ice/ice_eswitch.h new file mode 100644 index 000000000000..364cd2a79c37 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_eswitch.h @@ -0,0 +1,83 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. 
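ice_eswitch_rebuild above re-runs the switchdev bring-up steps in a fixed order and bails out on the first failure. A compact standalone sketch of that early-return pipeline; the step functions are placeholders, not the driver's real helpers:

#include <stdio.h>

/* Each stub stands in for one stage of the rebuild; a nonzero return
 * aborts the sequence, mirroring the early returns above.
 */
static int setup_env(void)     { return 0; }
static int setup_reprs(void)   { return 0; }
static int open_ctrl_vsi(void) { return 0; }

static int eswitch_rebuild(void)
{
	int err;

	err = setup_env();
	if (err)
		return err;
	err = setup_reprs();
	if (err)
		return err;
	/* remap rings to vectors, replay TC filters (no failure paths) */
	err = open_ctrl_vsi();
	if (err)
		return err;
	/* enable NAPI, set RXDID, restart representor Tx queues */
	return 0;
}

int main(void)
{
	printf("rebuild: %d\n", eswitch_rebuild());
	return 0;
}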
*/ + +#ifndef _ICE_ESWITCH_H_ +#define _ICE_ESWITCH_H_ + +#include <net/devlink.h> + +#ifdef CONFIG_ICE_SWITCHDEV +void ice_eswitch_release(struct ice_pf *pf); +int ice_eswitch_configure(struct ice_pf *pf); +int ice_eswitch_rebuild(struct ice_pf *pf); + +int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode); +int +ice_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack); +bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf); + +void ice_eswitch_update_repr(struct ice_vsi *vsi); + +void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf); + +struct net_device * +ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc); + +void ice_eswitch_set_target_vsi(struct sk_buff *skb, + struct ice_tx_offload_params *off); +netdev_tx_t +ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev); +#else /* CONFIG_ICE_SWITCHDEV */ +static inline void ice_eswitch_release(struct ice_pf *pf) { } + +static inline void ice_eswitch_stop_all_tx_queues(struct ice_pf *pf) { } + +static inline void +ice_eswitch_set_target_vsi(struct sk_buff *skb, + struct ice_tx_offload_params *off) { } + +static inline void ice_eswitch_update_repr(struct ice_vsi *vsi) { } + +static inline int ice_eswitch_configure(struct ice_pf *pf) +{ + return -EOPNOTSUPP; +} + +static inline int ice_eswitch_rebuild(struct ice_pf *pf) +{ + return -EOPNOTSUPP; +} + +static inline int ice_eswitch_mode_get(struct devlink *devlink, u16 *mode) +{ + return DEVLINK_ESWITCH_MODE_LEGACY; +} + +static inline int +ice_eswitch_mode_set(struct devlink *devlink, u16 mode, + struct netlink_ext_ack *extack) +{ + return -EOPNOTSUPP; +} + +static inline bool ice_is_eswitch_mode_switchdev(struct ice_pf *pf) +{ + return false; +} + +static inline struct net_device * +ice_eswitch_get_target_netdev(struct ice_rx_ring *rx_ring, + union ice_32b_rx_flex_desc *rx_desc) +{ + return rx_ring->netdev; +} + +static inline netdev_tx_t +ice_eswitch_port_start_xmit(struct sk_buff *skb, struct net_device *netdev) +{ + return NETDEV_TX_BUSY; +} +#endif /* CONFIG_ICE_SWITCHDEV */ +#endif /* _ICE_ESWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c index c451cf401e63..572519e402f4 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c @@ -170,10 +170,9 @@ static const struct ice_priv_flag ice_gstrings_priv_flags[] = { #define ICE_PRIV_FLAG_ARRAY_SIZE ARRAY_SIZE(ice_gstrings_priv_flags) static void -ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +__ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; struct ice_orom_info *orom; @@ -190,9 +189,19 @@ ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version), "%x.%02x 0x%x %d.%d.%d", nvm->major, nvm->minor, nvm->eetrack, orom->major, orom->build, orom->patch); +} + +static void +ice_get_drvinfo(struct net_device *netdev, struct ethtool_drvinfo *drvinfo) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + + __ice_get_drvinfo(netdev, drvinfo, np->vsi); strscpy(drvinfo->bus_info, pci_name(pf->pdev), sizeof(drvinfo->bus_info)); + drvinfo->n_priv_flags = ICE_PRIV_FLAG_ARRAY_SIZE; } @@ -584,7 
+593,7 @@ static bool ice_lbtest_check_frame(u8 *frame) * * Function sends loopback packets on a test Tx ring. */ -static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size) +static int ice_diag_send(struct ice_tx_ring *tx_ring, u8 *data, u16 size) { struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; @@ -637,7 +646,7 @@ static int ice_diag_send(struct ice_ring *tx_ring, u8 *data, u16 size) * Function receives loopback packets and verify their correctness. * Returns number of received valid frames. */ -static int ice_lbtest_receive_frames(struct ice_ring *rx_ring) +static int ice_lbtest_receive_frames(struct ice_rx_ring *rx_ring) { struct ice_rx_buf *rx_buf; int valid_frames, i; @@ -676,9 +685,10 @@ static u64 ice_loopback_test(struct net_device *netdev) struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *orig_vsi = np->vsi, *test_vsi; struct ice_pf *pf = orig_vsi->back; - struct ice_ring *tx_ring, *rx_ring; u8 broadcast[ETH_ALEN], ret = 0; int num_frames, valid_frames; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; struct device *dev; u8 *tx_frame; int i; @@ -866,10 +876,10 @@ skip_ol_tests: netdev_info(netdev, "testing finished\n"); } -static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +static void +__ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; unsigned int i; u8 *p = data; @@ -879,6 +889,9 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) ethtool_sprintf(&p, ice_gstrings_vsi_stats[i].stat_string); + if (ice_is_port_repr_netdev(netdev)) + return; + ice_for_each_alloc_txq(vsi, i) { ethtool_sprintf(&p, "tx_queue_%u_packets", i); ethtool_sprintf(&p, "tx_queue_%u_bytes", i); @@ -917,6 +930,13 @@ static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) } } +static void ice_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_strings(netdev, stringset, data, np->vsi); +} + static int ice_set_phys_id(struct net_device *netdev, enum ethtool_phys_id_state state) { @@ -1215,6 +1235,13 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags) enum ice_status status; bool dcbx_agent_status; + if (ice_get_pfc_mode(pf) == ICE_QOS_MODE_DSCP) { + clear_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags); + dev_err(dev, "QoS in L3 DSCP mode, FW Agent not allowed to start\n"); + ret = -EOPNOTSUPP; + goto ethtool_exit; + } + /* Remove rule to direct LLDP packets to default VSI. * The FW LLDP engine will now be consuming them. */ @@ -1312,13 +1339,13 @@ static int ice_get_sset_count(struct net_device *netdev, int sset) } static void -ice_get_ethtool_stats(struct net_device *netdev, - struct ethtool_stats __always_unused *stats, u64 *data) +__ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data, + struct ice_vsi *vsi) { - struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; - struct ice_ring *ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; unsigned int j; int i = 0; char *p; @@ -1332,14 +1359,17 @@ ice_get_ethtool_stats(struct net_device *netdev, sizeof(u64)) ? 
*(u64 *)p : *(u32 *)p; } + if (ice_is_port_repr_netdev(netdev)) + return; + /* populate per queue stats */ rcu_read_lock(); ice_for_each_alloc_txq(vsi, j) { - ring = READ_ONCE(vsi->tx_rings[j]); - if (ring) { - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + tx_ring = READ_ONCE(vsi->tx_rings[j]); + if (tx_ring) { + data[i++] = tx_ring->stats.pkts; + data[i++] = tx_ring->stats.bytes; } else { data[i++] = 0; data[i++] = 0; @@ -1347,10 +1377,10 @@ ice_get_ethtool_stats(struct net_device *netdev, } ice_for_each_alloc_rxq(vsi, j) { - ring = READ_ONCE(vsi->rx_rings[j]); - if (ring) { - data[i++] = ring->stats.pkts; - data[i++] = ring->stats.bytes; + rx_ring = READ_ONCE(vsi->rx_rings[j]); + if (rx_ring) { + data[i++] = rx_ring->stats.pkts; + data[i++] = rx_ring->stats.bytes; } else { data[i++] = 0; data[i++] = 0; @@ -1379,6 +1409,15 @@ ice_get_ethtool_stats(struct net_device *netdev, } } +static void +ice_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, u64 *data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + __ice_get_ethtool_stats(netdev, stats, data, np->vsi); +} + #define ICE_PHY_TYPE_LOW_MASK_MIN_1G (ICE_PHY_TYPE_LOW_100BASE_TX | \ ICE_PHY_TYPE_LOW_100M_SGMII) @@ -2667,9 +2706,10 @@ ice_get_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) static int ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) { - struct ice_ring *tx_rings = NULL, *rx_rings = NULL; struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_ring *xdp_rings = NULL; + struct ice_tx_ring *xdp_rings = NULL; + struct ice_tx_ring *tx_rings = NULL; + struct ice_rx_ring *rx_rings = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; int i, timeout = 50, err = 0; @@ -2718,12 +2758,12 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) /* set for the next time the netdev is started */ if (!netif_running(vsi->netdev)) { - for (i = 0; i < vsi->alloc_txq; i++) + ice_for_each_alloc_txq(vsi, i) vsi->tx_rings[i]->count = new_tx_cnt; - for (i = 0; i < vsi->alloc_rxq; i++) + ice_for_each_alloc_rxq(vsi, i) vsi->rx_rings[i]->count = new_rx_cnt; if (ice_is_xdp_ena_vsi(vsi)) - for (i = 0; i < vsi->num_xdp_txq; i++) + ice_for_each_xdp_txq(vsi, i) vsi->xdp_rings[i]->count = new_tx_cnt; vsi->num_tx_desc = (u16)new_tx_cnt; vsi->num_rx_desc = (u16)new_rx_cnt; @@ -2772,7 +2812,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring) goto free_tx; } - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { /* clone ring and setup updated count */ xdp_rings[i] = *vsi->xdp_rings[i]; xdp_rings[i].count = new_tx_cnt; @@ -2866,7 +2906,7 @@ process_link: } if (xdp_rings) { - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { ice_free_tx_ring(vsi->xdp_rings[i]); *vsi->xdp_rings[i] = xdp_rings[i]; } @@ -3155,6 +3195,11 @@ ice_set_rxfh(struct net_device *netdev, const u32 *indir, const u8 *key, return -EIO; } + if (ice_is_adq_active(pf)) { + netdev_err(netdev, "Cannot change RSS params with ADQ configured.\n"); + return -EOPNOTSUPP; + } + if (key) { if (!vsi->rss_hkey_user) { vsi->rss_hkey_user = @@ -3255,7 +3300,7 @@ static u32 ice_get_combined_cnt(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - if (q_vector->rx.ring && q_vector->tx.ring) + if (q_vector->rx.rx_ring && q_vector->tx.tx_ring) combined++; } @@ -3365,6 +3410,11 @@ static int ice_set_channels(struct 
net_device *dev, struct ethtool_channels *ch) if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U)) return -EINVAL; + if (ice_is_adq_active(pf)) { + netdev_err(dev, "Cannot set channels with ADQ configured.\n"); + return -EOPNOTSUPP; + } + if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) { netdev_err(dev, "Cannot set channels when Flow Director filters are active\n"); return -EOPNOTSUPP; @@ -3466,15 +3516,9 @@ static int ice_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol) return 0; } -enum ice_container_type { - ICE_RX_CONTAINER, - ICE_TX_CONTAINER, -}; - /** * ice_get_rc_coalesce - get ITR values for specific ring container * @ec: ethtool structure to fill with driver's coalesce settings - * @c_type: container type, Rx or Tx * @rc: ring container that the ITR values will come from * * Query the device for ice_ring_container specific ITR values. This is @@ -3484,24 +3528,23 @@ enum ice_container_type { * Returns 0 on success, negative otherwise. */ static int -ice_get_rc_coalesce(struct ethtool_coalesce *ec, enum ice_container_type c_type, - struct ice_ring_container *rc) +ice_get_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc) { - if (!rc->ring) + if (!rc->rx_ring) return -EINVAL; - switch (c_type) { + switch (rc->type) { case ICE_RX_CONTAINER: ec->use_adaptive_rx_coalesce = ITR_IS_DYNAMIC(rc); ec->rx_coalesce_usecs = rc->itr_setting; - ec->rx_coalesce_usecs_high = rc->ring->q_vector->intrl; + ec->rx_coalesce_usecs_high = rc->rx_ring->q_vector->intrl; break; case ICE_TX_CONTAINER: ec->use_adaptive_tx_coalesce = ITR_IS_DYNAMIC(rc); ec->tx_coalesce_usecs = rc->itr_setting; break; default: - dev_dbg(ice_pf_to_dev(rc->ring->vsi->back), "Invalid c_type %d\n", c_type); + dev_dbg(ice_pf_to_dev(rc->rx_ring->vsi->back), "Invalid c_type %d\n", rc->type); return -EINVAL; } @@ -3522,18 +3565,18 @@ static int ice_get_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { - if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx)) return -EINVAL; - if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx)) return -EINVAL; } else if (q_num < vsi->num_rxq) { - if (ice_get_rc_coalesce(ec, ICE_RX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx)) return -EINVAL; } else if (q_num < vsi->num_txq) { - if (ice_get_rc_coalesce(ec, ICE_TX_CONTAINER, + if (ice_get_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx)) return -EINVAL; } else { @@ -3585,7 +3628,6 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, /** * ice_set_rc_coalesce - set ITR values for specific ring container - * @c_type: container type, Rx or Tx * @ec: ethtool structure from user to update ITR settings * @rc: ring container that the ITR values will come from * @vsi: VSI associated to the ring container @@ -3597,19 +3639,22 @@ ice_get_per_q_coalesce(struct net_device *netdev, u32 q_num, * Returns 0 on success, negative otherwise. */ static int -ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, +ice_set_rc_coalesce(struct ethtool_coalesce *ec, struct ice_ring_container *rc, struct ice_vsi *vsi) { - const char *c_type_str = (c_type == ICE_RX_CONTAINER) ? "rx" : "tx"; + const char *c_type_str = (rc->type == ICE_RX_CONTAINER) ? 
"rx" : "tx"; u32 use_adaptive_coalesce, coalesce_usecs; struct ice_pf *pf = vsi->back; u16 itr_setting; - if (!rc->ring) + if (!rc->rx_ring) return -EINVAL; - switch (c_type) { + switch (rc->type) { case ICE_RX_CONTAINER: + { + struct ice_q_vector *q_vector = rc->rx_ring->q_vector; + if (ec->rx_coalesce_usecs_high > ICE_MAX_INTRL || (ec->rx_coalesce_usecs_high && ec->rx_coalesce_usecs_high < pf->hw.intrl_gran)) { @@ -3618,22 +3663,20 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, ICE_MAX_INTRL); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl && + if (ec->rx_coalesce_usecs_high != q_vector->intrl && (ec->use_adaptive_rx_coalesce || ec->use_adaptive_tx_coalesce)) { netdev_info(vsi->netdev, "Invalid value, %s-usecs-high cannot be changed if adaptive-tx or adaptive-rx is enabled\n", c_type_str); return -EINVAL; } - if (ec->rx_coalesce_usecs_high != rc->ring->q_vector->intrl) { - rc->ring->q_vector->intrl = ec->rx_coalesce_usecs_high; - ice_write_intrl(rc->ring->q_vector, - ec->rx_coalesce_usecs_high); - } + if (ec->rx_coalesce_usecs_high != q_vector->intrl) + q_vector->intrl = ec->rx_coalesce_usecs_high; use_adaptive_coalesce = ec->use_adaptive_rx_coalesce; coalesce_usecs = ec->rx_coalesce_usecs; break; + } case ICE_TX_CONTAINER: use_adaptive_coalesce = ec->use_adaptive_tx_coalesce; coalesce_usecs = ec->tx_coalesce_usecs; @@ -3641,7 +3684,7 @@ ice_set_rc_coalesce(enum ice_container_type c_type, struct ethtool_coalesce *ec, break; default: dev_dbg(ice_pf_to_dev(pf), "Invalid container type %d\n", - c_type); + rc->type); return -EINVAL; } @@ -3690,22 +3733,22 @@ static int ice_set_q_coalesce(struct ice_vsi *vsi, struct ethtool_coalesce *ec, int q_num) { if (q_num < vsi->num_rxq && q_num < vsi->num_txq) { - if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx, vsi)) return -EINVAL; - if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx, vsi)) return -EINVAL; } else if (q_num < vsi->num_rxq) { - if (ice_set_rc_coalesce(ICE_RX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->rx_rings[q_num]->q_vector->rx, vsi)) return -EINVAL; } else if (q_num < vsi->num_txq) { - if (ice_set_rc_coalesce(ICE_TX_CONTAINER, ec, + if (ice_set_rc_coalesce(ec, &vsi->tx_rings[q_num]->q_vector->tx, vsi)) return -EINVAL; @@ -3778,6 +3821,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, v_idx)) return -EINVAL; + + ice_set_q_vector_intrl(vsi->q_vectors[v_idx]); } goto set_complete; } @@ -3785,6 +3830,8 @@ __ice_set_coalesce(struct net_device *netdev, struct ethtool_coalesce *ec, if (ice_set_q_coalesce(vsi, ec, q_num)) return -EINVAL; + ice_set_q_vector_intrl(vsi->q_vectors[q_num]); + set_complete: return 0; } @@ -3804,6 +3851,54 @@ ice_set_per_q_coalesce(struct net_device *netdev, u32 q_num, return __ice_set_coalesce(netdev, ec, q_num); } +static void +ice_repr_get_drvinfo(struct net_device *netdev, + struct ethtool_drvinfo *drvinfo) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_drvinfo(netdev, drvinfo, repr->src_vsi); +} + +static void +ice_repr_get_strings(struct net_device *netdev, u32 stringset, u8 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + /* for port representors only ETH_SS_STATS is supported */ + if (ice_check_vf_ready_for_cfg(repr->vf) || + stringset != 
ETH_SS_STATS) + return; + + __ice_get_strings(netdev, stringset, data, repr->src_vsi); +} + +static void +ice_repr_get_ethtool_stats(struct net_device *netdev, + struct ethtool_stats __always_unused *stats, + u64 *data) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + if (ice_check_vf_ready_for_cfg(repr->vf)) + return; + + __ice_get_ethtool_stats(netdev, stats, data, repr->src_vsi); +} + +static int ice_repr_get_sset_count(struct net_device *netdev, int sset) +{ + switch (sset) { + case ETH_SS_STATS: + return ICE_VSI_STATS_LEN; + default: + return -EOPNOTSUPP; + } +} + #define ICE_I2C_EEPROM_DEV_ADDR 0xA0 #define ICE_I2C_EEPROM_DEV_ADDR2 0xA2 #define ICE_MODULE_TYPE_SFP 0x03 @@ -4055,6 +4150,23 @@ void ice_set_ethtool_safe_mode_ops(struct net_device *netdev) netdev->ethtool_ops = &ice_ethtool_safe_mode_ops; } +static const struct ethtool_ops ice_ethtool_repr_ops = { + .get_drvinfo = ice_repr_get_drvinfo, + .get_link = ethtool_op_get_link, + .get_strings = ice_repr_get_strings, + .get_ethtool_stats = ice_repr_get_ethtool_stats, + .get_sset_count = ice_repr_get_sset_count, +}; + +/** + * ice_set_ethtool_repr_ops - setup VF's port representor ethtool ops + * @netdev: network interface device structure + */ +void ice_set_ethtool_repr_ops(struct net_device *netdev) +{ + netdev->ethtool_ops = &ice_ethtool_repr_ops; +} + /** * ice_set_ethtool_ops - setup netdev ethtool ops * @netdev: network interface device structure diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c index 16de603b280c..38960bcc384c 100644 --- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c @@ -706,7 +706,7 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow) if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); @@ -1068,7 +1068,7 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp, if (!seg) return -ENOMEM; - tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX, + tun_seg = devm_kcalloc(dev, sizeof(*seg), ICE_FD_HW_SEG_MAX, GFP_KERNEL); if (!tun_seg) { devm_kfree(dev, seg); diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c index 59ef68f072c0..cbd8424631e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.c +++ b/drivers/net/ethernet/intel/ice/ice_fdir.c @@ -952,7 +952,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input, ice_pkt_insert_u8(loc, ICE_IPV4_TTL_OFFSET, input->ip.v4.ttl); ice_pkt_insert_mac_addr(loc, input->ext_data.dst_mac); if (frag) - loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF; + loc[20] = ICE_FDIR_IPV4_PKT_FLAG_MF; break; case ICE_FLTR_PTYPE_NONF_IPV4_UDP: ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET, diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h index d2d40e18ae8a..da4163856f4c 100644 --- a/drivers/net/ethernet/intel/ice/ice_fdir.h +++ b/drivers/net/ethernet/intel/ice/ice_fdir.h @@ -48,7 +48,7 @@ * requests that the packet not be fragmented. MF indicates that a packet has * been fragmented. 
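The flag rename in the flow director code above, with the matching #define change just below (ICE_FDIR_IPV4_PKT_FLAG_DF becoming ICE_FDIR_IPV4_PKT_FLAG_MF for the value 0x20), matches the IPv4 header layout: in the upper byte of the 16-bit flags/fragment-offset field, 0x40 is Don't Fragment and 0x20 is More Fragments, so writing 0x20 marks a fragmented packet rather than one that must not be fragmented. A tiny standalone check of those bit positions:

#include <stdio.h>
#include <stdint.h>

#define IPV4_FLAG_DF 0x40   /* Don't Fragment, bit 1 of the flags field  */
#define IPV4_FLAG_MF 0x20   /* More Fragments, bit 2 of the flags field  */

int main(void)
{
	uint8_t flags_byte = IPV4_FLAG_MF;  /* what the FDIR training packet sets */

	printf("DF set: %s, MF set: %s\n",
	       (flags_byte & IPV4_FLAG_DF) ? "yes" : "no",
	       (flags_byte & IPV4_FLAG_MF) ? "yes" : "no");
	return 0;
}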
*/ -#define ICE_FDIR_IPV4_PKT_FLAG_DF 0x20 +#define ICE_FDIR_IPV4_PKT_FLAG_MF 0x20 enum ice_fltr_prgm_desc_dest { ICE_FLTR_PRGM_DESC_DEST_DROP_PKT, diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c index 1ac96dc66d0d..23cfcceb1536 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c @@ -735,7 +735,7 @@ static void ice_release_global_cfg_lock(struct ice_hw *hw) * * This function will request ownership of the change lock. */ -static enum ice_status +enum ice_status ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access) { return ice_acquire_res(hw, ICE_CHANGE_LOCK_RES_ID, access, @@ -748,7 +748,7 @@ ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access) * * This function will release the change lock using the proper Admin Command. */ -static void ice_release_change_lock(struct ice_hw *hw) +void ice_release_change_lock(struct ice_hw *hw) { ice_release_res(hw, ICE_CHANGE_LOCK_RES_ID); } @@ -1330,6 +1330,86 @@ fw_ddp_compat_free_alloc: } /** + * ice_sw_fv_handler + * @sect_type: section type + * @section: pointer to section + * @index: index of the field vector entry to be returned + * @offset: ptr to variable that receives the offset in the field vector table + * + * This is a callback function that can be passed to ice_pkg_enum_entry. + * This function treats the given section as of type ice_sw_fv_section and + * enumerates offset field. "offset" is an index into the field vector table. + */ +static void * +ice_sw_fv_handler(u32 sect_type, void *section, u32 index, u32 *offset) +{ + struct ice_sw_fv_section *fv_section = section; + + if (!section || sect_type != ICE_SID_FLD_VEC_SW) + return NULL; + if (index >= le16_to_cpu(fv_section->count)) + return NULL; + if (offset) + /* "index" passed in to this function is relative to a given + * 4k block. To get to the true index into the field vector + * table need to add the relative index to the base_offset + * field of this section + */ + *offset = le16_to_cpu(fv_section->base_offset) + index; + return fv_section->fv + index; +} + +/** + * ice_get_prof_index_max - get the max profile index for used profile + * @hw: pointer to the HW struct + * + * Calling this function will get the max profile index for used profile + * and store the index number in struct ice_switch_info *switch_info + * in HW for following use. + */ +static enum ice_status ice_get_prof_index_max(struct ice_hw *hw) +{ + u16 prof_index = 0, j, max_prof_index = 0; + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + bool flag = false; + struct ice_fv *fv; + u32 offset; + + memset(&state, 0, sizeof(state)); + + if (!hw->seg) + return ICE_ERR_PARAM; + + ice_seg = hw->seg; + + do { + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &offset, ice_sw_fv_handler); + if (!fv) + break; + ice_seg = NULL; + + /* in the profile that not be used, the prot_id is set to 0xff + * and the off is set to 0x1ff for all the field vectors. 
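The comment above explains how an unused profile is recognized: every word of its field vector carries the invalid marker pair (prot_id 0xff, off 0x1ff). The loop that follows records the highest profile index whose field vector differs from that marker. A standalone sketch of the same scan over a toy table, with stand-in values for ICE_PROT_INVALID and ICE_FV_OFFSET_INVAL and a reduced field-vector width:

#include <stdio.h>
#include <stdint.h>

#define PROT_INVALID    0xFF    /* stand-in for ICE_PROT_INVALID    */
#define FV_OFFSET_INVAL 0x1FF   /* stand-in for ICE_FV_OFFSET_INVAL */
#define FVW             3       /* words per field vector (simplified) */
#define NUM_PROFS       4

struct fv_word { uint8_t prot_id; uint16_t off; };

int main(void)
{
	/* Profiles 0 and 2 carry real extractions; 1 and 3 are unused. */
	struct fv_word fv[NUM_PROFS][FVW] = {
		[0] = { { 0x10, 12 }, { PROT_INVALID, FV_OFFSET_INVAL },
			{ PROT_INVALID, FV_OFFSET_INVAL } },
		[1] = { { PROT_INVALID, FV_OFFSET_INVAL },
			{ PROT_INVALID, FV_OFFSET_INVAL },
			{ PROT_INVALID, FV_OFFSET_INVAL } },
		[2] = { { 0x22, 4 }, { 0x23, 8 },
			{ PROT_INVALID, FV_OFFSET_INVAL } },
		[3] = { { PROT_INVALID, FV_OFFSET_INVAL },
			{ PROT_INVALID, FV_OFFSET_INVAL },
			{ PROT_INVALID, FV_OFFSET_INVAL } },
	};
	int prof, j, max_used = 0;

	for (prof = 0; prof < NUM_PROFS; prof++)
		for (j = 0; j < FVW; j++)
			if (fv[prof][j].prot_id != PROT_INVALID ||
			    fv[prof][j].off != FV_OFFSET_INVAL) {
				if (prof > max_used)
					max_used = prof;
				break;
			}

	printf("max used profile index: %d\n", max_used);  /* prints 2 */
	return 0;
}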
+ */ + for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++) + if (fv->ew[j].prot_id != ICE_PROT_INVALID || + fv->ew[j].off != ICE_FV_OFFSET_INVAL) + flag = true; + if (flag && prof_index > max_prof_index) + max_prof_index = prof_index; + + prof_index++; + flag = false; + } while (fv); + + hw->switch_info->max_used_prof_index = max_prof_index; + + return 0; +} + +/** * ice_init_pkg - initialize/download package * @hw: pointer to the hardware structure * @buf: pointer to the package buffer @@ -1408,6 +1488,7 @@ enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len) */ ice_init_pkg_regs(hw); ice_fill_blk_tbls(hw); + ice_get_prof_index_max(hw); } else { ice_debug(hw, ICE_DBG_INIT, "package load failed, %d\n", status); @@ -1485,6 +1566,195 @@ static struct ice_buf_build *ice_pkg_buf_alloc(struct ice_hw *hw) } /** + * ice_get_sw_prof_type - determine switch profile type + * @hw: pointer to the HW structure + * @fv: pointer to the switch field vector + */ +static enum ice_prof_type +ice_get_sw_prof_type(struct ice_hw *hw, struct ice_fv *fv) +{ + u16 i; + + for (i = 0; i < hw->blk[ICE_BLK_SW].es.fvw; i++) { + /* UDP tunnel will have UDP_OF protocol ID and VNI offset */ + if (fv->ew[i].prot_id == (u8)ICE_PROT_UDP_OF && + fv->ew[i].off == ICE_VNI_OFFSET) + return ICE_PROF_TUN_UDP; + + /* GRE tunnel will have GRE protocol */ + if (fv->ew[i].prot_id == (u8)ICE_PROT_GRE_OF) + return ICE_PROF_TUN_GRE; + } + + return ICE_PROF_NON_TUN; +} + +/** + * ice_get_sw_fv_bitmap - Get switch field vector bitmap based on profile type + * @hw: pointer to hardware structure + * @req_profs: type of profiles requested + * @bm: pointer to memory for returning the bitmap of field vectors + */ +void +ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type req_profs, + unsigned long *bm) +{ + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + struct ice_fv *fv; + + if (req_profs == ICE_PROF_ALL) { + bitmap_set(bm, 0, ICE_MAX_NUM_PROFILES); + return; + } + + memset(&state, 0, sizeof(state)); + bitmap_zero(bm, ICE_MAX_NUM_PROFILES); + ice_seg = hw->seg; + do { + enum ice_prof_type prof_type; + u32 offset; + + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &offset, ice_sw_fv_handler); + ice_seg = NULL; + + if (fv) { + /* Determine field vector type */ + prof_type = ice_get_sw_prof_type(hw, fv); + + if (req_profs & prof_type) + set_bit((u16)offset, bm); + } + } while (fv); +} + +/** + * ice_get_sw_fv_list + * @hw: pointer to the HW structure + * @prot_ids: field vector to search for with a given protocol ID + * @ids_cnt: lookup/protocol count + * @bm: bitmap of field vectors to consider + * @fv_list: Head of a list + * + * Finds all the field vector entries from switch block that contain + * a given protocol ID and returns a list of structures of type + * "ice_sw_fv_list_entry". Every structure in the list has a field vector + * definition and profile ID information + * NOTE: The caller of the function is responsible for freeing the memory + * allocated for every list entry. 
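As the note above says, ice_get_sw_fv_list (whose body follows) only keeps a profile when its field vector contains every requested protocol ID: the inner loop gives up as soon as one ID is missing, and the list entry is added only after the last requested ID has also matched. A condensed standalone version of that containment check, with a made-up field-vector width:

#include <stdio.h>
#include <stdint.h>
#include <stdbool.h>

#define FVW 6   /* words per field vector (simplified) */

static bool fv_has_all_prots(const uint8_t fv_prot[FVW],
			     const uint8_t *prot_ids, int ids_cnt)
{
	int i, j;

	for (i = 0; i < ids_cnt; i++) {
		for (j = 0; j < FVW; j++)
			if (fv_prot[j] == prot_ids[i])
				break;
		if (j >= FVW)
			return false;   /* one requested protocol is missing */
	}
	return true;                    /* every requested protocol matched */
}

int main(void)
{
	const uint8_t fv_prot[FVW] = { 0x10, 0x20, 0x30, 0xFF, 0xFF, 0xFF };
	const uint8_t want_ok[]    = { 0x10, 0x30 };
	const uint8_t want_bad[]   = { 0x10, 0x40 };

	printf("{0x10,0x30}: %d, {0x10,0x40}: %d\n",
	       fv_has_all_prots(fv_prot, want_ok, 2),
	       fv_has_all_prots(fv_prot, want_bad, 2));
	return 0;
}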
+ */ +enum ice_status +ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt, + unsigned long *bm, struct list_head *fv_list) +{ + struct ice_sw_fv_list_entry *fvl; + struct ice_sw_fv_list_entry *tmp; + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + struct ice_fv *fv; + u32 offset; + + memset(&state, 0, sizeof(state)); + + if (!ids_cnt || !hw->seg) + return ICE_ERR_PARAM; + + ice_seg = hw->seg; + do { + u16 i; + + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &offset, ice_sw_fv_handler); + if (!fv) + break; + ice_seg = NULL; + + /* If field vector is not in the bitmap list, then skip this + * profile. + */ + if (!test_bit((u16)offset, bm)) + continue; + + for (i = 0; i < ids_cnt; i++) { + int j; + + /* This code assumes that if a switch field vector line + * has a matching protocol, then this line will contain + * the entries necessary to represent every field in + * that protocol header. + */ + for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++) + if (fv->ew[j].prot_id == prot_ids[i]) + break; + if (j >= hw->blk[ICE_BLK_SW].es.fvw) + break; + if (i + 1 == ids_cnt) { + fvl = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*fvl), GFP_KERNEL); + if (!fvl) + goto err; + fvl->fv_ptr = fv; + fvl->profile_id = offset; + list_add(&fvl->list_entry, fv_list); + break; + } + } + } while (fv); + if (list_empty(fv_list)) + return ICE_ERR_CFG; + return 0; + +err: + list_for_each_entry_safe(fvl, tmp, fv_list, list_entry) { + list_del(&fvl->list_entry); + devm_kfree(ice_hw_to_dev(hw), fvl); + } + + return ICE_ERR_NO_MEMORY; +} + +/** + * ice_init_prof_result_bm - Initialize the profile result index bitmap + * @hw: pointer to hardware structure + */ +void ice_init_prof_result_bm(struct ice_hw *hw) +{ + struct ice_pkg_enum state; + struct ice_seg *ice_seg; + struct ice_fv *fv; + + memset(&state, 0, sizeof(state)); + + if (!hw->seg) + return; + + ice_seg = hw->seg; + do { + u32 off; + u16 i; + + fv = ice_pkg_enum_entry(ice_seg, &state, ICE_SID_FLD_VEC_SW, + &off, ice_sw_fv_handler); + ice_seg = NULL; + if (!fv) + break; + + bitmap_zero(hw->switch_info->prof_res_bm[off], + ICE_MAX_FV_WORDS); + + /* Determine empty field vector indices, these can be + * used for recipe results. Skip index 0, since it is + * always used for Switch ID. 
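The loop that follows implements the rule stated above: a result slot is free when both the protocol ID and the offset carry the invalid markers, and index 0 is never offered because it always holds the Switch ID. A standalone sketch of building that per-profile bitmap, again with stand-in marker values and a reduced word count:

#include <stdio.h>
#include <stdint.h>

#define PROT_INVALID    0xFF    /* stand-in for ICE_PROT_INVALID    */
#define FV_OFFSET_INVAL 0x1FF   /* stand-in for ICE_FV_OFFSET_INVAL */
#define MAX_FV_WORDS    8       /* simplified; the hardware uses 48 */

struct fv_word { uint8_t prot_id; uint16_t off; };

int main(void)
{
	struct fv_word ew[MAX_FV_WORDS] = {
		{ 0x01, 0 },                        /* word 0: Switch ID, never free */
		{ 0x10, 12 }, { 0x11, 14 },         /* words 1-2 in use              */
		{ PROT_INVALID, FV_OFFSET_INVAL },  /* words 3-7 empty               */
		{ PROT_INVALID, FV_OFFSET_INVAL },
		{ PROT_INVALID, FV_OFFSET_INVAL },
		{ PROT_INVALID, FV_OFFSET_INVAL },
		{ PROT_INVALID, FV_OFFSET_INVAL },
	};
	uint32_t prof_res_bm = 0;
	int i;

	/* Index 0 is always skipped: it is reserved for the Switch ID. */
	for (i = 1; i < MAX_FV_WORDS; i++)
		if (ew[i].prot_id == PROT_INVALID && ew[i].off == FV_OFFSET_INVAL)
			prof_res_bm |= 1u << i;

	printf("free result-slot bitmap: 0x%02x\n", prof_res_bm);  /* 0xf8 */
	return 0;
}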
+ */ + for (i = 1; i < ICE_MAX_FV_WORDS; i++) + if (fv->ew[i].prot_id == ICE_PROT_INVALID && + fv->ew[i].off == ICE_FV_OFFSET_INVAL) + set_bit(i, hw->switch_info->prof_res_bm[off]); + } while (fv); +} + +/** * ice_pkg_buf_free * @hw: pointer to the HW structure * @bld: pointer to pkg build (allocated by ice_pkg_buf_alloc()) @@ -1863,6 +2133,35 @@ int ice_udp_tunnel_unset_port(struct net_device *netdev, unsigned int table, return 0; } +/** + * ice_find_prot_off - find prot ID and offset pair, based on prof and FV index + * @hw: pointer to the hardware structure + * @blk: hardware block + * @prof: profile ID + * @fv_idx: field vector word index + * @prot: variable to receive the protocol ID + * @off: variable to receive the protocol offset + */ +enum ice_status +ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx, + u8 *prot, u16 *off) +{ + struct ice_fv_word *fv_ext; + + if (prof >= hw->blk[blk].es.count) + return ICE_ERR_PARAM; + + if (fv_idx >= hw->blk[blk].es.fvw) + return ICE_ERR_PARAM; + + fv_ext = hw->blk[blk].es.t + (prof * hw->blk[blk].es.fvw); + + *prot = fv_ext[fv_idx].prot_id; + *off = fv_ext[fv_idx].off; + + return 0; +} + /* PTG Management */ /** diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h index 8a58e79729b9..344c2637facd 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h @@ -18,6 +18,20 @@ #define ICE_PKG_CNT 4 +enum ice_status +ice_acquire_change_lock(struct ice_hw *hw, enum ice_aq_res_access_type access); +void ice_release_change_lock(struct ice_hw *hw); +enum ice_status +ice_find_prot_off(struct ice_hw *hw, enum ice_block blk, u8 prof, u16 fv_idx, + u8 *prot, u16 *off); +void +ice_get_sw_fv_bitmap(struct ice_hw *hw, enum ice_prof_type type, + unsigned long *bm); +void +ice_init_prof_result_bm(struct ice_hw *hw); +enum ice_status +ice_get_sw_fv_list(struct ice_hw *hw, u8 *prot_ids, u16 ids_cnt, + unsigned long *bm, struct list_head *fv_list); bool ice_get_open_tunnel_port(struct ice_hw *hw, u16 *port); int ice_udp_tunnel_set_port(struct net_device *netdev, unsigned int table, diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h index 7d8b517a63c9..0f572a36d021 100644 --- a/drivers/net/ethernet/intel/ice/ice_flex_type.h +++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h @@ -13,6 +13,8 @@ struct ice_fv_word { u8 resvrd; } __packed; +#define ICE_MAX_NUM_PROFILES 256 + #define ICE_MAX_FV_WORDS 48 struct ice_fv { struct ice_fv_word ew[ICE_MAX_FV_WORDS]; @@ -279,6 +281,12 @@ struct ice_sw_fv_section { struct ice_fv fv[]; }; +struct ice_sw_fv_list_entry { + struct list_head list_entry; + u32 profile_id; + struct ice_fv *fv_ptr; +}; + /* The BOOST TCAM stores the match packet header in reverse order, meaning * the fields are reversed; in addition, this means that the normally big endian * fields of the packet are now little endian. 
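ice_find_prot_off, added in the hunk above, treats the extraction-sequence table as one flat array of profile rows, each es.fvw words wide, so the word for a given (profile, index) pair sits at prof * fvw + fv_idx. A standalone sketch of that lookup with bounds checks mirroring the ICE_ERR_PARAM cases:

#include <stdio.h>
#include <stdint.h>

struct fv_word { uint8_t prot_id; uint16_t off; };

/* Flat table: 'count' profiles, each 'fvw' words wide, like es.t above. */
static int find_prot_off(const struct fv_word *t, uint16_t count, uint16_t fvw,
			 uint8_t prof, uint16_t fv_idx,
			 uint8_t *prot, uint16_t *off)
{
	const struct fv_word *row;

	if (prof >= count || fv_idx >= fvw)
		return -1;              /* out of range, like ICE_ERR_PARAM */

	row = t + (size_t)prof * fvw;   /* start of the profile's row */
	*prot = row[fv_idx].prot_id;
	*off = row[fv_idx].off;
	return 0;
}

int main(void)
{
	struct fv_word table[2 * 3] = {
		{ 0x10, 0 }, { 0x11, 2 }, { 0x12, 4 },   /* profile 0 */
		{ 0x20, 0 }, { 0x21, 6 }, { 0x22, 8 },   /* profile 1 */
	};
	uint8_t prot;
	uint16_t off;

	if (!find_prot_off(table, 2, 3, 1, 2, &prot, &off))
		printf("prof 1, word 2 -> prot 0x%02x off %u\n", prot, off);
	return 0;
}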
@@ -365,6 +373,7 @@ struct ice_pkg_enum { enum ice_tunnel_type { TNL_VXLAN = 0, TNL_GENEVE, + TNL_GRETAP, __TNL_TYPE_CNT, TNL_LAST = 0xFF, TNL_ALL = 0xFF, @@ -603,4 +612,12 @@ struct ice_chs_chg { }; #define ICE_FLOW_PTYPE_MAX ICE_XLT1_CNT + +enum ice_prof_type { + ICE_PROF_NON_TUN = 0x1, + ICE_PROF_TUN_UDP = 0x2, + ICE_PROF_TUN_GRE = 0x4, + ICE_PROF_TUN_ALL = 0x6, + ICE_PROF_ALL = 0xFF, +}; #endif /* _ICE_FLEX_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c index 2418d4fff037..c2e78eaf4ccb 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.c +++ b/drivers/net/ethernet/intel/ice/ice_fltr.c @@ -395,3 +395,83 @@ enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, return ice_fltr_prepare_eth(vsi, ethertype, flag, action, ice_fltr_remove_eth_list); } + +/** + * ice_fltr_update_rule_flags - update lan_en/lb_en flags + * @hw: pointer to hw + * @rule_id: id of rule being updated + * @recipe_id: recipe id of rule + * @act: current action field + * @type: Rx or Tx + * @src: source VSI + * @new_flags: combinations of lb_en and lan_en + */ +static enum ice_status +ice_fltr_update_rule_flags(struct ice_hw *hw, u16 rule_id, u16 recipe_id, + u32 act, u16 type, u16 src, u32 new_flags) +{ + struct ice_aqc_sw_rules_elem *s_rule; + enum ice_status err; + u32 flags_mask; + + s_rule = kzalloc(ICE_SW_RULE_RX_TX_NO_HDR_SIZE, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + + flags_mask = ICE_SINGLE_ACT_LB_ENABLE | ICE_SINGLE_ACT_LAN_ENABLE; + act &= ~flags_mask; + act |= (flags_mask & new_flags); + + s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(recipe_id); + s_rule->pdata.lkup_tx_rx.index = cpu_to_le16(rule_id); + s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act); + + if (type & ICE_FLTR_RX) { + s_rule->pdata.lkup_tx_rx.src = + cpu_to_le16(hw->port_info->lport); + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + + } else { + s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(src); + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); + } + + err = ice_aq_sw_rules(hw, s_rule, ICE_SW_RULE_RX_TX_NO_HDR_SIZE, 1, + ice_aqc_opc_update_sw_rules, NULL); + + kfree(s_rule); + return err; +} + +/** + * ice_fltr_build_action - build action for rule + * @vsi_id: id of VSI which is use to build action + */ +static u32 ice_fltr_build_action(u16 vsi_id) +{ + return ((vsi_id << ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M) | + ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; +} + +/** + * ice_fltr_update_flags_dflt_rule - update flags on default rule + * @vsi: pointer to VSI + * @rule_id: id of rule + * @direction: Tx or Rx + * @new_flags: flags to update + * + * Function updates flags on default rule with ICE_SW_LKUP_DFLT. + * + * Flags should be a combination of ICE_SINGLE_ACT_LB_ENABLE and + * ICE_SINGLE_ACT_LAN_ENABLE. 
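ice_fltr_update_rule_flags above rewrites only the loopback/LAN-enable bits of an existing rule: it clears those two bits from the current action and ORs in the requested ones, leaving the VSI-forwarding part untouched, while ice_fltr_build_action packs the VSI number plus the valid bit. A standalone sketch of that mask arithmetic; the bit positions used here are placeholders, since the real ICE_SINGLE_ACT_* values live in ice_switch.h and are not part of this patch:

#include <stdio.h>
#include <stdint.h>

/* Placeholder bit layout for illustration only; the real definitions in
 * ice_switch.h may use different positions.
 */
#define ACT_VSI_ID_S       4
#define ACT_VSI_ID_M       (0x3FFu << ACT_VSI_ID_S)
#define ACT_VSI_FORWARDING 0x0
#define ACT_VALID_BIT      (1u << 17)
#define ACT_LAN_ENABLE     (1u << 18)
#define ACT_LB_ENABLE      (1u << 19)

static uint32_t build_action(uint16_t vsi_id)
{
	return ((vsi_id << ACT_VSI_ID_S) & ACT_VSI_ID_M) |
	       ACT_VSI_FORWARDING | ACT_VALID_BIT;
}

static uint32_t update_flags(uint32_t act, uint32_t new_flags)
{
	uint32_t flags_mask = ACT_LB_ENABLE | ACT_LAN_ENABLE;

	act &= ~flags_mask;             /* clear the old lb_en/lan_en bits    */
	act |= new_flags & flags_mask;  /* set only the requested flag bits   */
	return act;
}

int main(void)
{
	uint32_t act = build_action(5);

	printf("action 0x%08x -> lb_en only 0x%08x\n",
	       act, update_flags(act, ACT_LB_ENABLE));
	return 0;
}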
+ */ +enum ice_status +ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction, + u32 new_flags) +{ + u32 action = ice_fltr_build_action(vsi->vsi_num); + struct ice_hw *hw = &vsi->back->hw; + + return ice_fltr_update_rule_flags(hw, rule_id, ICE_SW_LKUP_DFLT, action, + direction, vsi->vsi_num, new_flags); +} diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h index 361cb4da9b43..8eec4febead1 100644 --- a/drivers/net/ethernet/intel/ice/ice_fltr.h +++ b/drivers/net/ethernet/intel/ice/ice_fltr.h @@ -36,4 +36,7 @@ enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag, enum ice_sw_fwd_act_type action); void ice_fltr_remove_all(struct ice_vsi *vsi); +enum ice_status +ice_fltr_update_flags_dflt_rule(struct ice_vsi *vsi, u16 rule_id, u8 direction, + u32 new_flags); #endif diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h index 76021d977b60..a49082485642 100644 --- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h +++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h @@ -182,6 +182,7 @@ #define GLINT_DYN_CTL_INTERVAL_S 5 #define GLINT_DYN_CTL_INTERVAL_M ICE_M(0xFFF, 5) #define GLINT_DYN_CTL_SW_ITR_INDX_ENA_M BIT(24) +#define GLINT_DYN_CTL_SW_ITR_INDX_S 25 #define GLINT_DYN_CTL_SW_ITR_INDX_M ICE_M(0x3, 25) #define GLINT_DYN_CTL_WB_ON_ITR_M BIT(30) #define GLINT_DYN_CTL_INTENA_MSK_M BIT(31) diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h index 80736e0ec0dc..d981dc6f2323 100644 --- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h +++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h @@ -301,6 +301,46 @@ struct ice_32b_rx_flex_desc_nic { } flex_ts; }; +/* Rx Flex Descriptor NIC Profile + * RxDID Profile ID 6 + * Flex-field 0: RSS hash lower 16-bits + * Flex-field 1: RSS hash upper 16-bits + * Flex-field 2: Flow ID lower 16-bits + * Flex-field 3: Source VSI + * Flex-field 4: reserved, VLAN ID taken from L2Tag + */ +struct ice_32b_rx_flex_desc_nic_2 { + /* Qword 0 */ + u8 rxdid; + u8 mir_id_umb_cast; + __le16 ptype_flexi_flags0; + __le16 pkt_len; + __le16 hdr_len_sph_flex_flags1; + + /* Qword 1 */ + __le16 status_error0; + __le16 l2tag1; + __le32 rss_hash; + + /* Qword 2 */ + __le16 status_error1; + u8 flexi_flags2; + u8 ts_low; + __le16 l2tag2_1st; + __le16 l2tag2_2nd; + + /* Qword 3 */ + __le16 flow_id; + __le16 src_vsi; + union { + struct { + __le16 rsvd; + __le16 flow_id_ipv6; + } flex; + __le32 ts_high; + } flex_ts; +}; + /* Receive Flex Descriptor profile IDs: There are a total * of 64 profiles where profile IDs 0/1 are for legacy; and * profiles 2-63 are flex profiles that can be programmed @@ -529,6 +569,9 @@ struct ice_tx_ctx_desc { #define ICE_TXD_CTX_QW1_MSS_S 50 +#define ICE_TXD_CTX_QW1_VSI_S 50 +#define ICE_TXD_CTX_QW1_VSI_M (0x3FFULL << ICE_TXD_CTX_QW1_VSI_S) + enum ice_tx_ctx_desc_cmd_bits { ICE_TX_CTX_DESC_TSO = 0x01, ICE_TX_CTX_DESC_TSYN = 0x02, diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index b718e196af2a..40562600a8cf 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -22,8 +22,12 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type) return "ICE_VSI_VF"; case ICE_VSI_CTRL: return "ICE_VSI_CTRL"; + case ICE_VSI_CHNL: + return "ICE_VSI_CHNL"; case ICE_VSI_LB: return "ICE_VSI_LB"; + case ICE_VSI_SWITCHDEV_CTRL: + return "ICE_VSI_SWITCHDEV_CTRL"; default: return 
"unknown"; } @@ -44,12 +48,12 @@ static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena) int ret = 0; u16 i; - for (i = 0; i < vsi->num_rxq; i++) + ice_for_each_rxq(vsi, i) ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false); ice_flush(&vsi->back->hw); - for (i = 0; i < vsi->num_rxq; i++) { + ice_for_each_rxq(vsi, i) { ret = ice_vsi_wait_one_rx_ring(vsi, ena, i); if (ret) break; @@ -71,6 +75,8 @@ static int ice_vsi_alloc_arrays(struct ice_vsi *vsi) struct device *dev; dev = ice_pf_to_dev(pf); + if (vsi->type == ICE_VSI_CHNL) + return 0; /* allocate memory for both Tx and Rx ring pointers */ vsi->tx_rings = devm_kcalloc(dev, vsi->alloc_txq, @@ -132,6 +138,7 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi) { switch (vsi->type) { case ICE_VSI_PF: + case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_CTRL: case ICE_VSI_LB: /* a user could change the values of num_[tr]x_desc using @@ -200,6 +207,14 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) max_t(int, vsi->alloc_rxq, vsi->alloc_txq)); break; + case ICE_VSI_SWITCHDEV_CTRL: + /* The number of queues for ctrl VSI is equal to number of VFs. + * Each ring is associated to the corresponding VF_PR netdev. + */ + vsi->alloc_txq = pf->num_alloc_vfs; + vsi->alloc_rxq = pf->num_alloc_vfs; + vsi->num_q_vectors = 1; + break; case ICE_VSI_VF: vf = &pf->vf[vsi->vf_id]; if (vf->num_req_qs) @@ -218,6 +233,10 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id) vsi->alloc_rxq = 1; vsi->num_q_vectors = 1; break; + case ICE_VSI_CHNL: + vsi->alloc_txq = 0; + vsi->alloc_rxq = 0; + break; case ICE_VSI_LB: vsi->alloc_txq = 1; vsi->alloc_rxq = 1; @@ -263,7 +282,7 @@ static int ice_get_free_slot(void *array, int size, int curr) * ice_vsi_delete - delete a VSI from the switch * @vsi: pointer to VSI being removed */ -static void ice_vsi_delete(struct ice_vsi *vsi) +void ice_vsi_delete(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct ice_vsi_ctx *ctxt; @@ -334,7 +353,7 @@ static void ice_vsi_free_arrays(struct ice_vsi *vsi) * * Returns 0 on success, negative on failure */ -static int ice_vsi_clear(struct ice_vsi *vsi) +int ice_vsi_clear(struct ice_vsi *vsi) { struct ice_pf *pf = NULL; struct device *dev; @@ -379,12 +398,12 @@ static irqreturn_t ice_msix_clean_ctrl_vsi(int __always_unused irq, void *data) { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; - if (!q_vector->tx.ring) + if (!q_vector->tx.tx_ring) return IRQ_HANDLED; #define FDIR_RX_DESC_CLEAN_BUDGET 64 - ice_clean_rx_irq(q_vector->rx.ring, FDIR_RX_DESC_CLEAN_BUDGET); - ice_clean_ctrl_tx_irq(q_vector->tx.ring); + ice_clean_rx_irq(q_vector->rx.rx_ring, FDIR_RX_DESC_CLEAN_BUDGET); + ice_clean_ctrl_tx_irq(q_vector->tx.tx_ring); return IRQ_HANDLED; } @@ -398,7 +417,7 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) { struct ice_q_vector *q_vector = (struct ice_q_vector *)data; - if (!q_vector->tx.ring && !q_vector->rx.ring) + if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) return IRQ_HANDLED; q_vector->total_events++; @@ -408,16 +427,33 @@ static irqreturn_t ice_msix_clean_rings(int __always_unused irq, void *data) return IRQ_HANDLED; } +static irqreturn_t ice_eswitch_msix_clean_rings(int __always_unused irq, void *data) +{ + struct ice_q_vector *q_vector = (struct ice_q_vector *)data; + struct ice_pf *pf = q_vector->vsi->back; + int i; + + if (!q_vector->tx.tx_ring && !q_vector->rx.rx_ring) + return IRQ_HANDLED; + + ice_for_each_vf(pf, i) + napi_schedule(&pf->vf[i].repr->q_vector->napi); + + return IRQ_HANDLED; +} + 
/** * ice_vsi_alloc - Allocates the next available struct VSI in the PF * @pf: board private structure * @vsi_type: type of VSI + * @ch: ptr to channel * @vf_id: ID of the VF being configured * * returns a pointer to a VSI on success, NULL on failure. */ static struct ice_vsi * -ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) +ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, + struct ice_channel *ch, u16 vf_id) { struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi = NULL; @@ -444,10 +480,17 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) if (vsi_type == ICE_VSI_VF) ice_vsi_set_num_qs(vsi, vf_id); - else + else if (vsi_type != ICE_VSI_CHNL) ice_vsi_set_num_qs(vsi, ICE_INVAL_VFID); switch (vsi->type) { + case ICE_VSI_SWITCHDEV_CTRL: + if (ice_vsi_alloc_arrays(vsi)) + goto err_rings; + + /* Setup eswitch MSIX irq handler for VSI */ + vsi->irq_handler = ice_eswitch_msix_clean_rings; + break; case ICE_VSI_PF: if (ice_vsi_alloc_arrays(vsi)) goto err_rings; @@ -466,6 +509,13 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id) if (ice_vsi_alloc_arrays(vsi)) goto err_rings; break; + case ICE_VSI_CHNL: + if (!ch) + goto err_rings; + vsi->num_rxq = ch->num_rxq; + vsi->num_txq = ch->num_txq; + vsi->next_base_q = ch->base_q; + break; case ICE_VSI_LB: if (ice_vsi_alloc_arrays(vsi)) goto err_rings; @@ -582,6 +632,9 @@ static int ice_vsi_get_qs(struct ice_vsi *vsi) }; int ret; + if (vsi->type == ICE_VSI_CHNL) + return 0; + ret = __ice_vsi_get_qs(&tx_qs_cfg); if (ret) return ret; @@ -606,12 +659,12 @@ static void ice_vsi_put_qs(struct ice_vsi *vsi) mutex_lock(&pf->avail_q_mutex); - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_alloc_txq(vsi, i) { clear_bit(vsi->txq_map[i], pf->avail_txqs); vsi->txq_map[i] = ICE_INVAL_Q_INDEX; } - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_alloc_rxq(vsi, i) { clear_bit(vsi->rxq_map[i], pf->avail_rxqs); vsi->rxq_map[i] = ICE_INVAL_Q_INDEX; } @@ -700,12 +753,23 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi) cap = &pf->hw.func_caps.common_cap; switch (vsi->type) { + case ICE_VSI_CHNL: case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ vsi->rss_table_size = (u16)cap->rss_table_size; + if (vsi->type == ICE_VSI_CHNL) + vsi->rss_size = min_t(u16, vsi->num_rxq, + BIT(cap->rss_table_entry_width)); + else + vsi->rss_size = min_t(u16, num_online_cpus(), + BIT(cap->rss_table_entry_width)); + vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; + break; + case ICE_VSI_SWITCHDEV_CTRL: + vsi->rss_table_size = ICE_VSIQF_HLUT_ARRAY_SIZE; vsi->rss_size = min_t(u16, num_online_cpus(), BIT(cap->rss_table_entry_width)); - vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF; + vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_VSI; break; case ICE_VSI_VF: /* VF VSI will get a small RSS table. 
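The RSS sizing above caps the spread at whichever is smaller: the CPU count (or the channel's Rx queue count for ICE_VSI_CHNL) and the number of queues a LUT entry can address, i.e. 2^rss_table_entry_width. A quick standalone check of that arithmetic with made-up numbers:

#include <stdio.h>
#include <stdint.h>

static uint16_t min_u16(uint16_t a, uint16_t b) { return a < b ? a : b; }

int main(void)
{
	uint16_t num_online_cpus = 24;
	uint16_t rss_table_entry_width = 4;    /* a LUT entry addresses 2^4 queues */
	uint16_t rss_size;

	rss_size = min_u16(num_online_cpus, 1u << rss_table_entry_width);
	printf("rss_size = %u\n", rss_size);   /* 16: limited by entry width */
	return 0;
}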
@@ -775,21 +839,13 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) u16 num_txq_per_tc, num_rxq_per_tc; u16 qcount_tx = vsi->alloc_txq; u16 qcount_rx = vsi->alloc_rxq; - bool ena_tc0 = false; u8 netdev_tc = 0; int i; - /* at least TC0 should be enabled by default */ - if (vsi->tc_cfg.numtc) { - if (!(vsi->tc_cfg.ena_tc & BIT(0))) - ena_tc0 = true; - } else { - ena_tc0 = true; - } - - if (ena_tc0) { - vsi->tc_cfg.numtc++; - vsi->tc_cfg.ena_tc |= 1; + if (!vsi->tc_cfg.numtc) { + /* at least TC0 should be enabled by default */ + vsi->tc_cfg.numtc = 1; + vsi->tc_cfg.ena_tc = 1; } num_rxq_per_tc = min_t(u16, qcount_rx / vsi->tc_cfg.numtc, ICE_MAX_RXQS_PER_TC); @@ -931,6 +987,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); switch (vsi->type) { + case ICE_VSI_CHNL: case ICE_VSI_PF: /* PF VSI will inherit RSS instance of PF */ lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_PF; @@ -953,6 +1010,28 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ICE_AQ_VSI_Q_OPT_RSS_HASH_M); } +static void +ice_chnl_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt) +{ + struct ice_pf *pf = vsi->back; + u16 qcount, qmap; + u8 offset = 0; + int pow; + + qcount = min_t(int, vsi->num_rxq, pf->num_lan_msix); + + pow = order_base_2(qcount); + qmap = ((offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & + ICE_AQ_VSI_TC_Q_NUM_M); + + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.mapping_flags |= cpu_to_le16(ICE_AQ_VSI_Q_MAP_CONTIG); + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->next_base_q); + ctxt->info.q_mapping[1] = cpu_to_le16(qcount); +} + /** * ice_vsi_init - Create and initialize a VSI * @vsi: the VSI being configured @@ -980,6 +1059,10 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) case ICE_VSI_PF: ctxt->flags = ICE_AQ_VSI_TYPE_PF; break; + case ICE_VSI_SWITCHDEV_CTRL: + case ICE_VSI_CHNL: + ctxt->flags = ICE_AQ_VSI_TYPE_VMDQ2; + break; case ICE_VSI_VF: ctxt->flags = ICE_AQ_VSI_TYPE_VF; /* VF number here is the absolute VF number (0-255) */ @@ -990,6 +1073,21 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) goto out; } + /* Handle VLAN pruning for channel VSI if main VSI has VLAN + * prune enabled + */ + if (vsi->type == ICE_VSI_CHNL) { + struct ice_vsi *main_vsi; + + main_vsi = ice_get_main_vsi(pf); + if (main_vsi && ice_vsi_is_vlan_pruning_ena(main_vsi)) + ctxt->info.sw_flags2 |= + ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + else + ctxt->info.sw_flags2 &= + ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; + } + ice_set_dflt_vsi_ctx(ctxt); if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) ice_set_fd_vsi_ctx(ctxt, vsi); @@ -1010,13 +1108,17 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi) } ctxt->info.sw_id = vsi->port_info->sw_id; - ice_vsi_setup_q_map(vsi, ctxt); - if (!init_vsi) /* means VSI being updated */ - /* must to indicate which section of VSI context are - * being modified - */ - ctxt->info.valid_sections |= - cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); + if (vsi->type == ICE_VSI_CHNL) { + ice_chnl_vsi_setup_q_map(vsi, ctxt); + } else { + ice_vsi_setup_q_map(vsi, ctxt); + if (!init_vsi) /* means VSI being updated */ + /* must to indicate which section of VSI context are + * being modified + */ + ctxt->info.valid_sections |= + cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); + } /* enable/disable MAC and VLAN anti-spoof when spoofchk is on/off * respectively @@ -1195,6 +1297,8 @@ static int 
ice_vsi_setup_vector_base(struct ice_vsi *vsi) /* SRIOV doesn't grab irq_tracker entries for each VSI */ if (vsi->type == ICE_VSI_VF) return 0; + if (vsi->type == ICE_VSI_CHNL) + return 0; if (vsi->base_vector) { dev_dbg(dev, "VSI %d has non-zero base vector %d\n", @@ -1249,14 +1353,14 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) struct ice_q_vector *q_vector = vsi->q_vectors[i]; if (q_vector) { - q_vector->tx.ring = NULL; - q_vector->rx.ring = NULL; + q_vector->tx.tx_ring = NULL; + q_vector->rx.rx_ring = NULL; } } } if (vsi->tx_rings) { - for (i = 0; i < vsi->alloc_txq; i++) { + ice_for_each_alloc_txq(vsi, i) { if (vsi->tx_rings[i]) { kfree_rcu(vsi->tx_rings[i], rcu); WRITE_ONCE(vsi->tx_rings[i], NULL); @@ -1264,7 +1368,7 @@ static void ice_vsi_clear_rings(struct ice_vsi *vsi) } } if (vsi->rx_rings) { - for (i = 0; i < vsi->alloc_rxq; i++) { + ice_for_each_alloc_rxq(vsi, i) { if (vsi->rx_rings[i]) { kfree_rcu(vsi->rx_rings[i], rcu); WRITE_ONCE(vsi->rx_rings[i], NULL); @@ -1285,8 +1389,8 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) dev = ice_pf_to_dev(pf); /* Allocate Tx rings */ - for (i = 0; i < vsi->alloc_txq; i++) { - struct ice_ring *ring; + ice_for_each_alloc_txq(vsi, i) { + struct ice_tx_ring *ring; /* allocate with kzalloc(), free with kfree_rcu() */ ring = kzalloc(sizeof(*ring), GFP_KERNEL); @@ -1296,7 +1400,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->q_index = i; ring->reg_idx = vsi->txq_map[i]; - ring->ring_active = false; ring->vsi = vsi; ring->tx_tstamps = &pf->ptp.port.tx; ring->dev = dev; @@ -1305,8 +1408,8 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) } /* Allocate Rx rings */ - for (i = 0; i < vsi->alloc_rxq; i++) { - struct ice_ring *ring; + ice_for_each_alloc_rxq(vsi, i) { + struct ice_rx_ring *ring; /* allocate with kzalloc(), free with kfree_rcu() */ ring = kzalloc(sizeof(*ring), GFP_KERNEL); @@ -1315,7 +1418,6 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi) ring->q_index = i; ring->reg_idx = vsi->rxq_map[i]; - ring->ring_active = false; ring->vsi = vsi; ring->netdev = vsi->netdev; ring->dev = dev; @@ -1363,7 +1465,7 @@ void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena) * ice_vsi_cfg_rss_lut_key - Configure RSS params for a VSI * @vsi: VSI to be configured */ -static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) +int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) { struct ice_pf *pf = vsi->back; struct device *dev; @@ -1371,7 +1473,25 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi) int err; dev = ice_pf_to_dev(pf); - vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); + if (vsi->type == ICE_VSI_PF && vsi->ch_rss_size && + (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags))) { + vsi->rss_size = min_t(u16, vsi->rss_size, vsi->ch_rss_size); + } else { + vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq); + + /* If orig_rss_size is valid and it is less than determined + * main VSI's rss_size, update main VSI's rss_size to be + * orig_rss_size so that when tc-qdisc is deleted, main VSI + * RSS table gets programmed to be correct (whatever it was + * to begin with (prior to setup-tc for ADQ config) + */ + if (vsi->orig_rss_size && vsi->rss_size < vsi->orig_rss_size && + vsi->orig_rss_size <= vsi->num_rxq) { + vsi->rss_size = vsi->orig_rss_size; + /* now orig_rss_size is used, reset it to zero */ + vsi->orig_rss_size = 0; + } + } lut = kzalloc(vsi->rss_table_size, GFP_KERNEL); if (!lut) @@ -1710,7 +1830,7 @@ int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx) return 
ice_vsi_cfg_rxq(vsi->rx_rings[q_idx]); } -int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx) +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx) { struct ice_aqc_add_tx_qgrp *qg_buf; int err; @@ -1766,7 +1886,7 @@ setup_rings: * Configure the Tx VSI for operation. */ static int -ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_ring **rings, u16 count) +ice_vsi_cfg_txqs(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count) { struct ice_aqc_add_tx_qgrp *qg_buf; u16 q_idx = 0; @@ -1817,8 +1937,8 @@ int ice_vsi_cfg_xdp_txqs(struct ice_vsi *vsi) if (ret) return ret; - for (i = 0; i < vsi->num_xdp_txq; i++) - vsi->xdp_rings[i]->xsk_pool = ice_xsk_pool(vsi->xdp_rings[i]); + ice_for_each_xdp_txq(vsi, i) + vsi->xdp_rings[i]->xsk_pool = ice_tx_xsk_pool(vsi->xdp_rings[i]); return ret; } @@ -1853,6 +1973,24 @@ void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl) ice_intrl_usec_to_reg(intrl, ICE_INTRL_GRAN_ABOVE_25)); } +static struct ice_q_vector *ice_pull_qvec_from_rc(struct ice_ring_container *rc) +{ + switch (rc->type) { + case ICE_RX_CONTAINER: + if (rc->rx_ring) + return rc->rx_ring->q_vector; + break; + case ICE_TX_CONTAINER: + if (rc->tx_ring) + return rc->tx_ring->q_vector; + break; + default: + break; + } + + return NULL; +} + /** * __ice_write_itr - write throttle rate to register * @q_vector: pointer to interrupt data structure @@ -1877,15 +2015,39 @@ void ice_write_itr(struct ice_ring_container *rc, u16 itr) { struct ice_q_vector *q_vector; - if (!rc->ring) + q_vector = ice_pull_qvec_from_rc(rc); + if (!q_vector) return; - q_vector = rc->ring->q_vector; - __ice_write_itr(q_vector, rc, itr); } /** + * ice_set_q_vector_intrl - set up interrupt rate limiting + * @q_vector: the vector to be configured + * + * Interrupt rate limiting is local to the vector, not per-queue so we must + * detect if either ring container has dynamic moderation enabled to decide + * what to set the interrupt rate limit to via INTRL settings. In the case that + * dynamic moderation is disabled on both, write the value with the cached + * setting to make sure INTRL register matches the user visible value. + */ +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector) +{ + if (ITR_IS_DYNAMIC(&q_vector->tx) || ITR_IS_DYNAMIC(&q_vector->rx)) { + /* in the case of dynamic enabled, cap each vector to no more + * than (4 us) 250,000 ints/sec, which allows low latency + * but still less than 500,000 interrupts per second, which + * reduces CPU a bit in the case of the lowest latency + * setting. The 4 here is a value in microseconds. 
+ */ + ice_write_intrl(q_vector, 4); + } else { + ice_write_intrl(q_vector, q_vector->intrl); + } +} + +/** * ice_vsi_cfg_msix - MSIX mode Interrupt Config in the HW * @vsi: the VSI being configured * @@ -1899,7 +2061,7 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi) u16 txq = 0, rxq = 0; int i, q; - for (i = 0; i < vsi->num_q_vectors; i++) { + ice_for_each_q_vector(vsi, i) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; u16 reg_idx = q_vector->reg_idx; @@ -2057,7 +2219,7 @@ int ice_vsi_stop_all_rx_rings(struct ice_vsi *vsi) */ static int ice_vsi_stop_tx_rings(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src, - u16 rel_vmvf_num, struct ice_ring **rings, u16 count) + u16 rel_vmvf_num, struct ice_tx_ring **rings, u16 count) { u16 q_idx; @@ -2122,11 +2284,10 @@ bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi) * ice_cfg_vlan_pruning - enable or disable VLAN pruning on the VSI * @vsi: VSI to enable or disable VLAN pruning on * @ena: set to true to enable VLAN pruning and false to disable it - * @vlan_promisc: enable valid security flags if not in VLAN promiscuous mode * * returns 0 if VSI is updated, negative otherwise */ -int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) +int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena) { struct ice_vsi_ctx *ctxt; struct ice_pf *pf; @@ -2154,9 +2315,7 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc) else ctxt->info.sw_flags2 &= ~ICE_AQ_VSI_SW_FLAG_RX_VLAN_PRUNE_ENA; - if (!vlan_promisc) - ctxt->info.valid_sections = - cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); + ctxt->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SW_VALID); status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL); if (status) { @@ -2179,10 +2338,14 @@ err_out: static void ice_vsi_set_tc_cfg(struct ice_vsi *vsi) { - struct ice_dcbx_cfg *cfg = &vsi->port_info->qos_cfg.local_dcbx_cfg; + if (!test_bit(ICE_FLAG_DCB_ENA, vsi->back->flags)) { + vsi->tc_cfg.ena_tc = ICE_DFLT_TRAFFIC_CLASS; + vsi->tc_cfg.numtc = 1; + return; + } - vsi->tc_cfg.ena_tc = ice_dcb_get_ena_tc(cfg); - vsi->tc_cfg.numtc = ice_dcb_get_num_tc(cfg); + /* set VSI TC information based on DCB config */ + ice_vsi_set_dcb_tc_cfg(vsi); } /** @@ -2295,8 +2458,10 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) switch (vsi->type) { case ICE_VSI_CTRL: + case ICE_VSI_CHNL: case ICE_VSI_LB: case ICE_VSI_PF: + case ICE_VSI_SWITCHDEV_CTRL: max_agg_nodes = ICE_MAX_PF_AGG_NODES; agg_node_id_start = ICE_PF_AGG_NODE_ID_START; agg_node_iter = &pf->pf_agg_node[0]; @@ -2393,6 +2558,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) * @vf_id: defines VF ID to which this VSI connects. This field is meant to be * used only for ICE_VSI_VF VSI type. For other VSI types, should * fill-in ICE_INVAL_VFID as input. + * @ch: ptr to channel * * This allocates the sw VSI structure and its queue resources. 
* @@ -2401,7 +2567,7 @@ static void ice_set_agg_vsi(struct ice_vsi *vsi) */ struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id) + enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch) { u16 max_txqs[ICE_MAX_TRAFFIC_CLASS] = { 0 }; struct device *dev = ice_pf_to_dev(pf); @@ -2409,10 +2575,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, struct ice_vsi *vsi; int ret, i; - if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) - vsi = ice_vsi_alloc(pf, vsi_type, vf_id); + if (vsi_type == ICE_VSI_CHNL) + vsi = ice_vsi_alloc(pf, vsi_type, ch, ICE_INVAL_VFID); + else if (vsi_type == ICE_VSI_VF || vsi_type == ICE_VSI_CTRL) + vsi = ice_vsi_alloc(pf, vsi_type, NULL, vf_id); else - vsi = ice_vsi_alloc(pf, vsi_type, ICE_INVAL_VFID); + vsi = ice_vsi_alloc(pf, vsi_type, NULL, ICE_INVAL_VFID); if (!vsi) { dev_err(dev, "could not allocate VSI\n"); @@ -2429,10 +2597,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, ice_alloc_fd_res(vsi); - if (ice_vsi_get_qs(vsi)) { - dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", - vsi->idx); - goto unroll_vsi_alloc; + if (vsi_type != ICE_VSI_CHNL) { + if (ice_vsi_get_qs(vsi)) { + dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n", + vsi->idx); + goto unroll_vsi_alloc; + } } /* set RSS capabilities */ @@ -2448,6 +2618,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, switch (vsi->type) { case ICE_VSI_CTRL: + case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); if (ret) @@ -2490,6 +2661,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, } ice_init_arfs(vsi); break; + case ICE_VSI_CHNL: + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } + break; case ICE_VSI_VF: /* VF driver will take care of creating netdev for this type and * map queues to vectors through Virtchnl, PF driver only @@ -2528,9 +2705,21 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, } /* configure VSI nodes based on number of queues and TC's */ - for (i = 0; i < vsi->tc_cfg.numtc; i++) - max_txqs[i] = vsi->alloc_txq; + ice_for_each_traffic_class(i) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) + continue; + + if (vsi->type == ICE_VSI_CHNL) { + if (!vsi->alloc_txq && vsi->num_txq) + max_txqs[i] = vsi->num_txq; + else + max_txqs[i] = pf->num_lan_tx; + } else { + max_txqs[i] = vsi->alloc_txq; + } + } + dev_dbg(dev, "vsi->tc_cfg.ena_tc = %d\n", vsi->tc_cfg.ena_tc); status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, max_txqs); if (status) { @@ -2591,7 +2780,7 @@ static void ice_vsi_release_msix(struct ice_vsi *vsi) u32 rxq = 0; int i, q; - for (i = 0; i < vsi->num_q_vectors; i++) { + ice_for_each_q_vector(vsi, i) { struct ice_q_vector *q_vector = vsi->q_vectors[i]; ice_write_intrl(q_vector, 0); @@ -2757,7 +2946,8 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked) } else { ice_vsi_close(vsi); } - } else if (vsi->type == ICE_VSI_CTRL) { + } else if (vsi->type == ICE_VSI_CTRL || + vsi->type == ICE_VSI_SWITCHDEV_CTRL) { ice_vsi_close(vsi); } } @@ -2860,7 +3050,8 @@ int ice_vsi_release(struct ice_vsi *vsi) clear_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); } - ice_devlink_destroy_port(vsi); + if (vsi->type == ICE_VSI_PF) + ice_devlink_destroy_pf_port(pf); if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) ice_rss_clean(vsi); @@ -3041,7 +3232,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, } vsi->q_vectors[i]->intrl = coalesce[i].intrl; - 
ice_write_intrl(vsi->q_vectors[i], coalesce[i].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } /* the number of queue vectors increased so write whatever is in @@ -3059,7 +3250,7 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi, ice_write_itr(rc, rc->itr_setting); vsi->q_vectors[i]->intrl = coalesce[0].intrl; - ice_write_intrl(vsi->q_vectors[i], coalesce[0].intrl); + ice_set_q_vector_intrl(vsi->q_vectors[i]); } } @@ -3144,6 +3335,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) switch (vtype) { case ICE_VSI_CTRL: + case ICE_VSI_SWITCHDEV_CTRL: case ICE_VSI_PF: ret = ice_vsi_alloc_q_vectors(vsi); if (ret) @@ -3163,7 +3355,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) ice_vsi_map_rings_to_vectors(vsi); if (ice_is_xdp_ena_vsi(vsi)) { - vsi->num_xdp_txq = vsi->alloc_rxq; + ret = ice_vsi_determine_xdp_res(vsi); + if (ret) + goto err_vectors; ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog); if (ret) goto err_vectors; @@ -3191,20 +3385,42 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi) goto err_vectors; break; + case ICE_VSI_CHNL: + if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) { + ice_vsi_cfg_rss_lut_key(vsi); + ice_vsi_set_rss_flow_fld(vsi); + } + break; default: break; } /* configure VSI nodes based on number of queues and TC's */ for (i = 0; i < vsi->tc_cfg.numtc; i++) { - max_txqs[i] = vsi->alloc_txq; + /* configure VSI nodes based on number of queues and TC's. + * ADQ creates VSIs for each TC/Channel but doesn't + * allocate queues instead it reconfigures the PF queues + * as per the TC command. So max_txqs should point to the + * PF Tx queues. + */ + if (vtype == ICE_VSI_CHNL) + max_txqs[i] = pf->num_lan_tx; + else + max_txqs[i] = vsi->alloc_txq; if (ice_is_xdp_ena_vsi(vsi)) max_txqs[i] += vsi->num_xdp_txq; } - status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + /* If MQPRIO is set, means channel code path, hence for main + * VSI's, use TC as 1 + */ + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, max_txqs); + else + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, + vsi->tc_cfg.ena_tc, max_txqs); + if (status) { dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %s\n", vsi->vsi_num, ice_stat_str(status)); @@ -3276,7 +3492,6 @@ int ice_wait_for_reset(struct ice_pf *pf, unsigned long timeout) return 0; } -#ifdef CONFIG_DCB /** * ice_vsi_update_q_map - update our copy of the VSI info with new queue map * @vsi: VSI being configured @@ -3292,6 +3507,146 @@ static void ice_vsi_update_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctx) } /** + * ice_vsi_cfg_netdev_tc - Setup the netdev TC configuration + * @vsi: the VSI being configured + * @ena_tc: TC map to be enabled + */ +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc) +{ + struct net_device *netdev = vsi->netdev; + struct ice_pf *pf = vsi->back; + int numtc = vsi->tc_cfg.numtc; + struct ice_dcbx_cfg *dcbcfg; + u8 netdev_tc; + int i; + + if (!netdev) + return; + + /* CHNL VSI doesn't have it's own netdev, hence, no netdev_tc */ + if (vsi->type == ICE_VSI_CHNL) + return; + + if (!ena_tc) { + netdev_reset_tc(netdev); + return; + } + + if (vsi->type == ICE_VSI_PF && ice_is_adq_active(pf)) + numtc = vsi->all_numtc; + + if (netdev_set_num_tc(netdev, numtc)) + return; + + dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + + ice_for_each_traffic_class(i) + if (vsi->tc_cfg.ena_tc & BIT(i)) + netdev_set_tc_queue(netdev, + vsi->tc_cfg.tc_info[i].netdev_tc, + 
vsi->tc_cfg.tc_info[i].qcount_tx, + vsi->tc_cfg.tc_info[i].qoffset); + /* setup TC queue map for CHNL TCs */ + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + break; + if (!vsi->mqprio_qopt.qopt.count[i]) + break; + netdev_set_tc_queue(netdev, i, + vsi->mqprio_qopt.qopt.count[i], + vsi->mqprio_qopt.qopt.offset[i]); + } + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + return; + + for (i = 0; i < ICE_MAX_USER_PRIORITY; i++) { + u8 ets_tc = dcbcfg->etscfg.prio_table[i]; + + /* Get the mapped netdev TC# for the UP */ + netdev_tc = vsi->tc_cfg.tc_info[ets_tc].netdev_tc; + netdev_set_prio_tc_map(netdev, i, netdev_tc); + } +} + +/** + * ice_vsi_setup_q_map_mqprio - Prepares mqprio based tc_config + * @vsi: the VSI being configured, + * @ctxt: VSI context structure + * @ena_tc: number of traffic classes to enable + * + * Prepares VSI tc_config to have queue configurations based on MQPRIO options. + */ +static void +ice_vsi_setup_q_map_mqprio(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt, + u8 ena_tc) +{ + u16 pow, offset = 0, qcount_tx = 0, qcount_rx = 0, qmap; + u16 tc0_offset = vsi->mqprio_qopt.qopt.offset[0]; + int tc0_qcount = vsi->mqprio_qopt.qopt.count[0]; + u8 netdev_tc = 0; + int i; + + vsi->tc_cfg.ena_tc = ena_tc ? ena_tc : 1; + + pow = order_base_2(tc0_qcount); + qmap = ((tc0_offset << ICE_AQ_VSI_TC_Q_OFFSET_S) & + ICE_AQ_VSI_TC_Q_OFFSET_M) | + ((pow << ICE_AQ_VSI_TC_Q_NUM_S) & ICE_AQ_VSI_TC_Q_NUM_M); + + ice_for_each_traffic_class(i) { + if (!(vsi->tc_cfg.ena_tc & BIT(i))) { + /* TC is not enabled */ + vsi->tc_cfg.tc_info[i].qoffset = 0; + vsi->tc_cfg.tc_info[i].qcount_rx = 1; + vsi->tc_cfg.tc_info[i].qcount_tx = 1; + vsi->tc_cfg.tc_info[i].netdev_tc = 0; + ctxt->info.tc_mapping[i] = 0; + continue; + } + + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + vsi->tc_cfg.tc_info[i].qoffset = offset; + vsi->tc_cfg.tc_info[i].qcount_rx = qcount_rx; + vsi->tc_cfg.tc_info[i].qcount_tx = qcount_tx; + vsi->tc_cfg.tc_info[i].netdev_tc = netdev_tc++; + } + + if (vsi->all_numtc && vsi->all_numtc != vsi->tc_cfg.numtc) { + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + continue; + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + } + } + + /* Set actual Tx/Rx queue pairs */ + vsi->num_txq = offset + qcount_tx; + vsi->num_rxq = offset + qcount_rx; + + /* Setup queue TC[0].qmap for given VSI context */ + ctxt->info.tc_mapping[0] = cpu_to_le16(qmap); + ctxt->info.q_mapping[0] = cpu_to_le16(vsi->rxq_map[0]); + ctxt->info.q_mapping[1] = cpu_to_le16(tc0_qcount); + + /* Find queue count available for channel VSIs and starting offset + * for channel VSIs + */ + if (tc0_qcount && tc0_qcount < vsi->num_rxq) { + vsi->cnt_q_avail = vsi->num_rxq - tc0_qcount; + vsi->next_base_q = tc0_qcount; + } + dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_txq = %d\n", vsi->num_txq); + dev_dbg(ice_pf_to_dev(vsi->back), "vsi->num_rxq = %d\n", vsi->num_rxq); + dev_dbg(ice_pf_to_dev(vsi->back), "all_numtc %u, all_enatc: 0x%04x, tc_cfg.numtc %u\n", + vsi->all_numtc, vsi->all_enatc, vsi->tc_cfg.numtc); +} + +/** * ice_vsi_cfg_tc - Configure VSI Tx Sched for given TC map * @vsi: VSI to be configured * @ena_tc: TC bitmap @@ -3309,6 +3664,9 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) u8 num_tc = 0; dev = ice_pf_to_dev(pf); + if (vsi->tc_cfg.ena_tc == ena_tc && + vsi->mqprio_qopt.mode != TC_MQPRIO_MODE_CHANNEL) + return ret; 
ice_for_each_traffic_class(i) { /* build bitmap of enabled TCs */ @@ -3316,6 +3674,12 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) num_tc++; /* populate max_txqs per TC */ max_txqs[i] = vsi->alloc_txq; + /* Update max_txqs if it is CHNL VSI, because alloc_t[r]xq are + * zero for CHNL VSI, hence use num_txq instead as max_txqs + */ + if (vsi->type == ICE_VSI_CHNL && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + max_txqs[i] = vsi->num_txq; } vsi->tc_cfg.ena_tc = ena_tc; @@ -3328,7 +3692,11 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) ctx->vf_num = 0; ctx->info = vsi->info; - ice_vsi_setup_q_map(vsi, ctx); + if (vsi->type == ICE_VSI_PF && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + ice_vsi_setup_q_map_mqprio(vsi, ctx, ena_tc); + else + ice_vsi_setup_q_map(vsi, ctx); /* must to indicate which section of VSI context are being modified */ ctx->info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_RXQ_MAP_VALID); @@ -3339,8 +3707,13 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc) goto out; } - status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc, - max_txqs); + if (vsi->type == ICE_VSI_PF && + test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, 1, + max_txqs); + else + status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, + vsi->tc_cfg.ena_tc, max_txqs); if (status) { dev_err(dev, "VSI %d failed TC config, error %s\n", @@ -3356,20 +3729,19 @@ out: kfree(ctx); return ret; } -#endif /* CONFIG_DCB */ /** * ice_update_ring_stats - Update ring statistics - * @ring: ring to update + * @stats: stats to be updated * @pkts: number of processed packets * @bytes: number of processed bytes * * This function assumes that caller has acquired a u64_stats_sync lock. */ -static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes) +static void ice_update_ring_stats(struct ice_q_stats *stats, u64 pkts, u64 bytes) { - ring->stats.bytes += bytes; - ring->stats.pkts += pkts; + stats->bytes += bytes; + stats->pkts += pkts; } /** @@ -3378,10 +3750,10 @@ static void ice_update_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes) * @pkts: number of processed packets * @bytes: number of processed bytes */ -void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) +void ice_update_tx_ring_stats(struct ice_tx_ring *tx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&tx_ring->syncp); - ice_update_ring_stats(tx_ring, pkts, bytes); + ice_update_ring_stats(&tx_ring->stats, pkts, bytes); u64_stats_update_end(&tx_ring->syncp); } @@ -3391,10 +3763,10 @@ void ice_update_tx_ring_stats(struct ice_ring *tx_ring, u64 pkts, u64 bytes) * @pkts: number of processed packets * @bytes: number of processed bytes */ -void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes) +void ice_update_rx_ring_stats(struct ice_rx_ring *rx_ring, u64 pkts, u64 bytes) { u64_stats_update_begin(&rx_ring->syncp); - ice_update_ring_stats(rx_ring, pkts, bytes); + ice_update_ring_stats(&rx_ring->stats, pkts, bytes); u64_stats_update_end(&rx_ring->syncp); } @@ -3547,6 +3919,180 @@ int ice_clear_dflt_vsi(struct ice_sw *sw) } /** + * ice_get_link_speed_mbps - get link speed in Mbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. 
+ */ +int ice_get_link_speed_mbps(struct ice_vsi *vsi) +{ + switch (vsi->port_info->phy.link_info.link_speed) { + case ICE_AQ_LINK_SPEED_100GB: + return SPEED_100000; + case ICE_AQ_LINK_SPEED_50GB: + return SPEED_50000; + case ICE_AQ_LINK_SPEED_40GB: + return SPEED_40000; + case ICE_AQ_LINK_SPEED_25GB: + return SPEED_25000; + case ICE_AQ_LINK_SPEED_20GB: + return SPEED_20000; + case ICE_AQ_LINK_SPEED_10GB: + return SPEED_10000; + case ICE_AQ_LINK_SPEED_5GB: + return SPEED_5000; + case ICE_AQ_LINK_SPEED_2500MB: + return SPEED_2500; + case ICE_AQ_LINK_SPEED_1000MB: + return SPEED_1000; + case ICE_AQ_LINK_SPEED_100MB: + return SPEED_100; + case ICE_AQ_LINK_SPEED_10MB: + return SPEED_10; + case ICE_AQ_LINK_SPEED_UNKNOWN: + default: + return 0; + } +} + +/** + * ice_get_link_speed_kbps - get link speed in Kbps + * @vsi: the VSI whose link speed is being queried + * + * Return current VSI link speed and 0 if the speed is unknown. + */ +int ice_get_link_speed_kbps(struct ice_vsi *vsi) +{ + int speed_mbps; + + speed_mbps = ice_get_link_speed_mbps(vsi); + + return speed_mbps * 1000; +} + +/** + * ice_set_min_bw_limit - setup minimum BW limit for Tx based on min_tx_rate + * @vsi: VSI to be configured + * @min_tx_rate: min Tx rate in Kbps to be configured as BW limit + * + * If the min_tx_rate is specified as 0 that means to clear the minimum BW limit + * profile, otherwise a non-zero value will force a minimum BW limit for the VSI + * on TC 0. + */ +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (min_tx_rate > (u64)speed) { + dev_err(dev, "invalid min Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + min_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure min BW for VSI limit */ + if (min_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MIN_BW, min_tx_rate); + if (status) { + dev_err(dev, "failed to set min Tx rate(%llu Kbps) for %s %d\n", + min_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set min Tx rate(%llu Kbps) for %s\n", + min_tx_rate, ice_vsi_type_str(vsi->type)); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MIN_BW); + if (status) { + dev_err(dev, "failed to clear min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared min Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + +/** + * ice_set_max_bw_limit - setup maximum BW limit for Tx based on max_tx_rate + * @vsi: VSI to be configured + * @max_tx_rate: max Tx rate in Kbps to be configured as BW limit + * + * If the max_tx_rate is specified as 0 that means to clear the maximum BW limit + * profile, otherwise a non-zero value will force a maximum BW limit for the VSI + * on TC 0. 
+ */ +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate) +{ + struct ice_pf *pf = vsi->back; + enum ice_status status; + struct device *dev; + int speed; + + dev = ice_pf_to_dev(pf); + if (!vsi->port_info) { + dev_dbg(dev, "VSI %d, type %u specified doesn't have valid port_info\n", + vsi->idx, vsi->type); + return -EINVAL; + } + + speed = ice_get_link_speed_kbps(vsi); + if (max_tx_rate > (u64)speed) { + dev_err(dev, "invalid max Tx rate %llu Kbps specified for %s %d is greater than current link speed %u Kbps\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx, + speed); + return -EINVAL; + } + + /* Configure max BW for VSI limit */ + if (max_tx_rate) { + status = ice_cfg_vsi_bw_lmt_per_tc(vsi->port_info, vsi->idx, 0, + ICE_MAX_BW, max_tx_rate); + if (status) { + dev_err(dev, "failed setting max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), + vsi->idx); + return -EIO; + } + + dev_dbg(dev, "set max Tx rate(%llu Kbps) for %s %d\n", + max_tx_rate, ice_vsi_type_str(vsi->type), vsi->idx); + } else { + status = ice_cfg_vsi_bw_dflt_lmt_per_tc(vsi->port_info, + vsi->idx, 0, + ICE_MAX_BW); + if (status) { + dev_err(dev, "failed clearing max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + return -EIO; + } + + dev_dbg(dev, "cleared max Tx rate configuration for %s %d\n", + ice_vsi_type_str(vsi->type), vsi->idx); + } + + return 0; +} + +/** * ice_set_link - turn on/off physical link * @vsi: VSI to modify physical link on * @ena: turn on/off physical link @@ -3582,3 +4128,126 @@ int ice_set_link(struct ice_vsi *vsi, bool ena) return 0; } + +/** + * ice_is_feature_supported + * @pf: pointer to the struct ice_pf instance + * @f: feature enum to be checked + * + * returns true if feature is supported, false otherwise + */ +bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f) +{ + if (f < 0 || f >= ICE_F_MAX) + return false; + + return test_bit(f, pf->features); +} + +/** + * ice_set_feature_support + * @pf: pointer to the struct ice_pf instance + * @f: feature enum to set + */ +static void ice_set_feature_support(struct ice_pf *pf, enum ice_feature f) +{ + if (f < 0 || f >= ICE_F_MAX) + return; + + set_bit(f, pf->features); +} + +/** + * ice_clear_feature_support + * @pf: pointer to the struct ice_pf instance + * @f: feature enum to clear + */ +void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f) +{ + if (f < 0 || f >= ICE_F_MAX) + return; + + clear_bit(f, pf->features); +} + +/** + * ice_init_feature_support + * @pf: pointer to the struct ice_pf instance + * + * called during init to setup supported feature + */ +void ice_init_feature_support(struct ice_pf *pf) +{ + switch (pf->hw.device_id) { + case ICE_DEV_ID_E810C_BACKPLANE: + case ICE_DEV_ID_E810C_QSFP: + case ICE_DEV_ID_E810C_SFP: + ice_set_feature_support(pf, ICE_F_DSCP); + if (ice_is_e810t(&pf->hw)) + ice_set_feature_support(pf, ICE_F_SMA_CTRL); + break; + default: + break; + } +} + +/** + * ice_vsi_update_security - update security block in VSI + * @vsi: pointer to VSI structure + * @fill: function pointer to fill ctx + */ +int +ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)) +{ + struct ice_vsi_ctx ctx = { 0 }; + + ctx.info = vsi->info; + ctx.info.valid_sections = cpu_to_le16(ICE_AQ_VSI_PROP_SECURITY_VALID); + fill(&ctx); + + if (ice_update_vsi(&vsi->back->hw, vsi->idx, &ctx, NULL)) + return -ENODEV; + + vsi->info = ctx.info; + return 0; +} + +/** + * ice_vsi_ctx_set_antispoof - set antispoof function 
in VSI ctx + * @ctx: pointer to VSI ctx structure + */ +void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx) +{ + ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF | + (ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); +} + +/** + * ice_vsi_ctx_clear_antispoof - clear antispoof function in VSI ctx + * @ctx: pointer to VSI ctx structure + */ +void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx) +{ + ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ENA_MAC_ANTI_SPOOF & + ~(ICE_AQ_VSI_SEC_TX_VLAN_PRUNE_ENA << + ICE_AQ_VSI_SEC_TX_PRUNE_ENA_S); +} + +/** + * ice_vsi_ctx_set_allow_override - allow destination override on VSI + * @ctx: pointer to VSI ctx structure + */ +void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx) +{ + ctx->info.sec_flags |= ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; +} + +/** + * ice_vsi_ctx_clear_allow_override - turn off destination override on VSI + * @ctx: pointer to VSI ctx structure + */ +void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx) +{ + ctx->info.sec_flags &= ~ICE_AQ_VSI_SEC_FLAG_ALLOW_DEST_OVRD; +} diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h index d5a28bf0fc2c..6c803407b67d 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_lib.h @@ -14,7 +14,7 @@ void ice_update_eth_stats(struct ice_vsi *vsi); int ice_vsi_cfg_single_rxq(struct ice_vsi *vsi, u16 q_idx); -int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_ring **tx_rings, u16 q_idx); +int ice_vsi_cfg_single_txq(struct ice_vsi *vsi, struct ice_tx_ring **tx_rings, u16 q_idx); int ice_vsi_cfg_rxqs(struct ice_vsi *vsi); @@ -45,19 +45,24 @@ int ice_vsi_stop_xdp_tx_rings(struct ice_vsi *vsi); bool ice_vsi_is_vlan_pruning_ena(struct ice_vsi *vsi); -int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc); +int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena); void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create); int ice_set_link(struct ice_vsi *vsi, bool ena); -#ifdef CONFIG_DCB +void ice_vsi_delete(struct ice_vsi *vsi); +int ice_vsi_clear(struct ice_vsi *vsi); + int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc); -#endif /* CONFIG_DCB */ + +int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi); + +void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc); struct ice_vsi * ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, - enum ice_vsi_type vsi_type, u16 vf_id); + enum ice_vsi_type vsi_type, u16 vf_id, struct ice_channel *ch); void ice_napi_del(struct ice_vsi *vsi); @@ -93,9 +98,9 @@ void ice_vsi_free_tx_rings(struct ice_vsi *vsi); void ice_vsi_manage_rss_lut(struct ice_vsi *vsi, bool ena); -void ice_update_tx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); +void ice_update_tx_ring_stats(struct ice_tx_ring *ring, u64 pkts, u64 bytes); -void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes); +void ice_update_rx_ring_stats(struct ice_rx_ring *ring, u64 pkts, u64 bytes); void ice_vsi_cfg_frame_size(struct ice_vsi *vsi); @@ -103,6 +108,7 @@ int ice_status_to_errno(enum ice_status err); void ice_write_intrl(struct ice_q_vector *q_vector, u8 intrl); void ice_write_itr(struct ice_ring_container *rc, u16 itr); +void ice_set_q_vector_intrl(struct ice_q_vector *q_vector); enum ice_status ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set); @@ -116,4 +122,22 @@ bool ice_is_vsi_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi); int 
ice_clear_dflt_vsi(struct ice_sw *sw); +int ice_set_min_bw_limit(struct ice_vsi *vsi, u64 min_tx_rate); +int ice_set_max_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate); +int ice_get_link_speed_kbps(struct ice_vsi *vsi); +int ice_get_link_speed_mbps(struct ice_vsi *vsi); +int +ice_vsi_update_security(struct ice_vsi *vsi, void (*fill)(struct ice_vsi_ctx *)); + +void ice_vsi_ctx_set_antispoof(struct ice_vsi_ctx *ctx); + +void ice_vsi_ctx_clear_antispoof(struct ice_vsi_ctx *ctx); + +void ice_vsi_ctx_set_allow_override(struct ice_vsi_ctx *ctx); + +void ice_vsi_ctx_clear_allow_override(struct ice_vsi_ctx *ctx); + +bool ice_is_feature_supported(struct ice_pf *pf, enum ice_feature f); +void ice_clear_feature_support(struct ice_pf *pf, enum ice_feature f); +void ice_init_feature_support(struct ice_pf *pf); #endif /* !_ICE_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 06fa93e597fb..f099797f35e3 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -19,6 +19,8 @@ */ #define CREATE_TRACE_POINTS #include "ice_trace.h" +#include "ice_eswitch.h" +#include "ice_tc_lib.h" #define DRV_SUMMARY "Intel(R) Ethernet Connection E800 Series Linux Driver" static const char ice_driver_string[] = DRV_SUMMARY; @@ -42,16 +44,26 @@ MODULE_PARM_DESC(debug, "netif level (0=none,...,16=all)"); #endif /* !CONFIG_DYNAMIC_DEBUG */ static DEFINE_IDA(ice_aux_ida); +DEFINE_STATIC_KEY_FALSE(ice_xdp_locking_key); +EXPORT_SYMBOL(ice_xdp_locking_key); static struct workqueue_struct *ice_wq; static const struct net_device_ops ice_netdev_safe_mode_ops; static const struct net_device_ops ice_netdev_ops; -static int ice_vsi_open(struct ice_vsi *vsi); static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type); static void ice_vsi_release_all(struct ice_pf *pf); +static int ice_rebuild_channels(struct ice_pf *pf); +static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_adv_fltr); + +static int +ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, + void *cb_priv, enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)); + bool netif_is_ice(struct net_device *dev) { return dev && (dev->netdev_ops == &ice_netdev_ops); @@ -61,7 +73,7 @@ bool netif_is_ice(struct net_device *dev) * ice_get_tx_pending - returns number of Tx descriptors not processed * @ring: the ring of descriptors */ -static u16 ice_get_tx_pending(struct ice_ring *ring) +static u16 ice_get_tx_pending(struct ice_tx_ring *ring) { u16 head, tail; @@ -100,10 +112,15 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf) hw = &vsi->back->hw; - for (i = 0; i < vsi->num_txq; i++) { - struct ice_ring *tx_ring = vsi->tx_rings[i]; + ice_for_each_txq(vsi, i) { + struct ice_tx_ring *tx_ring = vsi->tx_rings[i]; + + if (!tx_ring) + continue; + if (ice_ring_ch_enabled(tx_ring)) + continue; - if (tx_ring && tx_ring->desc) { + if (tx_ring->desc) { /* If packet counter has not changed the queue is * likely stalled, so force an interrupt for this * queue. 
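The ice_xdp_locking_key static branch defined above is meant to keep XDP Tx cheap when there is one XDP ring per CPU: the branch stays disabled (and the lock is never touched) unless rings have to be shared, in which case ice_vsi_determine_xdp_res() enables it. A minimal sketch of that pattern, assuming the transmit path guards the shared ring with the tx_lock that the XDP ring setup below initializes; the body is illustrative, not the driver's actual ice_xmit_xdp_ring():

/* illustrative only: guard a possibly shared XDP Tx ring with the static branch */
static void ice_xdp_ring_lock_sketch(struct ice_tx_ring *xdp_ring)
{
	/* branch is patched in only when rings are shared between CPUs */
	if (static_branch_unlikely(&ice_xdp_locking_key))
		spin_lock(&xdp_ring->tx_lock);

	/* ... place descriptors on the XDP Tx ring and bump the tail ... */

	if (static_branch_unlikely(&ice_xdp_locking_key))
		spin_unlock(&xdp_ring->tx_lock);
}

In the common case the static branch compiles down to a no-op, so per-CPU XDP rings pay no locking cost.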
@@ -379,7 +396,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) ~IFF_PROMISC; goto out_promisc; } - ice_cfg_vlan_pruning(vsi, false, false); + ice_cfg_vlan_pruning(vsi, false); } } else { /* Clear Rx filter to remove traffic from wire */ @@ -393,7 +410,7 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi) goto out_promisc; } if (vsi->num_vlan > 1) - ice_cfg_vlan_pruning(vsi, true, false); + ice_cfg_vlan_pruning(vsi, true); } } } @@ -455,17 +472,21 @@ static void ice_pf_dis_all_vsi(struct ice_pf *pf, bool locked) } /** - * ice_prepare_for_reset - prep for the core to reset + * ice_prepare_for_reset - prep for reset * @pf: board private structure + * @reset_type: reset type requested * * Inform or close all dependent features in prep for reset. */ static void -ice_prepare_for_reset(struct ice_pf *pf) +ice_prepare_for_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { struct ice_hw *hw = &pf->hw; + struct ice_vsi *vsi; unsigned int i; + dev_dbg(ice_pf_to_dev(pf), "reset_type=%d\n", reset_type); + /* already prepared for reset */ if (test_bit(ICE_PREPARED_FOR_RESET, pf->state)) return; @@ -480,6 +501,38 @@ ice_prepare_for_reset(struct ice_pf *pf) ice_for_each_vf(pf, i) ice_set_vf_state_qs_dis(&pf->vf[i]); + /* release ADQ specific HW and SW resources */ + vsi = ice_get_main_vsi(pf); + if (!vsi) + goto skip; + + /* to be on safe side, reset orig_rss_size so that normal flow + * of deciding rss_size can take precedence + */ + vsi->orig_rss_size = 0; + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + if (reset_type == ICE_RESET_PFR) { + vsi->old_ena_tc = vsi->all_enatc; + vsi->old_numtc = vsi->all_numtc; + } else { + ice_remove_q_channels(vsi, true); + + /* for other reset type, do not support channel rebuild + * hence reset needed info + */ + vsi->old_ena_tc = 0; + vsi->all_enatc = 0; + vsi->old_numtc = 0; + vsi->all_numtc = 0; + vsi->req_txq = 0; + vsi->req_rxq = 0; + clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + memset(&vsi->mqprio_qopt, 0, sizeof(vsi->mqprio_qopt)); + } + } +skip: + /* clear SW filtering DB */ ice_clear_hw_tbls(hw); /* disable the VSIs and their queues that are not already DOWN */ @@ -499,8 +552,7 @@ ice_prepare_for_reset(struct ice_pf *pf) /** * ice_do_reset - Initiate one of many types of resets * @pf: board private structure - * @reset_type: reset type requested - * before this function was called. + * @reset_type: reset type requested before this function was called. */ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) { @@ -509,7 +561,7 @@ static void ice_do_reset(struct ice_pf *pf, enum ice_reset_req reset_type) dev_dbg(dev, "reset_type 0x%x requested\n", reset_type); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, reset_type); /* trigger the reset */ if (ice_reset(hw, reset_type)) { @@ -567,7 +619,7 @@ static void ice_reset_subtask(struct ice_pf *pf) /* return if no valid reset type requested */ if (reset_type == ICE_RESET_INVAL) return; - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, reset_type); /* make sure we are ready to rebuild */ if (ice_check_reset(&pf->hw)) { @@ -624,7 +676,10 @@ static void ice_print_topo_conflict(struct ice_vsi *vsi) netdev_info(vsi->netdev, "Potential misconfiguration of the Ethernet port detected. If it was not intended, please use the Intel (R) Ethernet Port Configuration Tool to address the issue.\n"); break; case ICE_AQ_LINK_TOPO_UNSUPP_MEDIA: - netdev_info(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. 
Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); + if (test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, vsi->back->flags)) + netdev_warn(vsi->netdev, "An unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules\n"); + else + netdev_err(vsi->netdev, "Rx/Tx is disabled on this device because an unsupported module type was detected. Refer to the Intel(R) Ethernet Adapters and Devices User Guide for a list of supported modules.\n"); break; default: break; @@ -882,6 +937,29 @@ static void ice_set_dflt_mib(struct ice_pf *pf) } /** + * ice_check_phy_fw_load - check if PHY FW load failed + * @pf: pointer to PF struct + * @link_cfg_err: bitmap from the link info structure + * + * check if external PHY FW load failed and print an error message if it did + */ +static void ice_check_phy_fw_load(struct ice_pf *pf, u8 link_cfg_err) +{ + if (!(link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE)) { + clear_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags); + return; + } + + if (test_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags)) + return; + + if (link_cfg_err & ICE_AQ_LINK_EXTERNAL_PHY_LOAD_FAILURE) { + dev_err(ice_pf_to_dev(pf), "Device failed to load the FW for the external PHY. Please download and install the latest NVM for your device and try again\n"); + set_bit(ICE_FLAG_PHY_FW_LOAD_FAILED, pf->flags); + } +} + +/** * ice_check_module_power * @pf: pointer to PF struct * @link_cfg_err: bitmap from the link info structure @@ -914,6 +992,20 @@ static void ice_check_module_power(struct ice_pf *pf, u8 link_cfg_err) } /** + * ice_check_link_cfg_err - check if link configuration failed + * @pf: pointer to the PF struct + * @link_cfg_err: bitmap from the link info structure + * + * print if any link configuration failure happens due to the value in the + * link_cfg_err parameter in the link info structure + */ +static void ice_check_link_cfg_err(struct ice_pf *pf, u8 link_cfg_err) +{ + ice_check_module_power(pf, link_cfg_err); + ice_check_phy_fw_load(pf, link_cfg_err); +} + +/** * ice_link_event - process the link event * @pf: PF that the link event is associated with * @pi: port_info for the port that the link event is associated with @@ -948,7 +1040,7 @@ ice_link_event(struct ice_pf *pf, struct ice_port_info *pi, bool link_up, pi->lport, ice_stat_str(status), ice_aq_str(pi->hw->adminq.sq_last_status)); - ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); /* Check if the link state is up after updating link info, and treat * this event as an UP event since the link is actually UP now. 
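The ice_check_phy_fw_load() helper above follows a warn-once shape: clear the sticky PF flag when the error bit is absent, and log only on the first transition into the error state so the message is not repeated on every link event. A compact sketch of that shape, using a hypothetical link_cfg_err bit and PF flag purely for illustration:

/* warn-once handling of a link_cfg_err bit; the FOO bit and flag are hypothetical */
static void ice_check_foo_err(struct ice_pf *pf, u8 link_cfg_err)
{
	if (!(link_cfg_err & ICE_AQ_LINK_FOO_ERR)) {	/* hypothetical bit */
		clear_bit(ICE_FLAG_FOO_ERR, pf->flags);	/* hypothetical flag */
		return;
	}

	/* already reported; stay quiet until the condition clears */
	if (test_bit(ICE_FLAG_FOO_ERR, pf->flags))
		return;

	dev_err(ice_pf_to_dev(pf), "firmware reported a FOO error on this port\n");
	set_bit(ICE_FLAG_FOO_ERR, pf->flags);
}

ice_check_link_cfg_err() then simply fans out to each of these per-bit checks so callers pass the link_cfg_err byte once.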
@@ -1026,7 +1118,8 @@ static int ice_init_link_events(struct ice_port_info *pi) u16 mask; mask = ~((u16)(ICE_AQ_LINK_EVENT_UPDOWN | ICE_AQ_LINK_EVENT_MEDIA_NA | - ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL)); + ICE_AQ_LINK_EVENT_MODULE_QUAL_FAIL | + ICE_AQ_LINK_EVENT_PHY_FW_LOAD_FAIL)); if (ice_aq_set_event_mask(pi->hw, pi->lport, mask, NULL)) { dev_dbg(ice_hw_to_dev(pi->hw), "Failed to set link event mask for port %d\n", @@ -1965,7 +2058,8 @@ static int ice_configure_phy(struct ice_vsi *vsi) ice_print_topo_conflict(vsi); - if (phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) + if (!test_bit(ICE_FLAG_LINK_LENIENT_MODE_ENA, pf->flags) && + phy->link_info.topo_media_conflict == ICE_AQ_LINK_TOPO_UNSUPP_MEDIA) return -EPERM; if (test_bit(ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA, pf->flags)) @@ -2096,7 +2190,7 @@ static void ice_check_media_subtask(struct ice_pf *pf) if (err) return; - ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); + ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { if (!test_bit(ICE_PHY_INIT_COMPLETE, pf->state)) @@ -2302,14 +2396,14 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename) irq_num = pf->msix_entries[base + vector].vector; - if (q_vector->tx.ring && q_vector->rx.ring) { + if (q_vector->tx.tx_ring && q_vector->rx.rx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "TxRx", rx_int_idx++); tx_int_idx++; - } else if (q_vector->rx.ring) { + } else if (q_vector->rx.rx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "rx", rx_int_idx++); - } else if (q_vector->tx.ring) { + } else if (q_vector->tx.tx_ring) { snprintf(q_vector->name, sizeof(q_vector->name) - 1, "%s-%s-%d", basename, "tx", tx_int_idx++); } else { @@ -2367,11 +2461,12 @@ free_q_irqs: static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) { struct device *dev = ice_pf_to_dev(vsi->back); - int i; + struct ice_tx_desc *tx_desc; + int i, j; - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { u16 xdp_q_idx = vsi->alloc_txq + i; - struct ice_ring *xdp_ring; + struct ice_tx_ring *xdp_ring; xdp_ring = kzalloc(sizeof(*xdp_ring), GFP_KERNEL); @@ -2380,16 +2475,29 @@ static int ice_xdp_alloc_setup_rings(struct ice_vsi *vsi) xdp_ring->q_index = xdp_q_idx; xdp_ring->reg_idx = vsi->txq_map[xdp_q_idx]; - xdp_ring->ring_active = false; xdp_ring->vsi = vsi; xdp_ring->netdev = NULL; + xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_rs = ICE_TX_THRESH - 1; xdp_ring->dev = dev; xdp_ring->count = vsi->num_tx_desc; WRITE_ONCE(vsi->xdp_rings[i], xdp_ring); if (ice_setup_tx_ring(xdp_ring)) goto free_xdp_rings; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring); + xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); + spin_lock_init(&xdp_ring->tx_lock); + for (j = 0; j < xdp_ring->count; j++) { + tx_desc = ICE_TX_DESC(xdp_ring, j); + tx_desc->cmd_type_offset_bsz = cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE); + } + } + + ice_for_each_rxq(vsi, i) { + if (static_key_enabled(&ice_xdp_locking_key)) + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i % vsi->num_xdp_txq]; + else + vsi->rx_rings[i]->xdp_ring = vsi->xdp_rings[i]; } return 0; @@ -2455,6 +2563,10 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) if (__ice_vsi_get_qs(&xdp_qs_cfg)) goto err_map_xdp; + if (static_key_enabled(&ice_xdp_locking_key)) + netdev_warn(vsi->netdev, + "Could not allocate one XDP Tx ring per CPU, XDP_TX/XDP_REDIRECT 
actions will be slower\n"); + if (ice_xdp_alloc_setup_rings(vsi)) goto clear_xdp_rings; @@ -2468,11 +2580,11 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) q_base = vsi->num_xdp_txq - xdp_rings_rem; for (q_id = q_base; q_id < (q_base + xdp_rings_per_v); q_id++) { - struct ice_ring *xdp_ring = vsi->xdp_rings[q_id]; + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_id]; xdp_ring->q_vector = q_vector; - xdp_ring->next = q_vector->tx.ring; - q_vector->tx.ring = xdp_ring; + xdp_ring->next = q_vector->tx.tx_ring; + q_vector->tx.tx_ring = xdp_ring; } xdp_rings_rem -= xdp_rings_per_v; } @@ -2501,7 +2613,7 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog) return 0; clear_xdp_rings: - for (i = 0; i < vsi->num_xdp_txq; i++) + ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { kfree_rcu(vsi->xdp_rings[i], rcu); vsi->xdp_rings[i] = NULL; @@ -2509,7 +2621,7 @@ clear_xdp_rings: err_map_xdp: mutex_lock(&pf->avail_q_mutex); - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; } @@ -2542,25 +2654,25 @@ int ice_destroy_xdp_rings(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, v_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[v_idx]; - struct ice_ring *ring; + struct ice_tx_ring *ring; - ice_for_each_ring(ring, q_vector->tx) + ice_for_each_tx_ring(ring, q_vector->tx) if (!ring->tx_buf || !ice_ring_is_xdp(ring)) break; /* restore the value of last node prior to XDP setup */ - q_vector->tx.ring = ring; + q_vector->tx.tx_ring = ring; } free_qmap: mutex_lock(&pf->avail_q_mutex); - for (i = 0; i < vsi->num_xdp_txq; i++) { + ice_for_each_xdp_txq(vsi, i) { clear_bit(vsi->txq_map[i + vsi->alloc_txq], pf->avail_txqs); vsi->txq_map[i + vsi->alloc_txq] = ICE_INVAL_Q_INDEX; } mutex_unlock(&pf->avail_q_mutex); - for (i = 0; i < vsi->num_xdp_txq; i++) + ice_for_each_xdp_txq(vsi, i) if (vsi->xdp_rings[i]) { if (vsi->xdp_rings[i]->desc) ice_free_tx_ring(vsi->xdp_rings[i]); @@ -2571,6 +2683,9 @@ free_qmap: devm_kfree(ice_pf_to_dev(pf), vsi->xdp_rings); vsi->xdp_rings = NULL; + if (static_key_enabled(&ice_xdp_locking_key)) + static_branch_dec(&ice_xdp_locking_key); + if (ice_is_reset_in_progress(pf->state) || !vsi->q_vectors[0]) return 0; @@ -2598,7 +2713,7 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) int i; ice_for_each_rxq(vsi, i) { - struct ice_ring *rx_ring = vsi->rx_rings[i]; + struct ice_rx_ring *rx_ring = vsi->rx_rings[i]; if (rx_ring->xsk_pool) napi_schedule(&rx_ring->q_vector->napi); @@ -2606,6 +2721,29 @@ static void ice_vsi_rx_napi_schedule(struct ice_vsi *vsi) } /** + * ice_vsi_determine_xdp_res - figure out how many Tx qs can XDP have + * @vsi: VSI to determine the count of XDP Tx qs + * + * returns 0 if Tx qs count is higher than at least half of CPU count, + * -ENOMEM otherwise + */ +int ice_vsi_determine_xdp_res(struct ice_vsi *vsi) +{ + u16 avail = ice_get_avail_txq_count(vsi->back); + u16 cpus = num_possible_cpus(); + + if (avail < cpus / 2) + return -ENOMEM; + + vsi->num_xdp_txq = min_t(u16, avail, cpus); + + if (vsi->num_xdp_txq < cpus) + static_branch_inc(&ice_xdp_locking_key); + + return 0; +} + +/** * ice_xdp_setup_prog - Add or remove XDP eBPF program * @vsi: VSI to setup XDP for * @prog: XDP program @@ -2634,10 +2772,14 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog, } if (!ice_is_xdp_ena_vsi(vsi) && prog) { - vsi->num_xdp_txq = vsi->alloc_rxq; - xdp_ring_err = 
ice_prepare_xdp_rings(vsi, prog); - if (xdp_ring_err) - NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + xdp_ring_err = ice_vsi_determine_xdp_res(vsi); + if (xdp_ring_err) { + NL_SET_ERR_MSG_MOD(extack, "Not enough Tx resources for XDP"); + } else { + xdp_ring_err = ice_prepare_xdp_rings(vsi, prog); + if (xdp_ring_err) + NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed"); + } } else if (ice_is_xdp_ena_vsi(vsi) && !prog) { xdp_ring_err = ice_destroy_xdp_rings(vsi); if (xdp_ring_err) @@ -3103,6 +3245,9 @@ static void ice_set_netdev_features(struct net_device *netdev) /* enable features */ netdev->features |= netdev->hw_features; + + netdev->hw_features |= NETIF_F_HW_TC; + /* encap and VLAN devices inherit default, csumo and tso features */ netdev->hw_enc_features |= dflt_features | csumo_features | tso_features; @@ -3139,7 +3284,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi) if (vsi->type == ICE_VSI_PF) { SET_NETDEV_DEV(netdev, ice_pf_to_dev(vsi->back)); ether_addr_copy(mac_addr, vsi->port_info->mac.perm_addr); - ether_addr_copy(netdev->dev_addr, mac_addr); + eth_hw_addr_set(netdev, mac_addr); ether_addr_copy(netdev->perm_addr, mac_addr); } @@ -3182,7 +3327,14 @@ void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size) static struct ice_vsi * ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID, NULL); +} + +static struct ice_vsi * +ice_chnl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, + struct ice_channel *ch) +{ + return ice_vsi_setup(pf, pi, ICE_VSI_CHNL, ICE_INVAL_VFID, ch); } /** @@ -3196,7 +3348,7 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) static struct ice_vsi * ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID, NULL); } /** @@ -3210,7 +3362,7 @@ ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) struct ice_vsi * ice_lb_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi) { - return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID); + return ice_vsi_setup(pf, pi, ICE_VSI_LB, ICE_INVAL_VFID, NULL); } /** @@ -3235,7 +3387,7 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto, /* Enable VLAN pruning when a VLAN other than 0 is added */ if (!ice_vsi_is_vlan_pruning_ena(vsi)) { - ret = ice_cfg_vlan_pruning(vsi, true, false); + ret = ice_cfg_vlan_pruning(vsi, true); if (ret) return ret; } @@ -3279,13 +3431,70 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, /* Disable pruning when VLAN 0 is the only VLAN rule */ if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi)) - ret = ice_cfg_vlan_pruning(vsi, false, false); + ret = ice_cfg_vlan_pruning(vsi, false); set_bit(ICE_VSI_VLAN_FLTR_CHANGED, vsi->state); return ret; } /** + * ice_rep_indr_tc_block_unbind + * @cb_priv: indirection block private data + */ +static void ice_rep_indr_tc_block_unbind(void *cb_priv) +{ + struct ice_indr_block_priv *indr_priv = cb_priv; + + list_del(&indr_priv->list); + kfree(indr_priv); +} + +/** + * ice_tc_indir_block_unregister - Unregister TC indirect block notifications + * @vsi: VSI struct which has the netdev + */ +static void ice_tc_indir_block_unregister(struct ice_vsi *vsi) +{ + struct ice_netdev_priv *np = netdev_priv(vsi->netdev); + + flow_indr_dev_unregister(ice_indr_setup_tc_cb, np, + 
ice_rep_indr_tc_block_unbind); +} + +/** + * ice_tc_indir_block_remove - clean indirect TC block notifications + * @pf: PF structure + */ +static void ice_tc_indir_block_remove(struct ice_pf *pf) +{ + struct ice_vsi *pf_vsi = ice_get_main_vsi(pf); + + if (!pf_vsi) + return; + + ice_tc_indir_block_unregister(pf_vsi); +} + +/** + * ice_tc_indir_block_register - Register TC indirect block notifications + * @vsi: VSI struct which has the netdev + * + * Returns 0 on success, negative value on failure + */ +static int ice_tc_indir_block_register(struct ice_vsi *vsi) +{ + struct ice_netdev_priv *np; + + if (!vsi || !vsi->netdev) + return -EINVAL; + + np = netdev_priv(vsi->netdev); + + INIT_LIST_HEAD(&np->tc_indr_block_priv_list); + return flow_indr_dev_register(ice_indr_setup_tc_cb, np); +} + +/** * ice_setup_pf_sw - Setup the HW switch on startup or after reset * @pf: board private structure * @@ -3293,6 +3502,7 @@ ice_vlan_rx_kill_vid(struct net_device *netdev, __always_unused __be16 proto, */ static int ice_setup_pf_sw(struct ice_pf *pf) { + struct device *dev = ice_pf_to_dev(pf); struct ice_vsi *vsi; int status = 0; @@ -3303,6 +3513,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf) if (!vsi) return -ENOMEM; + /* init channel list */ + INIT_LIST_HEAD(&vsi->ch_list); + status = ice_cfg_netdev(vsi); if (status) { status = -ENODEV; @@ -3311,6 +3524,13 @@ static int ice_setup_pf_sw(struct ice_pf *pf) /* netdev has to be configured before setting frame size */ ice_vsi_cfg_frame_size(vsi); + /* init indirect block notifications */ + status = ice_tc_indir_block_register(vsi); + if (status) { + dev_err(dev, "Failed to register netdev notifier\n"); + goto unroll_cfg_netdev; + } + /* Setup DCB netlink interface */ ice_dcbnl_setup(vsi); @@ -3322,7 +3542,7 @@ static int ice_setup_pf_sw(struct ice_pf *pf) status = ice_set_cpu_rx_rmap(vsi); if (status) { - dev_err(ice_pf_to_dev(pf), "Failed to set CPU Rx map VSI %d error %d\n", + dev_err(dev, "Failed to set CPU Rx map VSI %d error %d\n", vsi->vsi_num, status); status = -EINVAL; goto unroll_napi_add; @@ -3335,8 +3555,9 @@ static int ice_setup_pf_sw(struct ice_pf *pf) free_cpu_rx_map: ice_free_cpu_rx_rmap(vsi); - unroll_napi_add: + ice_tc_indir_block_unregister(vsi); +unroll_cfg_netdev: if (vsi) { ice_napi_del(vsi); if (vsi->netdev) { @@ -3538,6 +3759,13 @@ static int ice_ena_msix_range(struct ice_pf *pf) v_left -= needed; } + /* reserve for switchdev */ + needed = ICE_ESWITCH_MSIX; + if (v_left < needed) + goto no_hw_vecs_left_err; + v_budget += needed; + v_left -= needed; + /* total used for non-traffic vectors */ v_other = v_budget; @@ -4170,11 +4398,11 @@ static int ice_register_netdev(struct ice_pf *pf) set_bit(ICE_VSI_NETDEV_REGISTERED, vsi->state); netif_carrier_off(vsi->netdev); netif_tx_stop_all_queues(vsi->netdev); - err = ice_devlink_create_port(vsi); + err = ice_devlink_create_pf_port(pf); if (err) goto err_devlink_create; - devlink_port_type_eth_set(&vsi->devlink_port, vsi->netdev); + devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev); return 0; err_devlink_create: @@ -4261,12 +4489,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pf->msg_enable = netif_msg_init(debug, ICE_DFLT_NETIF_M); - err = ice_devlink_register(pf); - if (err) { - dev_err(dev, "ice_devlink_register failed: %d\n", err); - goto err_exit_unroll; - } - #ifndef CONFIG_DYNAMIC_DEBUG if (debug < -1) hw->debug_mask = debug; @@ -4279,6 +4501,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) goto 
err_exit_unroll; } + ice_init_feature_support(pf); + ice_request_fw(pf); /* if ice_request_fw fails, ICE_FLAG_ADV_FEATURES bit won't be @@ -4414,7 +4638,8 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) ice_init_link_dflt_override(pf->hw.port_info); - ice_check_module_power(pf, pf->hw.port_info->phy.link_info.link_cfg_err); + ice_check_link_cfg_err(pf, + pf->hw.port_info->phy.link_info.link_cfg_err); /* if media available, initialize PHY settings */ if (pf->hw.port_info->phy.link_info.link_info & @@ -4500,6 +4725,7 @@ probe_done: dev_warn(dev, "RDMA is not supported on this device\n"); } + ice_devlink_register(pf); return 0; err_init_aux_unroll: @@ -4523,7 +4749,6 @@ err_init_pf_unroll: ice_devlink_destroy_regions(pf); ice_deinit_hw(hw); err_exit_unroll: - ice_devlink_unregister(pf); pci_disable_pcie_error_reporting(pdev); pci_disable_device(pdev); return err; @@ -4600,15 +4825,15 @@ static void ice_remove(struct pci_dev *pdev) struct ice_pf *pf = pci_get_drvdata(pdev); int i; - if (!pf) - return; - + ice_devlink_unregister(pf); for (i = 0; i < ICE_MAX_RESET_WAIT; i++) { if (!ice_is_reset_in_progress(pf->state)) break; msleep(100); } + ice_tc_indir_block_remove(pf); + if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) { set_bit(ICE_VF_RESETS_DISABLED, pf->state); ice_free_vfs(pf); @@ -4640,7 +4865,6 @@ static void ice_remove(struct pci_dev *pdev) ice_deinit_pf(pf); ice_devlink_destroy_regions(pf); ice_deinit_hw(&pf->hw); - ice_devlink_unregister(pf); /* Issue a PFR as part of the prescribed driver unload flow. Do not * do it via ice_schedule_reset() since there is no need to rebuild @@ -4902,7 +5126,7 @@ ice_pci_err_detected(struct pci_dev *pdev, pci_channel_state_t err) if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { set_bit(ICE_PFR_REQ, pf->state); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, ICE_RESET_PFR); } } @@ -4994,7 +5218,7 @@ static void ice_pci_err_reset_prepare(struct pci_dev *pdev) if (!test_bit(ICE_PREPARED_FOR_RESET, pf->state)) { set_bit(ICE_PFR_REQ, pf->state); - ice_prepare_for_reset(pf); + ice_prepare_for_reset(pf, ICE_RESET_PFR); } } } @@ -5150,10 +5374,16 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi) return -EBUSY; } + if (ice_chnl_dmac_fltr_cnt(pf)) { + netdev_err(netdev, "can't set mac %pM. Device has tc-flower filters, delete all of them and try again\n", + mac); + return -EAGAIN; + } + netif_addr_lock_bh(netdev); ether_addr_copy(old_mac, netdev->dev_addr); /* change the netdev's MAC address */ - memcpy(netdev->dev_addr, mac, netdev->addr_len); + eth_hw_addr_set(netdev, mac); netif_addr_unlock_bh(netdev); /* Clean up old MAC filter. Not an error if old filter doesn't exist */ @@ -5181,7 +5411,7 @@ err_update_filters: netdev_err(netdev, "can't set MAC %pM. 
filter update failed\n", mac); netif_addr_lock_bh(netdev); - ether_addr_copy(netdev->dev_addr, old_mac); + eth_hw_addr_set(netdev, old_mac); netif_addr_unlock_bh(netdev); return err; } @@ -5386,10 +5616,10 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) if ((features & NETIF_F_HW_VLAN_CTAG_FILTER) && !(netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) - ret = ice_cfg_vlan_pruning(vsi, true, false); + ret = ice_cfg_vlan_pruning(vsi, true); else if (!(features & NETIF_F_HW_VLAN_CTAG_FILTER) && (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER)) - ret = ice_cfg_vlan_pruning(vsi, false, false); + ret = ice_cfg_vlan_pruning(vsi, false); if ((features & NETIF_F_NTUPLE) && !(netdev->features & NETIF_F_NTUPLE)) { @@ -5401,6 +5631,18 @@ ice_set_features(struct net_device *netdev, netdev_features_t features) ice_clear_arfs(vsi); } + /* don't turn off hw_tc_offload when ADQ is already enabled */ + if (!(features & NETIF_F_HW_TC) && ice_is_adq_active(pf)) { + dev_err(ice_pf_to_dev(pf), "ADQ is active, can't turn hw_tc_offload off\n"); + return -EACCES; + } + + if ((features & NETIF_F_HW_TC) && + !(netdev->features & NETIF_F_HW_TC)) + set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + else + clear_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + return ret; } @@ -5450,77 +5692,59 @@ int ice_vsi_cfg(struct ice_vsi *vsi) } /* THEORY OF MODERATION: - * The below code creates custom DIM profiles for use by this driver, because - * the ice driver hardware works differently than the hardware that DIMLIB was + * The ice driver hardware works differently than the hardware that DIMLIB was * originally made for. ice hardware doesn't have packet count limits that * can trigger an interrupt, but it *does* have interrupt rate limit support, - * and this code adds that capability to be used by the driver when it's using - * DIMLIB. The DIMLIB code was always designed to be a suggestion to the driver - * for how to "respond" to traffic and interrupts, so this driver uses a - * slightly different set of moderation parameters to get best performance. + * which is hard-coded to a limit of 250,000 ints/second. + * If not using dynamic moderation, the INTRL value can be modified + * by ethtool rx-usecs-high. */ struct ice_dim { /* the throttle rate for interrupts, basically worst case delay before * an initial interrupt fires, value is stored in microseconds. */ u16 itr; - /* the rate limit for interrupts, which can cap a delay from a small - * ITR at a certain amount of interrupts per second. f.e. a 2us ITR - * could yield as much as 500,000 interrupts per second, but with a - * 10us rate limit, it limits to 100,000 interrupts per second. Value - * is stored in microseconds. - */ - u16 intrl; }; /* Make a different profile for Rx that doesn't allow quite so aggressive - * moderation at the high end (it maxes out at 128us or about 8k interrupts a - * second. The INTRL/rate parameters here are only useful to cap small ITR - * values, which is why for larger ITR's - like 128, which can only generate - * 8k interrupts per second, there is no point to rate limit and the values - * are set to zero. The rate limit values do affect latency, and so must - * be reasonably small so to not impact latency sensitive tests. + * moderation at the high end (it maxes out at 126us or about 8k interrupts a + * second. 
*/ static const struct ice_dim rx_profile[] = { - {2, 10}, - {8, 16}, - {32, 0}, - {96, 0}, - {128, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {16}, /* 62,500 ints/s */ + {62}, /* 16,129 ints/s */ + {126} /* 7,936 ints/s */ }; /* The transmit profile, which has the same sorts of values * as the previous struct */ static const struct ice_dim tx_profile[] = { - {2, 10}, - {8, 16}, - {64, 0}, - {128, 0}, - {256, 0} + {2}, /* 500,000 ints/s, capped at 250K by INTRL */ + {8}, /* 125,000 ints/s */ + {40}, /* 16,125 ints/s */ + {128}, /* 7,812 ints/s */ + {256} /* 3,906 ints/s */ }; static void ice_tx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, tx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(tx_profile)) - dim->profile_ix = ARRAY_SIZE(tx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(tx_profile)); /* look up the values in our local table */ itr = tx_profile[dim->profile_ix].itr; - intrl = tx_profile[dim->profile_ix].intrl; - ice_trace(tx_dim_work, q_vector, dim); + ice_trace(tx_dim_work, container_of(rc, struct ice_q_vector, tx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } @@ -5528,28 +5752,65 @@ static void ice_tx_dim_work(struct work_struct *work) static void ice_rx_dim_work(struct work_struct *work) { struct ice_ring_container *rc; - struct ice_q_vector *q_vector; struct dim *dim; - u16 itr, intrl; + u16 itr; dim = container_of(work, struct dim, work); - rc = container_of(dim, struct ice_ring_container, dim); - q_vector = container_of(rc, struct ice_q_vector, rx); + rc = (struct ice_ring_container *)dim->priv; - if (dim->profile_ix >= ARRAY_SIZE(rx_profile)) - dim->profile_ix = ARRAY_SIZE(rx_profile) - 1; + WARN_ON(dim->profile_ix >= ARRAY_SIZE(rx_profile)); /* look up the values in our local table */ itr = rx_profile[dim->profile_ix].itr; - intrl = rx_profile[dim->profile_ix].intrl; - ice_trace(rx_dim_work, q_vector, dim); + ice_trace(rx_dim_work, container_of(rc, struct ice_q_vector, rx), dim); ice_write_itr(rc, itr); - ice_write_intrl(q_vector, intrl); dim->state = DIM_START_MEASURE; } +#define ICE_DIM_DEFAULT_PROFILE_IX 1 + +/** + * ice_init_moderation - set up interrupt moderation + * @q_vector: the vector containing rings to be configured + * + * Set up interrupt moderation registers, with the intent to do the right thing + * when called from reset or from probe, and whether or not dynamic moderation + * is enabled or not. Take special care to write all the registers in both + * dynamic moderation mode or not in order to make sure hardware is in a known + * state. + */ +static void ice_init_moderation(struct ice_q_vector *q_vector) +{ + struct ice_ring_container *rc; + bool tx_dynamic, rx_dynamic; + + rc = &q_vector->tx; + INIT_WORK(&rc->dim.work, ice_tx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + tx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial TX ITR to match the above */ + ice_write_itr(rc, tx_dynamic ? 
+ tx_profile[rc->dim.profile_ix].itr : rc->itr_setting); + + rc = &q_vector->rx; + INIT_WORK(&rc->dim.work, ice_rx_dim_work); + rc->dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + rc->dim.profile_ix = ICE_DIM_DEFAULT_PROFILE_IX; + rc->dim.priv = rc; + rx_dynamic = ITR_IS_DYNAMIC(rc); + + /* set the initial RX ITR to match the above */ + ice_write_itr(rc, rx_dynamic ? rx_profile[rc->dim.profile_ix].itr : + rc->itr_setting); + + ice_set_q_vector_intrl(q_vector); +} + /** * ice_napi_enable_all - Enable NAPI for all q_vectors in the VSI * @vsi: the VSI being configured @@ -5564,13 +5825,9 @@ static void ice_napi_enable_all(struct ice_vsi *vsi) ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - INIT_WORK(&q_vector->tx.dim.work, ice_tx_dim_work); - q_vector->tx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; + ice_init_moderation(q_vector); - INIT_WORK(&q_vector->rx.dim.work, ice_rx_dim_work); - q_vector->rx.dim.mode = DIM_CQ_PERIOD_MODE_START_FROM_EQE; - - if (q_vector->rx.ring || q_vector->tx.ring) + if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) napi_enable(&q_vector->napi); } } @@ -5630,7 +5887,8 @@ int ice_up(struct ice_vsi *vsi) /** * ice_fetch_u64_stats_per_ring - get packets and bytes stats per ring - * @ring: Tx or Rx ring to read stats from + * @syncp: pointer to u64_stats_sync + * @stats: stats that pkts and bytes count will be taken from * @pkts: packets stats counter * @bytes: bytes stats counter * @@ -5638,19 +5896,16 @@ int ice_up(struct ice_vsi *vsi) * that needs to be performed to read u64 values in 32 bit machine. */ static void -ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes) +ice_fetch_u64_stats_per_ring(struct u64_stats_sync *syncp, struct ice_q_stats stats, + u64 *pkts, u64 *bytes) { unsigned int start; - *pkts = 0; - *bytes = 0; - if (!ring) - return; do { - start = u64_stats_fetch_begin_irq(&ring->syncp); - *pkts = ring->stats.pkts; - *bytes = ring->stats.bytes; - } while (u64_stats_fetch_retry_irq(&ring->syncp, start)); + start = u64_stats_fetch_begin_irq(syncp); + *pkts = stats.pkts; + *bytes = stats.bytes; + } while (u64_stats_fetch_retry_irq(syncp, start)); } /** @@ -5660,18 +5915,19 @@ ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes) * @count: number of rings */ static void -ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_ring **rings, +ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_tx_ring **rings, u16 count) { struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats; u16 i; for (i = 0; i < count; i++) { - struct ice_ring *ring; - u64 pkts, bytes; + struct ice_tx_ring *ring; + u64 pkts = 0, bytes = 0; ring = READ_ONCE(rings[i]); - ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + if (ring) + ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); vsi_stats->tx_packets += pkts; vsi_stats->tx_bytes += bytes; vsi->tx_restart += ring->tx_stats.restart_q; @@ -5710,9 +5966,9 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi) /* update Rx rings counters */ ice_for_each_rxq(vsi, i) { - struct ice_ring *ring = READ_ONCE(vsi->rx_rings[i]); + struct ice_rx_ring *ring = READ_ONCE(vsi->rx_rings[i]); - ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes); + ice_fetch_u64_stats_per_ring(&ring->syncp, ring->stats, &pkts, &bytes); vsi_stats->rx_packets += pkts; vsi_stats->rx_bytes += bytes; vsi->rx_buf_failed += ring->rx_stats.alloc_buf_failed; @@ -5976,7 +6232,7 @@ static void ice_napi_disable_all(struct ice_vsi *vsi) 
ice_for_each_q_vector(vsi, q_idx) { struct ice_q_vector *q_vector = vsi->q_vectors[q_idx]; - if (q_vector->rx.ring || q_vector->tx.ring) + if (q_vector->rx.rx_ring || q_vector->tx.tx_ring) napi_disable(&q_vector->napi); cancel_work_sync(&q_vector->tx.dim.work); @@ -5995,9 +6251,11 @@ int ice_down(struct ice_vsi *vsi) /* Caller of this function is expected to set the * vsi->state ICE_DOWN bit */ - if (vsi->netdev) { + if (vsi->netdev && vsi->type == ICE_VSI_PF) { netif_carrier_off(vsi->netdev); netif_tx_disable(vsi->netdev); + } else if (vsi->type == ICE_VSI_SWITCHDEV_CTRL) { + ice_eswitch_stop_all_tx_queues(vsi->back); } ice_vsi_dis_irq(vsi); @@ -6059,12 +6317,13 @@ int ice_vsi_setup_tx_rings(struct ice_vsi *vsi) } ice_for_each_txq(vsi, i) { - struct ice_ring *ring = vsi->tx_rings[i]; + struct ice_tx_ring *ring = vsi->tx_rings[i]; if (!ring) return -EINVAL; - ring->netdev = vsi->netdev; + if (vsi->netdev) + ring->netdev = vsi->netdev; err = ice_setup_tx_ring(ring); if (err) break; @@ -6090,12 +6349,13 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi) } ice_for_each_rxq(vsi, i) { - struct ice_ring *ring = vsi->rx_rings[i]; + struct ice_rx_ring *ring = vsi->rx_rings[i]; if (!ring) return -EINVAL; - ring->netdev = vsi->netdev; + if (vsi->netdev) + ring->netdev = vsi->netdev; err = ice_setup_rx_ring(ring); if (err) break; @@ -6168,7 +6428,7 @@ err_setup_tx: * * Returns 0 on success, negative value on error */ -static int ice_vsi_open(struct ice_vsi *vsi) +int ice_vsi_open(struct ice_vsi *vsi) { char int_name[ICE_INT_NAME_STR_LEN]; struct ice_pf *pf = vsi->back; @@ -6193,14 +6453,16 @@ static int ice_vsi_open(struct ice_vsi *vsi) if (err) goto err_setup_rx; - /* Notify the stack of the actual queue counts. */ - err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); - if (err) - goto err_set_qs; + if (vsi->type == ICE_VSI_PF) { + /* Notify the stack of the actual queue counts. 
*/ + err = netif_set_real_num_tx_queues(vsi->netdev, vsi->num_txq); + if (err) + goto err_set_qs; - err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); - if (err) - goto err_set_qs; + err = netif_set_real_num_rx_queues(vsi->netdev, vsi->num_rxq); + if (err) + goto err_set_qs; + } err = ice_up_complete(vsi); if (err) @@ -6235,6 +6497,9 @@ static void ice_vsi_release_all(struct ice_pf *pf) if (!pf->vsi[i]) continue; + if (pf->vsi[i]->type == ICE_VSI_CHNL) + continue; + err = ice_vsi_release(pf->vsi[i]); if (err) dev_dbg(ice_pf_to_dev(pf), "Failed to release pf->vsi[%d], err %d, vsi_num = %d\n", @@ -6439,6 +6704,21 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type) goto err_vsi_rebuild; } + err = ice_vsi_rebuild_by_type(pf, ICE_VSI_SWITCHDEV_CTRL); + if (err) { + dev_err(dev, "Switchdev CTRL VSI rebuild failed: %d\n", err); + goto err_vsi_rebuild; + } + + if (reset_type == ICE_RESET_PFR) { + err = ice_rebuild_channels(pf); + if (err) { + dev_err(dev, "failed to rebuild and replay ADQ VSIs, err %d\n", + err); + goto err_vsi_rebuild; + } + } + /* If Flow Director is active */ if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) { err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL); @@ -6985,7 +7265,7 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh, static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) { struct ice_netdev_priv *np = netdev_priv(netdev); - struct ice_ring *tx_ring = NULL; + struct ice_tx_ring *tx_ring = NULL; struct ice_vsi *vsi = np->vsi; struct ice_pf *pf = vsi->back; u32 i; @@ -7003,7 +7283,7 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) } /* now that we have an index, find the tx_ring struct */ - for (i = 0; i < vsi->num_txq; i++) + ice_for_each_txq(vsi, i) if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc) if (txqueue == vsi->tx_rings[i]->q_index) { tx_ring = vsi->tx_rings[i]; @@ -7060,6 +7340,1050 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue) } /** + * ice_setup_tc_cls_flower - flower classifier offloads + * @np: net device to configure + * @filter_dev: device on which filter is added + * @cls_flower: offload data + */ +static int +ice_setup_tc_cls_flower(struct ice_netdev_priv *np, + struct net_device *filter_dev, + struct flow_cls_offload *cls_flower) +{ + struct ice_vsi *vsi = np->vsi; + + if (cls_flower->common.chain_index) + return -EOPNOTSUPP; + + switch (cls_flower->command) { + case FLOW_CLS_REPLACE: + return ice_add_cls_flower(filter_dev, vsi, cls_flower); + case FLOW_CLS_DESTROY: + return ice_del_cls_flower(vsi, cls_flower); + default: + return -EINVAL; + } +} + +/** + * ice_setup_tc_block_cb - callback handler registered for TC block + * @type: TC SETUP type + * @type_data: TC flower offload data that contains user input + * @cb_priv: netdev private data + */ +static int +ice_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + struct ice_netdev_priv *np = cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_setup_tc_cls_flower(np, np->vsi->netdev, + type_data); + default: + return -EOPNOTSUPP; + } +} + +/** + * ice_validate_mqprio_qopt - Validate TCF input parameters + * @vsi: Pointer to VSI + * @mqprio_qopt: input parameters for mqprio queue configuration + * + * This function validates MQPRIO params, such as qcount (power of 2 wherever + * needed), and make sure user doesn't specify qcount and BW rate limit + * for TCs, which are more than "num_tc" + */ +static int 
+ice_validate_mqprio_qopt(struct ice_vsi *vsi, + struct tc_mqprio_qopt_offload *mqprio_qopt) +{ + u64 sum_max_rate = 0, sum_min_rate = 0; + int non_power_of_2_qcount = 0; + struct ice_pf *pf = vsi->back; + int max_rss_q_cnt = 0; + struct device *dev; + int i, speed; + u8 num_tc; + + if (vsi->type != ICE_VSI_PF) + return -EINVAL; + + if (mqprio_qopt->qopt.offset[0] != 0 || + mqprio_qopt->qopt.num_tc < 1 || + mqprio_qopt->qopt.num_tc > ICE_CHNL_MAX_TC) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + vsi->ch_rss_size = 0; + num_tc = mqprio_qopt->qopt.num_tc; + + for (i = 0; num_tc; i++) { + int qcount = mqprio_qopt->qopt.count[i]; + u64 max_rate, min_rate, rem; + + if (!qcount) + return -EINVAL; + + if (is_power_of_2(qcount)) { + if (non_power_of_2_qcount && + qcount > non_power_of_2_qcount) { + dev_err(dev, "qcount[%d] cannot be greater than non power of 2 qcount[%d]\n", + qcount, non_power_of_2_qcount); + return -EINVAL; + } + if (qcount > max_rss_q_cnt) + max_rss_q_cnt = qcount; + } else { + if (non_power_of_2_qcount && + qcount != non_power_of_2_qcount) { + dev_err(dev, "Only one non power of 2 qcount allowed[%d,%d]\n", + qcount, non_power_of_2_qcount); + return -EINVAL; + } + if (qcount < max_rss_q_cnt) { + dev_err(dev, "non power of 2 qcount[%d] cannot be less than other qcount[%d]\n", + qcount, max_rss_q_cnt); + return -EINVAL; + } + max_rss_q_cnt = qcount; + non_power_of_2_qcount = qcount; + } + + /* TC command takes input in K/N/Gbps or K/M/Gbit etc but + * converts the bandwidth rate limit into Bytes/s when + * passing it down to the driver. So convert input bandwidth + * from Bytes/s to Kbps + */ + max_rate = mqprio_qopt->max_rate[i]; + max_rate = div_u64(max_rate, ICE_BW_KBPS_DIVISOR); + sum_max_rate += max_rate; + + /* min_rate is minimum guaranteed rate and it can't be zero */ + min_rate = mqprio_qopt->min_rate[i]; + min_rate = div_u64(min_rate, ICE_BW_KBPS_DIVISOR); + sum_min_rate += min_rate; + + if (min_rate && min_rate < ICE_MIN_BW_LIMIT) { + dev_err(dev, "TC%d: min_rate(%llu Kbps) < %u Kbps\n", i, + min_rate, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + iter_div_u64_rem(min_rate, ICE_MIN_BW_LIMIT, &rem); + if (rem) { + dev_err(dev, "TC%d: Min Rate not multiple of %u Kbps", + i, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + iter_div_u64_rem(max_rate, ICE_MIN_BW_LIMIT, &rem); + if (rem) { + dev_err(dev, "TC%d: Max Rate not multiple of %u Kbps", + i, ICE_MIN_BW_LIMIT); + return -EINVAL; + } + + /* min_rate can't be more than max_rate, except when max_rate + * is zero (implies max_rate sought is max line rate). In such + * a case min_rate can be more than max. 
+ */ + if (max_rate && min_rate > max_rate) { + dev_err(dev, "min_rate %llu Kbps can't be more than max_rate %llu Kbps\n", + min_rate, max_rate); + return -EINVAL; + } + + if (i >= mqprio_qopt->qopt.num_tc - 1) + break; + if (mqprio_qopt->qopt.offset[i + 1] != + (mqprio_qopt->qopt.offset[i] + qcount)) + return -EINVAL; + } + if (vsi->num_rxq < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + if (vsi->num_txq < + (mqprio_qopt->qopt.offset[i] + mqprio_qopt->qopt.count[i])) + return -EINVAL; + + speed = ice_get_link_speed_kbps(vsi); + if (sum_max_rate && sum_max_rate > (u64)speed) { + dev_err(dev, "Invalid max Tx rate(%llu) Kbps > speed(%u) Kbps specified\n", + sum_max_rate, speed); + return -EINVAL; + } + if (sum_min_rate && sum_min_rate > (u64)speed) { + dev_err(dev, "Invalid min Tx rate(%llu) Kbps > speed (%u) Kbps specified\n", + sum_min_rate, speed); + return -EINVAL; + } + + /* make sure vsi->ch_rss_size is set correctly based on TC's qcount */ + vsi->ch_rss_size = max_rss_q_cnt; + + return 0; +} + +/** + * ice_add_channel - add a channel by adding VSI + * @pf: ptr to PF device + * @sw_id: underlying HW switching element ID + * @ch: ptr to channel structure + * + * Add a channel (VSI) using add_vsi and queue_map + */ +static int ice_add_channel(struct ice_pf *pf, u16 sw_id, struct ice_channel *ch) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_vsi *vsi; + + if (ch->type != ICE_VSI_CHNL) { + dev_err(dev, "add new VSI failed, ch->type %d\n", ch->type); + return -EINVAL; + } + + vsi = ice_chnl_vsi_setup(pf, pf->hw.port_info, ch); + if (!vsi || vsi->type != ICE_VSI_CHNL) { + dev_err(dev, "create chnl VSI failure\n"); + return -EINVAL; + } + + ch->sw_id = sw_id; + ch->vsi_num = vsi->vsi_num; + ch->info.mapping_flags = vsi->info.mapping_flags; + ch->ch_vsi = vsi; + /* set the back pointer of channel for newly created VSI */ + vsi->ch = ch; + + memcpy(&ch->info.q_mapping, &vsi->info.q_mapping, + sizeof(vsi->info.q_mapping)); + memcpy(&ch->info.tc_mapping, vsi->info.tc_mapping, + sizeof(vsi->info.tc_mapping)); + + return 0; +} + +/** + * ice_chnl_cfg_res + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Configure channel specific resources such as rings, vector. 
+ */ +static void ice_chnl_cfg_res(struct ice_vsi *vsi, struct ice_channel *ch) +{ + int i; + + for (i = 0; i < ch->num_txq; i++) { + struct ice_q_vector *tx_q_vector, *rx_q_vector; + struct ice_ring_container *rc; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + + tx_ring = vsi->tx_rings[ch->base_q + i]; + rx_ring = vsi->rx_rings[ch->base_q + i]; + if (!tx_ring || !rx_ring) + continue; + + /* setup ring being channel enabled */ + tx_ring->ch = ch; + rx_ring->ch = ch; + + /* following code block sets up vector specific attributes */ + tx_q_vector = tx_ring->q_vector; + rx_q_vector = rx_ring->q_vector; + if (!tx_q_vector && !rx_q_vector) + continue; + + if (tx_q_vector) { + tx_q_vector->ch = ch; + /* setup Tx and Rx ITR setting if DIM is off */ + rc = &tx_q_vector->tx; + if (!ITR_IS_DYNAMIC(rc)) + ice_write_itr(rc, rc->itr_setting); + } + if (rx_q_vector) { + rx_q_vector->ch = ch; + /* setup Tx and Rx ITR setting if DIM is off */ + rc = &rx_q_vector->rx; + if (!ITR_IS_DYNAMIC(rc)) + ice_write_itr(rc, rc->itr_setting); + } + } + + /* it is safe to assume that, if channel has non-zero num_t[r]xq, then + * GLINT_ITR register would have written to perform in-context + * update, hence perform flush + */ + if (ch->num_txq || ch->num_rxq) + ice_flush(&vsi->back->hw); +} + +/** + * ice_cfg_chnl_all_res - configure channel resources + * @vsi: pte to main_vsi + * @ch: ptr to channel structure + * + * This function configures channel specific resources such as flow-director + * counter index, and other resources such as queues, vectors, ITR settings + */ +static void +ice_cfg_chnl_all_res(struct ice_vsi *vsi, struct ice_channel *ch) +{ + /* configure channel (aka ADQ) resources such as queues, vectors, + * ITR settings for channel specific vectors and anything else + */ + ice_chnl_cfg_res(vsi, ch); +} + +/** + * ice_setup_hw_channel - setup new channel + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * @sw_id: underlying HW switching element ID + * @type: type of channel to be created (VMDq2/VF) + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and configures Tx rings accordingly + */ +static int +ice_setup_hw_channel(struct ice_pf *pf, struct ice_vsi *vsi, + struct ice_channel *ch, u16 sw_id, u8 type) +{ + struct device *dev = ice_pf_to_dev(pf); + int ret; + + ch->base_q = vsi->next_base_q; + ch->type = type; + + ret = ice_add_channel(pf, sw_id, ch); + if (ret) { + dev_err(dev, "failed to add_channel using sw_id %u\n", sw_id); + return ret; + } + + /* configure/setup ADQ specific resources */ + ice_cfg_chnl_all_res(vsi, ch); + + /* make sure to update the next_base_q so that subsequent channel's + * (aka ADQ) VSI queue map is correct + */ + vsi->next_base_q = vsi->next_base_q + ch->num_rxq; + dev_dbg(dev, "added channel: vsi_num %u, num_rxq %u\n", ch->vsi_num, + ch->num_rxq); + + return 0; +} + +/** + * ice_setup_channel - setup new channel using uplink element + * @pf: ptr to PF device + * @vsi: the VSI being setup + * @ch: ptr to channel structure + * + * Setup new channel (VSI) based on specified type (VMDq2/VF) + * and uplink switching element + */ +static bool +ice_setup_channel(struct ice_pf *pf, struct ice_vsi *vsi, + struct ice_channel *ch) +{ + struct device *dev = ice_pf_to_dev(pf); + u16 sw_id; + int ret; + + if (vsi->type != ICE_VSI_PF) { + dev_err(dev, "unsupported parent VSI type(%d)\n", vsi->type); + return false; + } + + sw_id = pf->first_sw->sw_id; + + /* create channel (VSI) */ + ret = 
ice_setup_hw_channel(pf, vsi, ch, sw_id, ICE_VSI_CHNL); + if (ret) { + dev_err(dev, "failed to setup hw_channel\n"); + return false; + } + dev_dbg(dev, "successfully created channel()\n"); + + return ch->ch_vsi ? true : false; +} + +/** + * ice_set_bw_limit - setup BW limit for Tx traffic based on max_tx_rate + * @vsi: VSI to be configured + * @max_tx_rate: max Tx rate in Kbps to be configured as maximum BW limit + * @min_tx_rate: min Tx rate in Kbps to be configured as minimum BW limit + */ +static int +ice_set_bw_limit(struct ice_vsi *vsi, u64 max_tx_rate, u64 min_tx_rate) +{ + int err; + + err = ice_set_min_bw_limit(vsi, min_tx_rate); + if (err) + return err; + + return ice_set_max_bw_limit(vsi, max_tx_rate); +} + +/** + * ice_create_q_channel - function to create channel + * @vsi: VSI to be configured + * @ch: ptr to channel (it contains channel specific params) + * + * This function creates channel (VSI) using num_queues specified by user, + * reconfigs RSS if needed. + */ +static int ice_create_q_channel(struct ice_vsi *vsi, struct ice_channel *ch) +{ + struct ice_pf *pf = vsi->back; + struct device *dev; + + if (!ch) + return -EINVAL; + + dev = ice_pf_to_dev(pf); + if (!ch->num_txq || !ch->num_rxq) { + dev_err(dev, "Invalid num_queues requested: %d\n", ch->num_rxq); + return -EINVAL; + } + + if (!vsi->cnt_q_avail || vsi->cnt_q_avail < ch->num_txq) { + dev_err(dev, "cnt_q_avail (%u) less than num_queues %d\n", + vsi->cnt_q_avail, ch->num_txq); + return -EINVAL; + } + + if (!ice_setup_channel(pf, vsi, ch)) { + dev_info(dev, "Failed to setup channel\n"); + return -EINVAL; + } + /* configure BW rate limit */ + if (ch->ch_vsi && (ch->max_tx_rate || ch->min_tx_rate)) { + int ret; + + ret = ice_set_bw_limit(ch->ch_vsi, ch->max_tx_rate, + ch->min_tx_rate); + if (ret) + dev_err(dev, "failed to set Tx rate of %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->ch_vsi->vsi_num); + else + dev_dbg(dev, "set Tx rate of %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->ch_vsi->vsi_num); + } + + vsi->cnt_q_avail -= ch->num_txq; + + return 0; +} + +/** + * ice_rem_all_chnl_fltrs - removes all channel filters + * @pf: ptr to PF, TC-flower based filter are tracked at PF level + * + * Remove all advanced switch filters only if they are channel specific + * tc-flower based filter + */ +static void ice_rem_all_chnl_fltrs(struct ice_pf *pf) +{ + struct ice_tc_flower_fltr *fltr; + struct hlist_node *node; + + /* to remove all channel filters, iterate an ordered list of filters */ + hlist_for_each_entry_safe(fltr, node, + &pf->tc_flower_fltr_list, + tc_flower_node) { + struct ice_rule_query_data rule; + int status; + + /* for now process only channel specific filters */ + if (!ice_is_chnl_fltr(fltr)) + continue; + + rule.rid = fltr->rid; + rule.rule_id = fltr->rule_id; + rule.vsi_handle = fltr->dest_id; + status = ice_rem_adv_rule_by_id(&pf->hw, &rule); + if (status) { + if (status == -ENOENT) + dev_dbg(ice_pf_to_dev(pf), "TC flower filter (rule_id %u) does not exist\n", + rule.rule_id); + else + dev_err(ice_pf_to_dev(pf), "failed to delete TC flower filter, status %d\n", + status); + } else if (fltr->dest_vsi) { + /* update advanced switch filter count */ + if (fltr->dest_vsi->type == ICE_VSI_CHNL) { + u32 flags = fltr->flags; + + fltr->dest_vsi->num_chnl_fltr--; + if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC)) + pf->num_dmac_chnl_fltrs--; + } + } + + hlist_del(&fltr->tc_flower_node); + kfree(fltr); + } +} + +/** + * ice_remove_q_channels - Remove queue channels for the TCs + * 
@vsi: VSI to be configured + * @rem_fltr: delete advanced switch filter or not + * + * Remove queue channels for the TCs + */ +static void ice_remove_q_channels(struct ice_vsi *vsi, bool rem_fltr) +{ + struct ice_channel *ch, *ch_tmp; + struct ice_pf *pf = vsi->back; + int i; + + /* remove all tc-flower based filter if they are channel filters only */ + if (rem_fltr) + ice_rem_all_chnl_fltrs(pf); + + /* perform cleanup for channels if they exist */ + list_for_each_entry_safe(ch, ch_tmp, &vsi->ch_list, list) { + struct ice_vsi *ch_vsi; + + list_del(&ch->list); + ch_vsi = ch->ch_vsi; + if (!ch_vsi) { + kfree(ch); + continue; + } + + /* Reset queue contexts */ + for (i = 0; i < ch->num_rxq; i++) { + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; + + tx_ring = vsi->tx_rings[ch->base_q + i]; + rx_ring = vsi->rx_rings[ch->base_q + i]; + if (tx_ring) { + tx_ring->ch = NULL; + if (tx_ring->q_vector) + tx_ring->q_vector->ch = NULL; + } + if (rx_ring) { + rx_ring->ch = NULL; + if (rx_ring->q_vector) + rx_ring->q_vector->ch = NULL; + } + } + + /* clear the VSI from scheduler tree */ + ice_rm_vsi_lan_cfg(ch->ch_vsi->port_info, ch->ch_vsi->idx); + + /* Delete VSI from FW */ + ice_vsi_delete(ch->ch_vsi); + + /* Delete VSI from PF and HW VSI arrays */ + ice_vsi_clear(ch->ch_vsi); + + /* free the channel */ + kfree(ch); + } + + /* clear the channel VSI map which is stored in main VSI */ + ice_for_each_chnl_tc(i) + vsi->tc_map_vsi[i] = NULL; + + /* reset main VSI's all TC information */ + vsi->all_enatc = 0; + vsi->all_numtc = 0; +} + +/** + * ice_rebuild_channels - rebuild channel + * @pf: ptr to PF + * + * Recreate channel VSIs and replay filters + */ +static int ice_rebuild_channels(struct ice_pf *pf) +{ + struct device *dev = ice_pf_to_dev(pf); + struct ice_vsi *main_vsi; + bool rem_adv_fltr = true; + struct ice_channel *ch; + struct ice_vsi *vsi; + int tc_idx = 1; + int i, err; + + main_vsi = ice_get_main_vsi(pf); + if (!main_vsi) + return 0; + + if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) || + main_vsi->old_numtc == 1) + return 0; /* nothing to be done */ + + /* reconfigure main VSI based on old value of TC and cached values + * for MQPRIO opts + */ + err = ice_vsi_cfg_tc(main_vsi, main_vsi->old_ena_tc); + if (err) { + dev_err(dev, "failed configuring TC(ena_tc:0x%02x) for HW VSI=%u\n", + main_vsi->old_ena_tc, main_vsi->vsi_num); + return err; + } + + /* rebuild ADQ VSIs */ + ice_for_each_vsi(pf, i) { + enum ice_vsi_type type; + + vsi = pf->vsi[i]; + if (!vsi || vsi->type != ICE_VSI_CHNL) + continue; + + type = vsi->type; + + /* rebuild ADQ VSI */ + err = ice_vsi_rebuild(vsi, true); + if (err) { + dev_err(dev, "VSI (type:%s) at index %d rebuild failed, err %d\n", + ice_vsi_type_str(type), vsi->idx, err); + goto cleanup; + } + + /* Re-map HW VSI number, using VSI handle that has been + * previously validated in ice_replay_vsi() call above + */ + vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx); + + /* replay filters for the VSI */ + err = ice_replay_vsi(&pf->hw, vsi->idx); + if (err) { + dev_err(dev, "VSI (type:%s) replay failed, err %d, VSI index %d\n", + ice_vsi_type_str(type), err, vsi->idx); + rem_adv_fltr = false; + goto cleanup; + } + dev_info(dev, "VSI (type:%s) at index %d rebuilt successfully\n", + ice_vsi_type_str(type), vsi->idx); + + /* store ADQ VSI at correct TC index in main VSI's + * map of TC to VSI + */ + main_vsi->tc_map_vsi[tc_idx++] = vsi; + } + + /* ADQ VSI(s) has been rebuilt successfully, so setup + * channel for main VSI's Tx and Rx rings + */ + 
list_for_each_entry(ch, &main_vsi->ch_list, list) { + struct ice_vsi *ch_vsi; + + ch_vsi = ch->ch_vsi; + if (!ch_vsi) + continue; + + /* reconfig channel resources */ + ice_cfg_chnl_all_res(main_vsi, ch); + + /* replay BW rate limit if it is non-zero */ + if (!ch->max_tx_rate && !ch->min_tx_rate) + continue; + + err = ice_set_bw_limit(ch_vsi, ch->max_tx_rate, + ch->min_tx_rate); + if (err) + dev_err(dev, "failed (err:%d) to rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n", + err, ch->max_tx_rate, ch->min_tx_rate, + ch_vsi->vsi_num); + else + dev_dbg(dev, "successfully rebuild BW rate limit, max_tx_rate: %llu Kbps, min_tx_rate: %llu Kbps for VSI(%u)\n", + ch->max_tx_rate, ch->min_tx_rate, + ch_vsi->vsi_num); + } + + /* reconfig RSS for main VSI */ + if (main_vsi->ch_rss_size) + ice_vsi_cfg_rss_lut_key(main_vsi); + + return 0; + +cleanup: + ice_remove_q_channels(main_vsi, rem_adv_fltr); + return err; +} + +/** + * ice_create_q_channels - Add queue channel for the given TCs + * @vsi: VSI to be configured + * + * Configures queue channel mapping to the given TCs + */ +static int ice_create_q_channels(struct ice_vsi *vsi) +{ + struct ice_pf *pf = vsi->back; + struct ice_channel *ch; + int ret = 0, i; + + ice_for_each_chnl_tc(i) { + if (!(vsi->all_enatc & BIT(i))) + continue; + + ch = kzalloc(sizeof(*ch), GFP_KERNEL); + if (!ch) { + ret = -ENOMEM; + goto err_free; + } + INIT_LIST_HEAD(&ch->list); + ch->num_rxq = vsi->mqprio_qopt.qopt.count[i]; + ch->num_txq = vsi->mqprio_qopt.qopt.count[i]; + ch->base_q = vsi->mqprio_qopt.qopt.offset[i]; + ch->max_tx_rate = vsi->mqprio_qopt.max_rate[i]; + ch->min_tx_rate = vsi->mqprio_qopt.min_rate[i]; + + /* convert to Kbits/s */ + if (ch->max_tx_rate) + ch->max_tx_rate = div_u64(ch->max_tx_rate, + ICE_BW_KBPS_DIVISOR); + if (ch->min_tx_rate) + ch->min_tx_rate = div_u64(ch->min_tx_rate, + ICE_BW_KBPS_DIVISOR); + + ret = ice_create_q_channel(vsi, ch); + if (ret) { + dev_err(ice_pf_to_dev(pf), + "failed creating channel TC:%d\n", i); + kfree(ch); + goto err_free; + } + list_add_tail(&ch->list, &vsi->ch_list); + vsi->tc_map_vsi[i] = ch->ch_vsi; + dev_dbg(ice_pf_to_dev(pf), + "successfully created channel: VSI %pK\n", ch->ch_vsi); + } + return 0; + +err_free: + ice_remove_q_channels(vsi, false); + + return ret; +} + +/** + * ice_setup_tc_mqprio_qdisc - configure multiple traffic classes + * @netdev: net device to configure + * @type_data: TC offload data + */ +static int ice_setup_tc_mqprio_qdisc(struct net_device *netdev, void *type_data) +{ + struct tc_mqprio_qopt_offload *mqprio_qopt = type_data; + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_vsi *vsi = np->vsi; + struct ice_pf *pf = vsi->back; + u16 mode, ena_tc_qdisc = 0; + int cur_txq, cur_rxq; + u8 hw = 0, num_tcf; + struct device *dev; + int ret, i; + + dev = ice_pf_to_dev(pf); + num_tcf = mqprio_qopt->qopt.num_tc; + hw = mqprio_qopt->qopt.hw; + mode = mqprio_qopt->mode; + if (!hw) { + clear_bit(ICE_FLAG_TC_MQPRIO, pf->flags); + vsi->ch_rss_size = 0; + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + goto config_tcf; + } + + /* Generate queue region map for number of TCF requested */ + for (i = 0; i < num_tcf; i++) + ena_tc_qdisc |= BIT(i); + + switch (mode) { + case TC_MQPRIO_MODE_CHANNEL: + + ret = ice_validate_mqprio_qopt(vsi, mqprio_qopt); + if (ret) { + netdev_err(netdev, "failed to validate_mqprio_qopt(), ret %d\n", + ret); + return ret; + } + memcpy(&vsi->mqprio_qopt, mqprio_qopt, sizeof(*mqprio_qopt)); + set_bit(ICE_FLAG_TC_MQPRIO, 
pf->flags); + /* don't assume state of hw_tc_offload during driver load + * and set the flag for TC flower filter if hw_tc_offload + * already ON + */ + if (vsi->netdev->features & NETIF_F_HW_TC) + set_bit(ICE_FLAG_CLS_FLOWER, pf->flags); + break; + default: + return -EINVAL; + } + +config_tcf: + + /* Requesting same TCF configuration as already enabled */ + if (ena_tc_qdisc == vsi->tc_cfg.ena_tc && + mode != TC_MQPRIO_MODE_CHANNEL) + return 0; + + /* Pause VSI queues */ + ice_dis_vsi(vsi, true); + + if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) + ice_remove_q_channels(vsi, true); + + if (!hw && !test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + vsi->req_txq = min_t(int, ice_get_avail_txq_count(pf), + num_online_cpus()); + vsi->req_rxq = min_t(int, ice_get_avail_rxq_count(pf), + num_online_cpus()); + } else { + /* logic to rebuild VSI, same like ethtool -L */ + u16 offset = 0, qcount_tx = 0, qcount_rx = 0; + + for (i = 0; i < num_tcf; i++) { + if (!(ena_tc_qdisc & BIT(i))) + continue; + + offset = vsi->mqprio_qopt.qopt.offset[i]; + qcount_rx = vsi->mqprio_qopt.qopt.count[i]; + qcount_tx = vsi->mqprio_qopt.qopt.count[i]; + } + vsi->req_txq = offset + qcount_tx; + vsi->req_rxq = offset + qcount_rx; + + /* store away original rss_size info, so that it gets reused + * form ice_vsi_rebuild during tc-qdisc delete stage - to + * determine, what should be the rss_sizefor main VSI + */ + vsi->orig_rss_size = vsi->rss_size; + } + + /* save current values of Tx and Rx queues before calling VSI rebuild + * for fallback option + */ + cur_txq = vsi->num_txq; + cur_rxq = vsi->num_rxq; + + /* proceed with rebuild main VSI using correct number of queues */ + ret = ice_vsi_rebuild(vsi, false); + if (ret) { + /* fallback to current number of queues */ + dev_info(dev, "Rebuild failed with new queues, try with current number of queues\n"); + vsi->req_txq = cur_txq; + vsi->req_rxq = cur_rxq; + clear_bit(ICE_RESET_FAILED, pf->state); + if (ice_vsi_rebuild(vsi, false)) { + dev_err(dev, "Rebuild of main VSI failed again\n"); + return ret; + } + } + + vsi->all_numtc = num_tcf; + vsi->all_enatc = ena_tc_qdisc; + ret = ice_vsi_cfg_tc(vsi, ena_tc_qdisc); + if (ret) { + netdev_err(netdev, "failed configuring TC for VSI id=%d\n", + vsi->vsi_num); + goto exit; + } + + if (test_bit(ICE_FLAG_TC_MQPRIO, pf->flags)) { + u64 max_tx_rate = vsi->mqprio_qopt.max_rate[0]; + u64 min_tx_rate = vsi->mqprio_qopt.min_rate[0]; + + /* set TC0 rate limit if specified */ + if (max_tx_rate || min_tx_rate) { + /* convert to Kbits/s */ + if (max_tx_rate) + max_tx_rate = div_u64(max_tx_rate, ICE_BW_KBPS_DIVISOR); + if (min_tx_rate) + min_tx_rate = div_u64(min_tx_rate, ICE_BW_KBPS_DIVISOR); + + ret = ice_set_bw_limit(vsi, max_tx_rate, min_tx_rate); + if (!ret) { + dev_dbg(dev, "set Tx rate max %llu min %llu for VSI(%u)\n", + max_tx_rate, min_tx_rate, vsi->vsi_num); + } else { + dev_err(dev, "failed to set Tx rate max %llu min %llu for VSI(%u)\n", + max_tx_rate, min_tx_rate, vsi->vsi_num); + goto exit; + } + } + ret = ice_create_q_channels(vsi); + if (ret) { + netdev_err(netdev, "failed configuring queue channels\n"); + goto exit; + } else { + netdev_dbg(netdev, "successfully configured channels\n"); + } + } + + if (vsi->ch_rss_size) + ice_vsi_cfg_rss_lut_key(vsi); + +exit: + /* if error, reset the all_numtc and all_enatc */ + if (ret) { + vsi->all_numtc = 0; + vsi->all_enatc = 0; + } + /* resume VSI */ + ice_ena_vsi(vsi, true); + + return ret; +} + +static LIST_HEAD(ice_block_cb_list); + +static int +ice_setup_tc(struct net_device *netdev, 
enum tc_setup_type type, + void *type_data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_pf *pf = np->vsi->back; + int err; + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple(type_data, + &ice_block_cb_list, + ice_setup_tc_block_cb, + np, np, true); + case TC_SETUP_QDISC_MQPRIO: + /* setup traffic classifier for receive side */ + mutex_lock(&pf->tc_mutex); + err = ice_setup_tc_mqprio_qdisc(netdev, type_data); + mutex_unlock(&pf->tc_mutex); + return err; + default: + return -EOPNOTSUPP; + } + return -EOPNOTSUPP; +} + +static struct ice_indr_block_priv * +ice_indr_block_priv_lookup(struct ice_netdev_priv *np, + struct net_device *netdev) +{ + struct ice_indr_block_priv *cb_priv; + + list_for_each_entry(cb_priv, &np->tc_indr_block_priv_list, list) { + if (!cb_priv->netdev) + return NULL; + if (cb_priv->netdev == netdev) + return cb_priv; + } + return NULL; +} + +static int +ice_indr_setup_block_cb(enum tc_setup_type type, void *type_data, + void *indr_priv) +{ + struct ice_indr_block_priv *priv = indr_priv; + struct ice_netdev_priv *np = priv->np; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_setup_tc_cls_flower(np, priv->netdev, + (struct flow_cls_offload *) + type_data); + default: + return -EOPNOTSUPP; + } +} + +static int +ice_indr_setup_tc_block(struct net_device *netdev, struct Qdisc *sch, + struct ice_netdev_priv *np, + struct flow_block_offload *f, void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + struct ice_indr_block_priv *indr_priv; + struct flow_block_cb *block_cb; + + if (!ice_is_tunnel_supported(netdev) && + !(is_vlan_dev(netdev) && + vlan_dev_real_dev(netdev) == np->vsi->netdev)) + return -EOPNOTSUPP; + + if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case FLOW_BLOCK_BIND: + indr_priv = ice_indr_block_priv_lookup(np, netdev); + if (indr_priv) + return -EEXIST; + + indr_priv = kzalloc(sizeof(*indr_priv), GFP_KERNEL); + if (!indr_priv) + return -ENOMEM; + + indr_priv->netdev = netdev; + indr_priv->np = np; + list_add(&indr_priv->list, &np->tc_indr_block_priv_list); + + block_cb = + flow_indr_block_cb_alloc(ice_indr_setup_block_cb, + indr_priv, indr_priv, + ice_rep_indr_tc_block_unbind, + f, netdev, sch, data, np, + cleanup); + + if (IS_ERR(block_cb)) { + list_del(&indr_priv->list); + kfree(indr_priv); + return PTR_ERR(block_cb); + } + flow_block_cb_add(block_cb, f); + list_add_tail(&block_cb->driver_list, &ice_block_cb_list); + break; + case FLOW_BLOCK_UNBIND: + indr_priv = ice_indr_block_priv_lookup(np, netdev); + if (!indr_priv) + return -ENOENT; + + block_cb = flow_block_cb_lookup(f->block, + ice_indr_setup_block_cb, + indr_priv); + if (!block_cb) + return -ENOENT; + + flow_indr_block_cb_remove(block_cb, f); + + list_del(&block_cb->driver_list); + break; + default: + return -EOPNOTSUPP; + } + return 0; +} + +static int +ice_indr_setup_tc_cb(struct net_device *netdev, struct Qdisc *sch, + void *cb_priv, enum tc_setup_type type, void *type_data, + void *data, + void (*cleanup)(struct flow_block_cb *block_cb)) +{ + switch (type) { + case TC_SETUP_BLOCK: + return ice_indr_setup_tc_block(netdev, sch, cb_priv, type_data, + data, cleanup); + + default: + return -EOPNOTSUPP; + } +} + +/** * ice_open - Called when a network interface becomes active * @netdev: network interface device structure * @@ -7117,7 +8441,7 @@ int ice_open_internal(struct net_device *netdev) return -EIO; } - ice_check_module_power(pf, pi->phy.link_info.link_cfg_err); 
+ ice_check_link_cfg_err(pf, pi->phy.link_info.link_cfg_err); /* Set PHY if there is media, otherwise, turn off PHY */ if (pi->phy.link_info.link_info & ICE_AQ_MEDIA_AVAILABLE) { @@ -7245,6 +8569,7 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_open = ice_open, .ndo_stop = ice_stop, .ndo_start_xmit = ice_start_xmit, + .ndo_select_queue = ice_select_queue, .ndo_features_check = ice_features_check, .ndo_set_rx_mode = ice_set_rx_mode, .ndo_set_mac_address = ice_set_mac_address, @@ -7260,8 +8585,10 @@ static const struct net_device_ops ice_netdev_ops = { .ndo_set_vf_vlan = ice_set_vf_port_vlan, .ndo_set_vf_link_state = ice_set_vf_link_state, .ndo_get_vf_stats = ice_get_vf_stats, + .ndo_set_vf_rate = ice_set_vf_bw, .ndo_vlan_rx_add_vid = ice_vlan_rx_add_vid, .ndo_vlan_rx_kill_vid = ice_vlan_rx_kill_vid, + .ndo_setup_tc = ice_setup_tc, .ndo_set_features = ice_set_features, .ndo_bridge_getlink = ice_bridge_getlink, .ndo_bridge_setlink = ice_bridge_setlink, diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h index 199aa5b71540..dc1b0e9e6df5 100644 --- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h +++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h @@ -3,6 +3,56 @@ #ifndef _ICE_PROTOCOL_TYPE_H_ #define _ICE_PROTOCOL_TYPE_H_ +#define ICE_IPV6_ADDR_LENGTH 16 + +/* Each recipe can match up to 5 different fields. Fields to match can be meta- + * data, values extracted from packet headers, or results from other recipes. + * One of the 5 fields is reserved for matching the switch ID. So, up to 4 + * recipes can provide intermediate results to another one through chaining, + * e.g. recipes 0, 1, 2, and 3 can provide intermediate results to recipe 4. + */ +#define ICE_NUM_WORDS_RECIPE 4 + +/* Max recipes that can be chained */ +#define ICE_MAX_CHAIN_RECIPE 5 + +/* 1 word reserved for switch ID from allowed 5 words. + * So a recipe can have max 4 words. And you can chain 5 such recipes + * together. So maximum words that can be programmed for look up is 5 * 4. 
+ */ +#define ICE_MAX_CHAIN_WORDS (ICE_NUM_WORDS_RECIPE * ICE_MAX_CHAIN_RECIPE) + +/* Field vector index corresponding to chaining */ +#define ICE_CHAIN_FV_INDEX_START 47 + +enum ice_protocol_type { + ICE_MAC_OFOS = 0, + ICE_MAC_IL, + ICE_ETYPE_OL, + ICE_VLAN_OFOS, + ICE_IPV4_OFOS, + ICE_IPV4_IL, + ICE_IPV6_OFOS, + ICE_IPV6_IL, + ICE_TCP_IL, + ICE_UDP_OF, + ICE_UDP_ILOS, + ICE_VXLAN, + ICE_GENEVE, + ICE_NVGRE, + ICE_VXLAN_GPE, + ICE_SCTP_IL, + ICE_PROTOCOL_LAST +}; + +enum ice_sw_tunnel_type { + ICE_NON_TUN = 0, + ICE_SW_TUN_VXLAN, + ICE_SW_TUN_GENEVE, + ICE_SW_TUN_NVGRE, + ICE_ALL_TUNNELS /* All tunnel types including NVGRE */ +}; + /* Decoders for ice_prot_id: * - F: First * - I: Inner @@ -35,4 +85,158 @@ enum ice_prot_id { ICE_PROT_META_ID = 255, /* when offset == metadata */ ICE_PROT_INVALID = 255 /* when offset == ICE_FV_OFFSET_INVAL */ }; + +#define ICE_VNI_OFFSET 12 /* offset of VNI from ICE_PROT_UDP_OF */ + +#define ICE_MAC_OFOS_HW 1 +#define ICE_MAC_IL_HW 4 +#define ICE_ETYPE_OL_HW 9 +#define ICE_VLAN_OF_HW 16 +#define ICE_VLAN_OL_HW 17 +#define ICE_IPV4_OFOS_HW 32 +#define ICE_IPV4_IL_HW 33 +#define ICE_IPV6_OFOS_HW 40 +#define ICE_IPV6_IL_HW 41 +#define ICE_TCP_IL_HW 49 +#define ICE_UDP_ILOS_HW 53 +#define ICE_GRE_OF_HW 64 + +#define ICE_UDP_OF_HW 52 /* UDP Tunnels */ +#define ICE_META_DATA_ID_HW 255 /* this is used for tunnel type */ + +#define ICE_MDID_SIZE 2 +#define ICE_TUN_FLAG_MDID 21 +#define ICE_TUN_FLAG_MDID_OFF (ICE_MDID_SIZE * ICE_TUN_FLAG_MDID) +#define ICE_TUN_FLAG_MASK 0xFF + +#define ICE_TUN_FLAG_FV_IND 2 + +/* Mapping of software defined protocol ID to hardware defined protocol ID */ +struct ice_protocol_entry { + enum ice_protocol_type type; + u8 protocol_id; +}; + +struct ice_ether_hdr { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; +}; + +struct ice_ethtype_hdr { + __be16 ethtype_id; +}; + +struct ice_ether_vlan_hdr { + u8 dst_addr[ETH_ALEN]; + u8 src_addr[ETH_ALEN]; + __be32 vlan_id; +}; + +struct ice_vlan_hdr { + __be16 type; + __be16 vlan; +}; + +struct ice_ipv4_hdr { + u8 version; + u8 tos; + __be16 total_length; + __be16 id; + __be16 frag_off; + u8 time_to_live; + u8 protocol; + __be16 check; + __be32 src_addr; + __be32 dst_addr; +}; + +struct ice_ipv6_hdr { + __be32 be_ver_tc_flow; + __be16 payload_len; + u8 next_hdr; + u8 hop_limit; + u8 src_addr[ICE_IPV6_ADDR_LENGTH]; + u8 dst_addr[ICE_IPV6_ADDR_LENGTH]; +}; + +struct ice_sctp_hdr { + __be16 src_port; + __be16 dst_port; + __be32 verification_tag; + __be32 check; +}; + +struct ice_l4_hdr { + __be16 src_port; + __be16 dst_port; + __be16 len; + __be16 check; +}; + +struct ice_udp_tnl_hdr { + __be16 field; + __be16 proto_type; + __be32 vni; /* only use lower 24-bits */ +}; + +struct ice_nvgre_hdr { + __be16 flags; + __be16 protocol; + __be32 tni_flow; +}; + +union ice_prot_hdr { + struct ice_ether_hdr eth_hdr; + struct ice_ethtype_hdr ethertype; + struct ice_vlan_hdr vlan_hdr; + struct ice_ipv4_hdr ipv4_hdr; + struct ice_ipv6_hdr ipv6_hdr; + struct ice_l4_hdr l4_hdr; + struct ice_sctp_hdr sctp_hdr; + struct ice_udp_tnl_hdr tnl_hdr; + struct ice_nvgre_hdr nvgre_hdr; +}; + +/* This is mapping table entry that maps every word within a given protocol + * structure to the real byte offset as per the specification of that + * protocol header. + * for e.g. 
dst address is 3 words in ethertype header and corresponding bytes + * are 0, 2, 3 in the actual packet header and src address is at 4, 6, 8 + */ +struct ice_prot_ext_tbl_entry { + enum ice_protocol_type prot_type; + /* Byte offset into header of given protocol type */ + u8 offs[sizeof(union ice_prot_hdr)]; +}; + +/* Extractions to be looked up for a given recipe */ +struct ice_prot_lkup_ext { + u16 prot_type; + u8 n_val_words; + /* create a buffer to hold max words per recipe */ + u16 field_off[ICE_MAX_CHAIN_WORDS]; + u16 field_mask[ICE_MAX_CHAIN_WORDS]; + + struct ice_fv_word fv_words[ICE_MAX_CHAIN_WORDS]; + + /* Indicate field offsets that have field vector indices assigned */ + DECLARE_BITMAP(done, ICE_MAX_CHAIN_WORDS); +}; + +struct ice_pref_recipe_group { + u8 n_val_pairs; /* Number of valid pairs */ + struct ice_fv_word pairs[ICE_NUM_WORDS_RECIPE]; + u16 mask[ICE_NUM_WORDS_RECIPE]; +}; + +struct ice_recp_grp_entry { + struct list_head l_entry; + +#define ICE_INVAL_CHAIN_IND 0xFF + u16 rid; + u8 chain_idx; + u16 fv_idx[ICE_NUM_WORDS_RECIPE]; + u16 fv_mask[ICE_NUM_WORDS_RECIPE]; + struct ice_pref_recipe_group r_group; +}; #endif /* _ICE_PROTOCOL_TYPE_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.c b/drivers/net/ethernet/intel/ice/ice_ptp.c index d1ef3d48a4b0..bf7247c6f58e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp.c @@ -6,6 +6,252 @@ #define E810_OUT_PROP_DELAY_NS 1 +static const struct ptp_pin_desc ice_pin_desc_e810t[] = { + /* name idx func chan */ + { "GNSS", GNSS, PTP_PF_EXTTS, 0, { 0, } }, + { "SMA1", SMA1, PTP_PF_NONE, 1, { 0, } }, + { "U.FL1", UFL1, PTP_PF_NONE, 1, { 0, } }, + { "SMA2", SMA2, PTP_PF_NONE, 2, { 0, } }, + { "U.FL2", UFL2, PTP_PF_NONE, 2, { 0, } }, +}; + +/** + * ice_get_sma_config_e810t + * @hw: pointer to the hw struct + * @ptp_pins: pointer to the ptp_pin_desc struture + * + * Read the configuration of the SMA control logic and put it into the + * ptp_pin_desc structure + */ +static int +ice_get_sma_config_e810t(struct ice_hw *hw, struct ptp_pin_desc *ptp_pins) +{ + u8 data, i; + int status; + + /* Read initial pin state */ + status = ice_read_sma_ctrl_e810t(hw, &data); + if (status) + return status; + + /* initialize with defaults */ + for (i = 0; i < NUM_PTP_PINS_E810T; i++) { + snprintf(ptp_pins[i].name, sizeof(ptp_pins[i].name), + "%s", ice_pin_desc_e810t[i].name); + ptp_pins[i].index = ice_pin_desc_e810t[i].index; + ptp_pins[i].func = ice_pin_desc_e810t[i].func; + ptp_pins[i].chan = ice_pin_desc_e810t[i].chan; + } + + /* Parse SMA1/UFL1 */ + switch (data & ICE_SMA1_MASK_E810T) { + case ICE_SMA1_MASK_E810T: + default: + ptp_pins[SMA1].func = PTP_PF_NONE; + ptp_pins[UFL1].func = PTP_PF_NONE; + break; + case ICE_SMA1_DIR_EN_E810T: + ptp_pins[SMA1].func = PTP_PF_PEROUT; + ptp_pins[UFL1].func = PTP_PF_NONE; + break; + case ICE_SMA1_TX_EN_E810T: + ptp_pins[SMA1].func = PTP_PF_EXTTS; + ptp_pins[UFL1].func = PTP_PF_NONE; + break; + case 0: + ptp_pins[SMA1].func = PTP_PF_EXTTS; + ptp_pins[UFL1].func = PTP_PF_PEROUT; + break; + } + + /* Parse SMA2/UFL2 */ + switch (data & ICE_SMA2_MASK_E810T) { + case ICE_SMA2_MASK_E810T: + default: + ptp_pins[SMA2].func = PTP_PF_NONE; + ptp_pins[UFL2].func = PTP_PF_NONE; + break; + case (ICE_SMA2_TX_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T): + ptp_pins[SMA2].func = PTP_PF_EXTTS; + ptp_pins[UFL2].func = PTP_PF_NONE; + break; + case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_UFL2_RX_DIS_E810T): + ptp_pins[SMA2].func = PTP_PF_PEROUT; + ptp_pins[UFL2].func = PTP_PF_NONE; + break; 
+ case (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T): + ptp_pins[SMA2].func = PTP_PF_NONE; + ptp_pins[UFL2].func = PTP_PF_EXTTS; + break; + case ICE_SMA2_DIR_EN_E810T: + ptp_pins[SMA2].func = PTP_PF_PEROUT; + ptp_pins[UFL2].func = PTP_PF_EXTTS; + break; + } + + return 0; +} + +/** + * ice_ptp_set_sma_config_e810t + * @hw: pointer to the hw struct + * @ptp_pins: pointer to the ptp_pin_desc struture + * + * Set the configuration of the SMA control logic based on the configuration in + * num_pins parameter + */ +static int +ice_ptp_set_sma_config_e810t(struct ice_hw *hw, + const struct ptp_pin_desc *ptp_pins) +{ + int status; + u8 data; + + /* SMA1 and UFL1 cannot be set to TX at the same time */ + if (ptp_pins[SMA1].func == PTP_PF_PEROUT && + ptp_pins[UFL1].func == PTP_PF_PEROUT) + return -EINVAL; + + /* SMA2 and UFL2 cannot be set to RX at the same time */ + if (ptp_pins[SMA2].func == PTP_PF_EXTTS && + ptp_pins[UFL2].func == PTP_PF_EXTTS) + return -EINVAL; + + /* Read initial pin state value */ + status = ice_read_sma_ctrl_e810t(hw, &data); + if (status) + return status; + + /* Set the right sate based on the desired configuration */ + data &= ~ICE_SMA1_MASK_E810T; + if (ptp_pins[SMA1].func == PTP_PF_NONE && + ptp_pins[UFL1].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA1 + U.FL1 disabled"); + data |= ICE_SMA1_MASK_E810T; + } else if (ptp_pins[SMA1].func == PTP_PF_EXTTS && + ptp_pins[UFL1].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA1 RX"); + data |= ICE_SMA1_TX_EN_E810T; + } else if (ptp_pins[SMA1].func == PTP_PF_NONE && + ptp_pins[UFL1].func == PTP_PF_PEROUT) { + /* U.FL 1 TX will always enable SMA 1 RX */ + dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX"); + } else if (ptp_pins[SMA1].func == PTP_PF_EXTTS && + ptp_pins[UFL1].func == PTP_PF_PEROUT) { + dev_info(ice_hw_to_dev(hw), "SMA1 RX + U.FL1 TX"); + } else if (ptp_pins[SMA1].func == PTP_PF_PEROUT && + ptp_pins[UFL1].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA1 TX"); + data |= ICE_SMA1_DIR_EN_E810T; + } + + data &= ~ICE_SMA2_MASK_E810T; + if (ptp_pins[SMA2].func == PTP_PF_NONE && + ptp_pins[UFL2].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA2 + U.FL2 disabled"); + data |= ICE_SMA2_MASK_E810T; + } else if (ptp_pins[SMA2].func == PTP_PF_EXTTS && + ptp_pins[UFL2].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA2 RX"); + data |= (ICE_SMA2_TX_EN_E810T | + ICE_SMA2_UFL2_RX_DIS_E810T); + } else if (ptp_pins[SMA2].func == PTP_PF_NONE && + ptp_pins[UFL2].func == PTP_PF_EXTTS) { + dev_info(ice_hw_to_dev(hw), "UFL2 RX"); + data |= (ICE_SMA2_DIR_EN_E810T | ICE_SMA2_TX_EN_E810T); + } else if (ptp_pins[SMA2].func == PTP_PF_PEROUT && + ptp_pins[UFL2].func == PTP_PF_NONE) { + dev_info(ice_hw_to_dev(hw), "SMA2 TX"); + data |= (ICE_SMA2_DIR_EN_E810T | + ICE_SMA2_UFL2_RX_DIS_E810T); + } else if (ptp_pins[SMA2].func == PTP_PF_PEROUT && + ptp_pins[UFL2].func == PTP_PF_EXTTS) { + dev_info(ice_hw_to_dev(hw), "SMA2 TX + U.FL2 RX"); + data |= ICE_SMA2_DIR_EN_E810T; + } + + return ice_write_sma_ctrl_e810t(hw, data); +} + +/** + * ice_ptp_set_sma_e810t + * @info: the driver's PTP info structure + * @pin: pin index in kernel structure + * @func: Pin function to be set (PTP_PF_NONE, PTP_PF_EXTTS or PTP_PF_PEROUT) + * + * Set the configuration of a single SMA pin + */ +static int +ice_ptp_set_sma_e810t(struct ptp_clock_info *info, unsigned int pin, + enum ptp_pin_function func) +{ + struct ptp_pin_desc ptp_pins[NUM_PTP_PINS_E810T]; + struct ice_pf *pf = ptp_info_to_pf(info); + struct ice_hw *hw = &pf->hw; 
+ int err; + + if (pin < SMA1 || func > PTP_PF_PEROUT) + return -EOPNOTSUPP; + + err = ice_get_sma_config_e810t(hw, ptp_pins); + if (err) + return err; + + /* Disable the same function on the other pin sharing the channel */ + if (pin == SMA1 && ptp_pins[UFL1].func == func) + ptp_pins[UFL1].func = PTP_PF_NONE; + if (pin == UFL1 && ptp_pins[SMA1].func == func) + ptp_pins[SMA1].func = PTP_PF_NONE; + + if (pin == SMA2 && ptp_pins[UFL2].func == func) + ptp_pins[UFL2].func = PTP_PF_NONE; + if (pin == UFL2 && ptp_pins[SMA2].func == func) + ptp_pins[SMA2].func = PTP_PF_NONE; + + /* Set up new pin function in the temp table */ + ptp_pins[pin].func = func; + + return ice_ptp_set_sma_config_e810t(hw, ptp_pins); +} + +/** + * ice_verify_pin_e810t + * @info: the driver's PTP info structure + * @pin: Pin index + * @func: Assigned function + * @chan: Assigned channel + * + * Verify if pin supports requested pin function. If the Check pins consistency. + * Reconfigure the SMA logic attached to the given pin to enable its + * desired functionality + */ +static int +ice_verify_pin_e810t(struct ptp_clock_info *info, unsigned int pin, + enum ptp_pin_function func, unsigned int chan) +{ + /* Don't allow channel reassignment */ + if (chan != ice_pin_desc_e810t[pin].chan) + return -EOPNOTSUPP; + + /* Check if functions are properly assigned */ + switch (func) { + case PTP_PF_NONE: + break; + case PTP_PF_EXTTS: + if (pin == UFL1) + return -EOPNOTSUPP; + break; + case PTP_PF_PEROUT: + if (pin == UFL2 || pin == GNSS) + return -EOPNOTSUPP; + break; + case PTP_PF_PHYSYNC: + return -EOPNOTSUPP; + } + + return ice_ptp_set_sma_e810t(info, pin, func); +} + /** * ice_set_tx_tstamp - Enable or disable Tx timestamping * @pf: The PF pointer to search in @@ -735,17 +981,34 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, { struct ice_pf *pf = ptp_info_to_pf(info); struct ice_perout_channel clk_cfg = {0}; + bool sma_pres = false; unsigned int chan; u32 gpio_pin; int err; + if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) + sma_pres = true; + switch (rq->type) { case PTP_CLK_REQ_PEROUT: chan = rq->perout.index; - if (chan == PPS_CLK_GEN_CHAN) + if (sma_pres) { + if (chan == ice_pin_desc_e810t[SMA1].chan) + clk_cfg.gpio_pin = GPIO_20; + else if (chan == ice_pin_desc_e810t[SMA2].chan) + clk_cfg.gpio_pin = GPIO_22; + else + return -1; + } else if (ice_is_e810t(&pf->hw)) { + if (chan == 0) + clk_cfg.gpio_pin = GPIO_20; + else + clk_cfg.gpio_pin = GPIO_22; + } else if (chan == PPS_CLK_GEN_CHAN) { clk_cfg.gpio_pin = PPS_PIN_INDEX; - else + } else { clk_cfg.gpio_pin = chan; + } clk_cfg.period = ((rq->perout.period.sec * NSEC_PER_SEC) + rq->perout.period.nsec); @@ -757,7 +1020,19 @@ ice_ptp_gpio_enable_e810(struct ptp_clock_info *info, break; case PTP_CLK_REQ_EXTTS: chan = rq->extts.index; - gpio_pin = chan; + if (sma_pres) { + if (chan < ice_pin_desc_e810t[SMA2].chan) + gpio_pin = GPIO_21; + else + gpio_pin = GPIO_23; + } else if (ice_is_e810t(&pf->hw)) { + if (chan == 0) + gpio_pin = GPIO_21; + else + gpio_pin = GPIO_23; + } else { + gpio_pin = chan; + } err = ice_ptp_cfg_extts(pf, !!on, chan, gpio_pin, rq->extts.flags); @@ -1012,7 +1287,7 @@ int ice_ptp_set_ts_config(struct ice_pf *pf, struct ifreq *ifr) * The timestamp is in ns, so we must convert the result first. 
*/ void -ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, +ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { u32 ts_high; @@ -1038,13 +1313,93 @@ ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, } /** + * ice_ptp_disable_sma_pins_e810t - Disable E810-T SMA pins + * @pf: pointer to the PF structure + * @info: PTP clock info structure + * + * Disable the OS access to the SMA pins. Called to clear out the OS + * indications of pin support when we fail to setup the E810-T SMA control + * register. + */ +static void +ice_ptp_disable_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) +{ + struct device *dev = ice_pf_to_dev(pf); + + dev_warn(dev, "Failed to configure E810-T SMA pin control\n"); + + info->enable = NULL; + info->verify = NULL; + info->n_pins = 0; + info->n_ext_ts = 0; + info->n_per_out = 0; +} + +/** + * ice_ptp_setup_sma_pins_e810t - Setup the SMA pins + * @pf: pointer to the PF structure + * @info: PTP clock info structure + * + * Finish setting up the SMA pins by allocating pin_config, and setting it up + * according to the current status of the SMA. On failure, disable all of the + * extended SMA pin support. + */ +static void +ice_ptp_setup_sma_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) +{ + struct device *dev = ice_pf_to_dev(pf); + int err; + + /* Allocate memory for kernel pins interface */ + info->pin_config = devm_kcalloc(dev, info->n_pins, + sizeof(*info->pin_config), GFP_KERNEL); + if (!info->pin_config) { + ice_ptp_disable_sma_pins_e810t(pf, info); + return; + } + + /* Read current SMA status */ + err = ice_get_sma_config_e810t(&pf->hw, info->pin_config); + if (err) + ice_ptp_disable_sma_pins_e810t(pf, info); +} + +/** + * ice_ptp_setup_pins_e810t - Setup PTP pins in sysfs + * @pf: pointer to the PF instance + * @info: PTP clock capabilities + */ +static void +ice_ptp_setup_pins_e810t(struct ice_pf *pf, struct ptp_clock_info *info) +{ + /* Check if SMA controller is in the netlist */ + if (ice_is_feature_supported(pf, ICE_F_SMA_CTRL) && + !ice_is_pca9575_present(&pf->hw)) + ice_clear_feature_support(pf, ICE_F_SMA_CTRL); + + if (!ice_is_feature_supported(pf, ICE_F_SMA_CTRL)) { + info->n_ext_ts = N_EXT_TS_E810_NO_SMA; + info->n_per_out = N_PER_OUT_E810T_NO_SMA; + return; + } + + info->n_per_out = N_PER_OUT_E810T; + info->n_ext_ts = N_EXT_TS_E810; + info->n_pins = NUM_PTP_PINS_E810T; + info->verify = ice_verify_pin_e810t; + + /* Complete setup of the SMA pins */ + ice_ptp_setup_sma_pins_e810t(pf, info); +} + +/** * ice_ptp_setup_pins_e810 - Setup PTP pins in sysfs * @info: PTP clock capabilities */ static void ice_ptp_setup_pins_e810(struct ptp_clock_info *info) { - info->n_per_out = E810_N_PER_OUT; - info->n_ext_ts = E810_N_EXT_TS; + info->n_per_out = N_PER_OUT_E810; + info->n_ext_ts = N_EXT_TS_E810; } /** @@ -1062,7 +1417,10 @@ ice_ptp_set_funcs_e810(struct ice_pf *pf, struct ptp_clock_info *info) { info->enable = ice_ptp_gpio_enable_e810; - ice_ptp_setup_pins_e810(info); + if (ice_is_e810t(&pf->hw)) + ice_ptp_setup_pins_e810t(pf, info); + else + ice_ptp_setup_pins_e810(info); } /** diff --git a/drivers/net/ethernet/intel/ice/ice_ptp.h b/drivers/net/ethernet/intel/ice/ice_ptp.h index e1c787bd5b96..f71ad317d6c8 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp.h @@ -9,12 +9,21 @@ #include "ice_ptp_hw.h" -enum ice_ptp_pin { +enum ice_ptp_pin_e810 { GPIO_20 = 0, GPIO_21, GPIO_22, GPIO_23, - NUM_ICE_PTP_PIN + NUM_PTP_PIN_E810 +}; + +enum 
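/* Pin set exposed to the PTP core on E810-T adapters: GNSS is an input,
 * SMA1 and SMA2 are bidirectional connectors, U.FL1 is periodic-output
 * only and U.FL2 is external-timestamp (input) only, which matches the
 * checks in ice_verify_pin_e810t().
 */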
ice_ptp_pin_e810t { + GNSS = 0, + SMA1, + UFL1, + SMA2, + UFL2, + NUM_PTP_PINS_E810T }; struct ice_perout_channel { @@ -155,8 +164,11 @@ struct ice_ptp { #define PPS_CLK_SRC_CHAN 2 #define PPS_PIN_INDEX 5 #define TIME_SYNC_PIN_INDEX 4 -#define E810_N_EXT_TS 3 -#define E810_N_PER_OUT 4 +#define N_EXT_TS_E810 3 +#define N_PER_OUT_E810 4 +#define N_PER_OUT_E810T 3 +#define N_PER_OUT_E810T_NO_SMA 2 +#define N_EXT_TS_E810_NO_SMA 2 #if IS_ENABLED(CONFIG_PTP_1588_CLOCK) struct ice_pf; @@ -168,7 +180,7 @@ s8 ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb); void ice_ptp_process_ts(struct ice_pf *pf); void -ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, +ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb); void ice_ptp_init(struct ice_pf *pf); void ice_ptp_release(struct ice_pf *pf); @@ -196,7 +208,7 @@ ice_ptp_request_ts(struct ice_ptp_tx *tx, struct sk_buff *skb) static inline void ice_ptp_process_ts(struct ice_pf *pf) { } static inline void -ice_ptp_rx_hwtstamp(struct ice_ring *rx_ring, +ice_ptp_rx_hwtstamp(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb) { } static inline void ice_ptp_init(struct ice_pf *pf) { } static inline void ice_ptp_release(struct ice_pf *pf) { } diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c index 3eca0e4eab0b..29f947c0cd2e 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.c +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.c @@ -649,3 +649,154 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx) { return ice_clear_phy_tstamp_e810(hw, block, idx); } + +/* E810T SMA functions + * + * The following functions operate specifically on E810T hardware and are used + * to access the extended GPIOs available. + */ + +/** + * ice_get_pca9575_handle + * @hw: pointer to the hw struct + * @pca9575_handle: GPIO controller's handle + * + * Find and return the GPIO controller's handle in the netlist. + * When found - the value will be cached in the hw structure and following calls + * will return cached value + */ +static int +ice_get_pca9575_handle(struct ice_hw *hw, u16 *pca9575_handle) +{ + struct ice_aqc_get_link_topo *cmd; + struct ice_aq_desc desc; + int status; + u8 idx; + + /* If handle was read previously return cached value */ + if (hw->io_expander_handle) { + *pca9575_handle = hw->io_expander_handle; + return 0; + } + + /* If handle was not detected read it from the netlist */ + cmd = &desc.params.get_link_topo; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_link_topo); + + /* Set node type to GPIO controller */ + cmd->addr.topo_params.node_type_ctx = + (ICE_AQC_LINK_TOPO_NODE_TYPE_M & + ICE_AQC_LINK_TOPO_NODE_TYPE_GPIO_CTRL); + +#define SW_PCA9575_SFP_TOPO_IDX 2 +#define SW_PCA9575_QSFP_TOPO_IDX 1 + + /* Check if the SW IO expander controlling SMA exists in the netlist. 
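 * The expander sits at a different netlist topology index depending on
 * whether the board is the SFP or the QSFP variant, so the index is
 * selected from the device ID below.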
*/ + if (hw->device_id == ICE_DEV_ID_E810C_SFP) + idx = SW_PCA9575_SFP_TOPO_IDX; + else if (hw->device_id == ICE_DEV_ID_E810C_QSFP) + idx = SW_PCA9575_QSFP_TOPO_IDX; + else + return -EOPNOTSUPP; + + cmd->addr.topo_params.index = idx; + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, NULL); + if (status) + return -EOPNOTSUPP; + + /* Verify if we found the right IO expander type */ + if (desc.params.get_link_topo.node_part_num != + ICE_AQC_GET_LINK_TOPO_NODE_NR_PCA9575) + return -EOPNOTSUPP; + + /* If present save the handle and return it */ + hw->io_expander_handle = + le16_to_cpu(desc.params.get_link_topo.addr.handle); + *pca9575_handle = hw->io_expander_handle; + + return 0; +} + +/** + * ice_read_sma_ctrl_e810t + * @hw: pointer to the hw struct + * @data: pointer to data to be read from the GPIO controller + * + * Read the SMA controller state. It is connected to pins 3-7 of Port 1 of the + * PCA9575 expander, so only bits 3-7 in data are valid. + */ +int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data) +{ + int status; + u16 handle; + u8 i; + + status = ice_get_pca9575_handle(hw, &handle); + if (status) + return status; + + *data = 0; + + for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) { + bool pin; + + status = ice_aq_get_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET, + &pin, NULL); + if (status) + break; + *data |= (u8)(!pin) << i; + } + + return status; +} + +/** + * ice_write_sma_ctrl_e810t + * @hw: pointer to the hw struct + * @data: data to be written to the GPIO controller + * + * Write the data to the SMA controller. It is connected to pins 3-7 of Port 1 + * of the PCA9575 expander, so only bits 3-7 in data are valid. + */ +int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data) +{ + int status; + u16 handle; + u8 i; + + status = ice_get_pca9575_handle(hw, &handle); + if (status) + return status; + + for (i = ICE_SMA_MIN_BIT_E810T; i <= ICE_SMA_MAX_BIT_E810T; i++) { + bool pin; + + pin = !(data & (1 << i)); + status = ice_aq_set_gpio(hw, handle, i + ICE_PCA9575_P1_OFFSET, + pin, NULL); + if (status) + break; + } + + return status; +} + +/** + * ice_is_pca9575_present + * @hw: pointer to the hw struct + * + * Check if the SW IO expander is present in the netlist + */ +bool ice_is_pca9575_present(struct ice_hw *hw) +{ + u16 handle = 0; + int status; + + if (!ice_is_e810t(hw)) + return false; + + status = ice_get_pca9575_handle(hw, &handle); + + return !status && handle; +} diff --git a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h index 55a414e87018..b2984b5c22c1 100644 --- a/drivers/net/ethernet/intel/ice/ice_ptp_hw.h +++ b/drivers/net/ethernet/intel/ice/ice_ptp_hw.h @@ -30,6 +30,9 @@ int ice_clear_phy_tstamp(struct ice_hw *hw, u8 block, u8 idx); /* E810 family functions */ int ice_ptp_init_phy_e810(struct ice_hw *hw); +int ice_read_sma_ctrl_e810t(struct ice_hw *hw, u8 *data); +int ice_write_sma_ctrl_e810t(struct ice_hw *hw, u8 data); +bool ice_is_pca9575_present(struct ice_hw *hw); #define PFTSYN_SEM_BYTES 4 @@ -76,4 +79,23 @@ int ice_ptp_init_phy_e810(struct ice_hw *hw); #define LOW_TX_MEMORY_BANK_START 0x03090000 #define HIGH_TX_MEMORY_BANK_START 0x03090004 +/* E810T SMA controller pin control */ +#define ICE_SMA1_DIR_EN_E810T BIT(4) +#define ICE_SMA1_TX_EN_E810T BIT(5) +#define ICE_SMA2_UFL2_RX_DIS_E810T BIT(3) +#define ICE_SMA2_DIR_EN_E810T BIT(6) +#define ICE_SMA2_TX_EN_E810T BIT(7) + +#define ICE_SMA1_MASK_E810T (ICE_SMA1_DIR_EN_E810T | \ + ICE_SMA1_TX_EN_E810T) +#define ICE_SMA2_MASK_E810T 
(ICE_SMA2_UFL2_RX_DIS_E810T | \ + ICE_SMA2_DIR_EN_E810T | \ + ICE_SMA2_TX_EN_E810T) +#define ICE_ALL_SMA_MASK_E810T (ICE_SMA1_MASK_E810T | \ + ICE_SMA2_MASK_E810T) + +#define ICE_SMA_MIN_BIT_E810T 3 +#define ICE_SMA_MAX_BIT_E810T 7 +#define ICE_PCA9575_P1_OFFSET 8 + #endif /* _ICE_PTP_HW_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_repr.c b/drivers/net/ethernet/intel/ice/ice_repr.c new file mode 100644 index 000000000000..af8e6ef5f571 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_repr.c @@ -0,0 +1,389 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice.h" +#include "ice_eswitch.h" +#include "ice_devlink.h" +#include "ice_virtchnl_pf.h" +#include "ice_tc_lib.h" + +/** + * ice_repr_get_sw_port_id - get port ID associated with representor + * @repr: pointer to port representor + */ +static int ice_repr_get_sw_port_id(struct ice_repr *repr) +{ + return repr->vf->pf->hw.port_info->lport; +} + +/** + * ice_repr_get_phys_port_name - get phys port name + * @netdev: pointer to port representor netdev + * @buf: write here port name + * @len: max length of buf + */ +static int +ice_repr_get_phys_port_name(struct net_device *netdev, char *buf, size_t len) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_repr *repr = np->repr; + int res; + + /* Devlink port is registered and devlink core is taking care of name formatting. */ + if (repr->vf->devlink_port.devlink) + return -EOPNOTSUPP; + + res = snprintf(buf, len, "pf%dvfr%d", ice_repr_get_sw_port_id(repr), + repr->vf->vf_id); + if (res <= 0) + return -EOPNOTSUPP; + return 0; +} + +/** + * ice_repr_get_stats64 - get VF stats for VFPR use + * @netdev: pointer to port representor netdev + * @stats: pointer to struct where stats can be stored + */ +static void +ice_repr_get_stats64(struct net_device *netdev, struct rtnl_link_stats64 *stats) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + struct ice_eth_stats *eth_stats; + struct ice_vsi *vsi; + + if (ice_is_vf_disabled(np->repr->vf)) + return; + vsi = np->repr->src_vsi; + + ice_update_vsi_stats(vsi); + eth_stats = &vsi->eth_stats; + + stats->tx_packets = eth_stats->tx_unicast + eth_stats->tx_broadcast + + eth_stats->tx_multicast; + stats->rx_packets = eth_stats->rx_unicast + eth_stats->rx_broadcast + + eth_stats->rx_multicast; + stats->tx_bytes = eth_stats->tx_bytes; + stats->rx_bytes = eth_stats->rx_bytes; + stats->multicast = eth_stats->rx_multicast; + stats->tx_errors = eth_stats->tx_errors; + stats->tx_dropped = eth_stats->tx_discards; + stats->rx_dropped = eth_stats->rx_discards; +} + +/** + * ice_netdev_to_repr - Get port representor for given netdevice + * @netdev: pointer to port representor netdev + */ +struct ice_repr *ice_netdev_to_repr(struct net_device *netdev) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + return np->repr; +} + +/** + * ice_repr_open - Enable port representor's network interface + * @netdev: network interface device structure + * + * The open entry point is called when a port representor's network + * interface is made active by the system (IFF_UP). Corresponding + * VF is notified about link status change. 
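 * The VF link state is forced here so that the VF's link follows the
 * administrative state of its representor.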
+ * + * Returns 0 on success + */ +static int ice_repr_open(struct net_device *netdev) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + struct ice_vf *vf; + + vf = repr->vf; + vf->link_forced = true; + vf->link_up = true; + ice_vc_notify_vf_link_state(vf); + + netif_carrier_on(netdev); + netif_tx_start_all_queues(netdev); + + return 0; +} + +/** + * ice_repr_stop - Disable port representor's network interface + * @netdev: network interface device structure + * + * The stop entry point is called when a port representor's network + * interface is de-activated by the system. Corresponding + * VF is notified about link status change. + * + * Returns 0 on success + */ +static int ice_repr_stop(struct net_device *netdev) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + struct ice_vf *vf; + + vf = repr->vf; + vf->link_forced = true; + vf->link_up = false; + ice_vc_notify_vf_link_state(vf); + + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + return 0; +} + +static struct devlink_port * +ice_repr_get_devlink_port(struct net_device *netdev) +{ + struct ice_repr *repr = ice_netdev_to_repr(netdev); + + return &repr->vf->devlink_port; +} + +static int +ice_repr_setup_tc_cls_flower(struct ice_repr *repr, + struct flow_cls_offload *flower) +{ + switch (flower->command) { + case FLOW_CLS_REPLACE: + return ice_add_cls_flower(repr->netdev, repr->src_vsi, flower); + case FLOW_CLS_DESTROY: + return ice_del_cls_flower(repr->src_vsi, flower); + default: + return -EINVAL; + } +} + +static int +ice_repr_setup_tc_block_cb(enum tc_setup_type type, void *type_data, + void *cb_priv) +{ + struct flow_cls_offload *flower = (struct flow_cls_offload *)type_data; + struct ice_netdev_priv *np = (struct ice_netdev_priv *)cb_priv; + + switch (type) { + case TC_SETUP_CLSFLOWER: + return ice_repr_setup_tc_cls_flower(np->repr, flower); + default: + return -EOPNOTSUPP; + } +} + +static LIST_HEAD(ice_repr_block_cb_list); + +static int +ice_repr_setup_tc(struct net_device *netdev, enum tc_setup_type type, + void *type_data) +{ + struct ice_netdev_priv *np = netdev_priv(netdev); + + switch (type) { + case TC_SETUP_BLOCK: + return flow_block_cb_setup_simple((struct flow_block_offload *) + type_data, + &ice_repr_block_cb_list, + ice_repr_setup_tc_block_cb, + np, np, true); + default: + return -EOPNOTSUPP; + } +} + +static const struct net_device_ops ice_repr_netdev_ops = { + .ndo_get_phys_port_name = ice_repr_get_phys_port_name, + .ndo_get_stats64 = ice_repr_get_stats64, + .ndo_open = ice_repr_open, + .ndo_stop = ice_repr_stop, + .ndo_start_xmit = ice_eswitch_port_start_xmit, + .ndo_get_devlink_port = ice_repr_get_devlink_port, + .ndo_setup_tc = ice_repr_setup_tc, +}; + +/** + * ice_is_port_repr_netdev - Check if a given netdevice is a port representor netdev + * @netdev: pointer to netdev + */ +bool ice_is_port_repr_netdev(struct net_device *netdev) +{ + return netdev && (netdev->netdev_ops == &ice_repr_netdev_ops); +} + +/** + * ice_repr_reg_netdev - register port representor netdev + * @netdev: pointer to port representor netdev + */ +static int +ice_repr_reg_netdev(struct net_device *netdev) +{ + eth_hw_addr_random(netdev); + netdev->netdev_ops = &ice_repr_netdev_ops; + ice_set_ethtool_repr_ops(netdev); + + netdev->hw_features |= NETIF_F_HW_TC; + + netif_carrier_off(netdev); + netif_tx_stop_all_queues(netdev); + + return register_netdev(netdev); +} + +/** + * ice_repr_add - add representor for VF + * @vf: pointer to VF structure + */ +static int ice_repr_add(struct ice_vf *vf) +{ + struct 
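	/* Construction order: allocate the ice_repr and its netdev, attach a
	 * q_vector, create the devlink VF port, then register the netdev.
	 * The error labels below unwind the same steps in reverse.
	 */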
ice_q_vector *q_vector; + struct ice_netdev_priv *np; + struct ice_repr *repr; + int err; + + repr = kzalloc(sizeof(*repr), GFP_KERNEL); + if (!repr) + return -ENOMEM; + + repr->netdev = alloc_etherdev(sizeof(struct ice_netdev_priv)); + if (!repr->netdev) { + err = -ENOMEM; + goto err_alloc; + } + + repr->src_vsi = ice_get_vf_vsi(vf); + repr->vf = vf; + vf->repr = repr; + np = netdev_priv(repr->netdev); + np->repr = repr; + + q_vector = kzalloc(sizeof(*q_vector), GFP_KERNEL); + if (!q_vector) { + err = -ENOMEM; + goto err_alloc_q_vector; + } + repr->q_vector = q_vector; + + err = ice_devlink_create_vf_port(vf); + if (err) + goto err_devlink; + + repr->netdev->min_mtu = ETH_MIN_MTU; + repr->netdev->max_mtu = ICE_MAX_MTU; + + err = ice_repr_reg_netdev(repr->netdev); + if (err) + goto err_netdev; + + devlink_port_type_eth_set(&vf->devlink_port, repr->netdev); + + return 0; + +err_netdev: + ice_devlink_destroy_vf_port(vf); +err_devlink: + kfree(repr->q_vector); + vf->repr->q_vector = NULL; +err_alloc_q_vector: + free_netdev(repr->netdev); + repr->netdev = NULL; +err_alloc: + kfree(repr); + vf->repr = NULL; + return err; +} + +/** + * ice_repr_rem - remove representor from VF + * @vf: pointer to VF structure + */ +static void ice_repr_rem(struct ice_vf *vf) +{ + ice_devlink_destroy_vf_port(vf); + kfree(vf->repr->q_vector); + vf->repr->q_vector = NULL; + unregister_netdev(vf->repr->netdev); + free_netdev(vf->repr->netdev); + vf->repr->netdev = NULL; + kfree(vf->repr); + vf->repr = NULL; +} + +/** + * ice_repr_add_for_all_vfs - add port representor for all VFs + * @pf: pointer to PF structure + */ +int ice_repr_add_for_all_vfs(struct ice_pf *pf) +{ + int err; + int i; + + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + + err = ice_repr_add(vf); + if (err) + goto err; + + ice_vc_change_ops_to_repr(&vf->vc_ops); + } + + return 0; + +err: + for (i = i - 1; i >= 0; i--) { + struct ice_vf *vf = &pf->vf[i]; + + ice_repr_rem(vf); + ice_vc_set_dflt_vf_ops(&vf->vc_ops); + } + + return err; +} + +/** + * ice_repr_rem_from_all_vfs - remove port representor for all VFs + * @pf: pointer to PF structure + */ +void ice_repr_rem_from_all_vfs(struct ice_pf *pf) +{ + int i; + + ice_for_each_vf(pf, i) { + struct ice_vf *vf = &pf->vf[i]; + + ice_repr_rem(vf); + ice_vc_set_dflt_vf_ops(&vf->vc_ops); + } +} + +/** + * ice_repr_start_tx_queues - start Tx queues of port representor + * @repr: pointer to repr structure + */ +void ice_repr_start_tx_queues(struct ice_repr *repr) +{ + netif_carrier_on(repr->netdev); + netif_tx_start_all_queues(repr->netdev); +} + +/** + * ice_repr_stop_tx_queues - stop Tx queues of port representor + * @repr: pointer to repr structure + */ +void ice_repr_stop_tx_queues(struct ice_repr *repr) +{ + netif_carrier_off(repr->netdev); + netif_tx_stop_all_queues(repr->netdev); +} + +/** + * ice_repr_set_traffic_vsi - set traffic VSI for port representor + * @repr: repr on with VSI will be set + * @vsi: pointer to VSI that will be used by port representor to pass traffic + */ +void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi) +{ + struct ice_netdev_priv *np = netdev_priv(repr->netdev); + + np->vsi = vsi; +} diff --git a/drivers/net/ethernet/intel/ice/ice_repr.h b/drivers/net/ethernet/intel/ice/ice_repr.h new file mode 100644 index 000000000000..806de22933c6 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_repr.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. 
*/ + +#ifndef _ICE_REPR_H_ +#define _ICE_REPR_H_ + +#include <net/dst_metadata.h> +#include "ice.h" + +struct ice_repr { + struct ice_vsi *src_vsi; + struct ice_vf *vf; + struct ice_q_vector *q_vector; + struct net_device *netdev; + struct metadata_dst *dst; +}; + +int ice_repr_add_for_all_vfs(struct ice_pf *pf); +void ice_repr_rem_from_all_vfs(struct ice_pf *pf); + +void ice_repr_start_tx_queues(struct ice_repr *repr); +void ice_repr_stop_tx_queues(struct ice_repr *repr); + +void ice_repr_set_traffic_vsi(struct ice_repr *repr, struct ice_vsi *vsi); + +struct ice_repr *ice_netdev_to_repr(struct net_device *netdev); +bool ice_is_port_repr_netdev(struct net_device *netdev); +#endif diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c index 2d9b10277186..ce3c7bded4cb 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.c +++ b/drivers/net/ethernet/intel/ice/ice_sched.c @@ -3012,6 +3012,43 @@ static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw) } /** + * ice_sched_save_vsi_bw - save VSI node's BW information + * @pi: port information structure + * @vsi_handle: sw VSI handle + * @tc: traffic class + * @rl_type: rate limit type min, max, or shared + * @bw: bandwidth in Kbps - Kilo bits per sec + * + * Save BW information of VSI type node for post replay use. + */ +static int +ice_sched_save_vsi_bw(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + struct ice_vsi_ctx *vsi_ctx; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + return -EINVAL; + vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); + if (!vsi_ctx) + return -EINVAL; + switch (rl_type) { + case ICE_MIN_BW: + ice_set_clear_cir_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + case ICE_MAX_BW: + ice_set_clear_eir_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + case ICE_SHARED_BW: + ice_set_clear_shared_bw(&vsi_ctx->sched.bw_t_info[tc], bw); + break; + default: + return -EINVAL; + } + return 0; +} + +/** * ice_sched_calc_wakeup - calculate RL profile wakeup parameter * @hw: pointer to the HW struct * @bw: bandwidth in Kbps @@ -3784,6 +3821,153 @@ ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, } /** + * ice_sched_get_node_by_id_type - get node from ID type + * @pi: port information structure + * @id: identifier + * @agg_type: type of aggregator + * @tc: traffic class + * + * This function returns node identified by ID of type aggregator, and + * based on traffic class (TC). This function needs to be called with + * the scheduler lock held. 
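 * Only VSI and aggregator node types are handled; any other aggregator
 * type results in a NULL return.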
+ */ +static struct ice_sched_node * +ice_sched_get_node_by_id_type(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc) +{ + struct ice_sched_node *node = NULL; + + switch (agg_type) { + case ICE_AGG_TYPE_VSI: { + struct ice_vsi_ctx *vsi_ctx; + u16 vsi_handle = (u16)id; + + if (!ice_is_vsi_valid(pi->hw, vsi_handle)) + break; + /* Get sched_vsi_info */ + vsi_ctx = ice_get_vsi_ctx(pi->hw, vsi_handle); + if (!vsi_ctx) + break; + node = vsi_ctx->sched.vsi_node[tc]; + break; + } + + case ICE_AGG_TYPE_AGG: { + struct ice_sched_node *tc_node; + + tc_node = ice_sched_get_tc_node(pi, tc); + if (tc_node) + node = ice_sched_get_agg_node(pi, tc_node, id); + break; + } + + default: + break; + } + + return node; +} + +/** + * ice_sched_set_node_bw_lmt_per_tc - set node BW limit per TC + * @pi: port information structure + * @id: ID (software VSI handle or AGG ID) + * @agg_type: aggregator type (VSI or AGG type node) + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function sets BW limit of VSI or Aggregator scheduling node + * based on TC information from passed in argument BW. + */ +static enum ice_status +ice_sched_set_node_bw_lmt_per_tc(struct ice_port_info *pi, u32 id, + enum ice_agg_type agg_type, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + enum ice_status status = ICE_ERR_PARAM; + struct ice_sched_node *node; + + if (!pi) + return status; + + if (rl_type == ICE_UNKNOWN_BW) + return status; + + mutex_lock(&pi->sched_lock); + node = ice_sched_get_node_by_id_type(pi, id, agg_type, tc); + if (!node) { + ice_debug(pi->hw, ICE_DBG_SCHED, "Wrong id, agg type, or tc\n"); + goto exit_set_node_bw_lmt_per_tc; + } + if (bw == ICE_SCHED_DFLT_BW) + status = ice_sched_set_node_bw_dflt_lmt(pi, node, rl_type); + else + status = ice_sched_set_node_bw_lmt(pi, node, rl_type, bw); + +exit_set_node_bw_lmt_per_tc: + mutex_unlock(&pi->sched_lock); + return status; +} + +/** + * ice_cfg_vsi_bw_lmt_per_tc - configure VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * @bw: bandwidth in Kbps + * + * This function configures BW limit of VSI scheduling node based on TC + * information. + */ +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw) +{ + int status; + + status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, bw); + if (!status) { + mutex_lock(&pi->sched_lock); + status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, bw); + mutex_unlock(&pi->sched_lock); + } + return status; +} + +/** + * ice_cfg_vsi_bw_dflt_lmt_per_tc - configure default VSI BW limit per TC + * @pi: port information structure + * @vsi_handle: software VSI handle + * @tc: traffic class + * @rl_type: min or max + * + * This function configures default BW limit of VSI scheduling node based on TC + * information. 
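 * Internally it requests ICE_SCHED_DFLT_BW for the node, which removes any
 * previously applied limit, and then updates the VSI's saved BW information
 * used for post-reset replay.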
+ */ +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type) +{ + int status; + + status = ice_sched_set_node_bw_lmt_per_tc(pi, vsi_handle, + ICE_AGG_TYPE_VSI, + tc, rl_type, + ICE_SCHED_DFLT_BW); + if (!status) { + mutex_lock(&pi->sched_lock); + status = ice_sched_save_vsi_bw(pi, vsi_handle, tc, rl_type, + ICE_SCHED_DFLT_BW); + mutex_unlock(&pi->sched_lock); + } + return status; +} + +/** * ice_cfg_rl_burst_size - Set burst size value * @hw: pointer to the HW struct * @bytes: burst size in bytes diff --git a/drivers/net/ethernet/intel/ice/ice_sched.h b/drivers/net/ethernet/intel/ice/ice_sched.h index fdf7a5882f07..6bddcbecaf5e 100644 --- a/drivers/net/ethernet/intel/ice/ice_sched.h +++ b/drivers/net/ethernet/intel/ice/ice_sched.h @@ -58,6 +58,8 @@ struct ice_sched_agg_info { DECLARE_BITMAP(tc_bitmap, ICE_MAX_TRAFFIC_CLASS); u32 agg_id; enum ice_agg_type agg_type; + /* bw_t_info saves aggregator BW information */ + struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS]; /* save aggregator TC bitmap */ DECLARE_BITMAP(replay_tc_bitmap, ICE_MAX_TRAFFIC_CLASS); }; @@ -104,6 +106,12 @@ ice_cfg_q_bw_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, enum ice_status ice_cfg_q_bw_dflt_lmt(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 q_handle, enum ice_rl_type rl_type); +enum ice_status +ice_cfg_vsi_bw_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type, u32 bw); +enum ice_status +ice_cfg_vsi_bw_dflt_lmt_per_tc(struct ice_port_info *pi, u16 vsi_handle, u8 tc, + enum ice_rl_type rl_type); enum ice_status ice_cfg_rl_burst_size(struct ice_hw *hw, u32 bytes); void ice_sched_replay_agg_vsi_preinit(struct ice_hw *hw); void ice_sched_replay_agg(struct ice_hw *hw); diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c index 3b6c1420aa7b..793f4a9fc2cd 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.c +++ b/drivers/net/ethernet/intel/ice/ice_switch.c @@ -8,6 +8,7 @@ #define ICE_ETH_ETHTYPE_OFFSET 12 #define ICE_ETH_VLAN_TCI_OFFSET 14 #define ICE_MAX_VLAN_ID 0xFFF +#define ICE_IPV6_ETHER_ID 0x86DD /* Dummy ethernet header needed in the ice_aqc_sw_rules_elem * struct to configure any switch filter rules. 
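/* A minimal usage sketch for the two per-TC VSI rate-limit helpers added
 * above. The wrapper name and the 500 Mbps figure are illustrative only;
 * ice_cfg_vsi_bw_lmt_per_tc(), ice_cfg_vsi_bw_dflt_lmt_per_tc() and
 * ICE_MAX_BW come from the patch and the existing scheduler code.
 */
static enum ice_status
example_cap_vsi_tc0(struct ice_port_info *pi, u16 vsi_handle, bool enable)
{
	if (enable)
		/* cap the maximum rate of TC 0 on this VSI (value in Kbps) */
		return ice_cfg_vsi_bw_lmt_per_tc(pi, vsi_handle, 0,
						 ICE_MAX_BW, 500000);

	/* restore the default (unlimited) maximum rate for TC 0 */
	return ice_cfg_vsi_bw_dflt_lmt_per_tc(pi, vsi_handle, 0, ICE_MAX_BW);
}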
@@ -29,6 +30,476 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, 0x2, 0, 0, 0, 0, 0, 0x81, 0, 0, 0}; +struct ice_dummy_pkt_offsets { + enum ice_protocol_type type; + u16 offset; /* ICE_PROTOCOL_LAST indicates end of list */ +}; + +static const struct ice_dummy_pkt_offsets dummy_gre_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_NVGRE, 34 }, + { ICE_MAC_IL, 42 }, + { ICE_IPV4_IL, 56 }, + { ICE_TCP_IL, 76 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_gre_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x2F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 76 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x02, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static const struct ice_dummy_pkt_offsets dummy_gre_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_NVGRE, 34 }, + { ICE_MAC_IL, 42 }, + { ICE_IPV4_IL, 56 }, + { ICE_UDP_ILOS, 76 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_gre_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x3E, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x2F, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x80, 0x00, 0x65, 0x58, /* ICE_NVGRE 34 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 42 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x14, /* ICE_IPV4_IL 56 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 76 */ + 0x00, 0x08, 0x00, 0x00, +}; + +static const struct ice_dummy_pkt_offsets dummy_udp_tun_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_UDP_OF, 34 }, + { ICE_VXLAN, 42 }, + { ICE_GENEVE, 42 }, + { ICE_VXLAN_GPE, 42 }, + { ICE_MAC_IL, 50 }, + { ICE_IPV4_IL, 64 }, + { ICE_TCP_IL, 84 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_udp_tun_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x5a, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x40, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */ + 0x00, 0x46, 0x00, 0x00, + + 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_IL 64 */ + 0x00, 0x01, 0x00, 0x00, + 0x40, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 84 */ + 0x00, 
0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x02, 0x20, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static const struct ice_dummy_pkt_offsets dummy_udp_tun_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_UDP_OF, 34 }, + { ICE_VXLAN, 42 }, + { ICE_GENEVE, 42 }, + { ICE_VXLAN_GPE, 42 }, + { ICE_MAC_IL, 50 }, + { ICE_IPV4_IL, 64 }, + { ICE_UDP_ILOS, 84 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_udp_tun_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x4e, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x12, 0xb5, /* ICE_UDP_OF 34 */ + 0x00, 0x3a, 0x00, 0x00, + + 0x00, 0x00, 0x65, 0x58, /* ICE_VXLAN 42 */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_IL 50 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x08, 0x00, + + 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_IL 64 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 84 */ + 0x00, 0x08, 0x00, 0x00, +}; + +/* offset info for MAC + IPv4 + UDP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_UDP_ILOS, 34 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* Dummy packet for MAC + IPv4 + UDP */ +static const u8 dummy_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 34 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* offset info for MAC + VLAN + IPv4 + UDP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_vlan_udp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV4_OFOS, 18 }, + { ICE_UDP_ILOS, 38 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (801.1Q), IPv4:UDP dummy packet */ +static const u8 dummy_vlan_udp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */ + + 0x08, 0x00, /* ICE_ETYPE_OL 16 */ + + 0x45, 0x00, 0x00, 0x1c, /* ICE_IPV4_OFOS 18 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x11, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 38 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* offset info for MAC + IPv4 + TCP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV4_OFOS, 14 }, + { ICE_TCP_IL, 34 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* Dummy packet for MAC + IPv4 + TCP */ +static const u8 dummy_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x08, 0x00, /* ICE_ETYPE_OL 12 */ + + 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 14 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 
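	/* The byte arrays in this block are template packets: when an
	 * advanced switch rule is built, the fields being matched are
	 * written into a copy of the template at the offsets listed in the
	 * corresponding *_offsets[] table before the rule is passed to
	 * firmware.
	 */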
0x00, 0x00, /* ICE_TCP_IL 34 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* offset info for MAC + VLAN (C-tag, 802.1Q) + IPv4 + TCP dummy packet */ +static const struct ice_dummy_pkt_offsets dummy_vlan_tcp_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV4_OFOS, 18 }, + { ICE_TCP_IL, 38 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (801.1Q), IPv4:TCP dummy packet */ +static const u8 dummy_vlan_tcp_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */ + + 0x08, 0x00, /* ICE_ETYPE_OL 16 */ + + 0x45, 0x00, 0x00, 0x28, /* ICE_IPV4_OFOS 18 */ + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x06, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 38 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +static const struct ice_dummy_pkt_offsets dummy_tcp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV6_OFOS, 14 }, + { ICE_TCP_IL, 54 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +static const u8 dummy_tcp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x86, 0xDD, /* ICE_ETYPE_OL 12 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 40 */ + 0x00, 0x14, 0x06, 0x00, /* Next header is TCP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 54 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* C-tag (802.1Q): IPv6 + TCP */ +static const struct ice_dummy_pkt_offsets +dummy_vlan_tcp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV6_OFOS, 18 }, + { ICE_TCP_IL, 58 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (802.1Q), IPv6 + TCP dummy packet */ +static const u8 dummy_vlan_tcp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00, /* ICE_VLAN_OFOS 12 */ + + 0x86, 0xDD, /* ICE_ETYPE_OL 16 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 18 */ + 0x00, 0x14, 0x06, 0x00, /* Next header is TCP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_TCP_IL 58 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x50, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* IPv6 + UDP */ +static const struct ice_dummy_pkt_offsets dummy_udp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_ETYPE_OL, 12 }, + { ICE_IPV6_OFOS, 14 }, + { ICE_UDP_ILOS, 54 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* IPv6 + UDP dummy packet */ +static const u8 dummy_udp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x86, 0xDD, /* 
ICE_ETYPE_OL 12 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 40 */ + 0x00, 0x10, 0x11, 0x00, /* Next header UDP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 54 */ + 0x00, 0x10, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* needed for ESP packets */ + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + +/* C-tag (802.1Q): IPv6 + UDP */ +static const struct ice_dummy_pkt_offsets +dummy_vlan_udp_ipv6_packet_offsets[] = { + { ICE_MAC_OFOS, 0 }, + { ICE_VLAN_OFOS, 12 }, + { ICE_ETYPE_OL, 16 }, + { ICE_IPV6_OFOS, 18 }, + { ICE_UDP_ILOS, 58 }, + { ICE_PROTOCOL_LAST, 0 }, +}; + +/* C-tag (802.1Q), IPv6 + UDP dummy packet */ +static const u8 dummy_vlan_udp_ipv6_packet[] = { + 0x00, 0x00, 0x00, 0x00, /* ICE_MAC_OFOS 0 */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x81, 0x00, 0x00, 0x00,/* ICE_VLAN_OFOS 12 */ + + 0x86, 0xDD, /* ICE_ETYPE_OL 16 */ + + 0x60, 0x00, 0x00, 0x00, /* ICE_IPV6_OFOS 18 */ + 0x00, 0x08, 0x11, 0x00, /* Next header UDP */ + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + + 0x00, 0x00, 0x00, 0x00, /* ICE_UDP_ILOS 58 */ + 0x00, 0x08, 0x00, 0x00, + + 0x00, 0x00, /* 2 bytes for 4 byte alignment */ +}; + #define ICE_SW_RULE_RX_TX_ETH_HDR_SIZE \ (offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr) + \ (DUMMY_ETH_HDR_LEN * \ @@ -42,6 +513,14 @@ static const u8 dummy_eth_header[DUMMY_ETH_HDR_LEN] = { 0x2, 0, 0, 0, 0, 0, (offsetof(struct ice_aqc_sw_rules_elem, pdata.vsi_list.vsi) + \ ((n) * sizeof(((struct ice_sw_rule_vsi_list *)0)->vsi[0]))) +/* this is a recipe to profile association bitmap */ +static DECLARE_BITMAP(recipe_to_profile[ICE_MAX_NUM_RECIPES], + ICE_MAX_NUM_PROFILES); + +/* this is a profile to recipe association bitmap */ +static DECLARE_BITMAP(profile_to_recipe[ICE_MAX_NUM_PROFILES], + ICE_MAX_NUM_RECIPES); + /** * ice_init_def_sw_recp - initialize the recipe book keeping tables * @hw: pointer to the HW struct @@ -59,10 +538,11 @@ enum ice_status ice_init_def_sw_recp(struct ice_hw *hw) if (!recps) return ICE_ERR_NO_MEMORY; - for (i = 0; i < ICE_SW_LKUP_LAST; i++) { + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { recps[i].root_rid = i; INIT_LIST_HEAD(&recps[i].filt_rules); INIT_LIST_HEAD(&recps[i].filt_replay_rules); + INIT_LIST_HEAD(&recps[i].rg_list); mutex_init(&recps[i].filt_rule_lock); } @@ -518,7 +998,7 @@ ice_aq_alloc_free_vsi_list_exit: * * Add(0x02a0)/Update(0x02a1)/Remove(0x02a2) switch rules commands to firmware */ -static enum ice_status +enum ice_status ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd) { @@ -543,6 +1023,360 @@ ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, return status; } +/** + * ice_aq_add_recipe - add switch recipe + * @hw: pointer to the HW struct + * @s_recipe_list: pointer to switch rule population list + * @num_recipes: number of switch recipes in the list + * @cd: pointer to command details structure or NULL + * + * Add(0x0290) + */ +static enum ice_status +ice_aq_add_recipe(struct ice_hw *hw, + struct ice_aqc_recipe_data_elem *s_recipe_list, + u16 num_recipes, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_get_recipe 
*cmd; + struct ice_aq_desc desc; + u16 buf_size; + + cmd = &desc.params.add_get_recipe; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_add_recipe); + + cmd->num_sub_recipes = cpu_to_le16(num_recipes); + desc.flags |= cpu_to_le16(ICE_AQ_FLAG_RD); + + buf_size = num_recipes * sizeof(*s_recipe_list); + + return ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd); +} + +/** + * ice_aq_get_recipe - get switch recipe + * @hw: pointer to the HW struct + * @s_recipe_list: pointer to switch rule population list + * @num_recipes: pointer to the number of recipes (input and output) + * @recipe_root: root recipe number of recipe(s) to retrieve + * @cd: pointer to command details structure or NULL + * + * Get(0x0292) + * + * On input, *num_recipes should equal the number of entries in s_recipe_list. + * On output, *num_recipes will equal the number of entries returned in + * s_recipe_list. + * + * The caller must supply enough space in s_recipe_list to hold all possible + * recipes and *num_recipes must equal ICE_MAX_NUM_RECIPES. + */ +static enum ice_status +ice_aq_get_recipe(struct ice_hw *hw, + struct ice_aqc_recipe_data_elem *s_recipe_list, + u16 *num_recipes, u16 recipe_root, struct ice_sq_cd *cd) +{ + struct ice_aqc_add_get_recipe *cmd; + struct ice_aq_desc desc; + enum ice_status status; + u16 buf_size; + + if (*num_recipes != ICE_MAX_NUM_RECIPES) + return ICE_ERR_PARAM; + + cmd = &desc.params.add_get_recipe; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe); + + cmd->return_index = cpu_to_le16(recipe_root); + cmd->num_sub_recipes = 0; + + buf_size = *num_recipes * sizeof(*s_recipe_list); + + status = ice_aq_send_cmd(hw, &desc, s_recipe_list, buf_size, cd); + *num_recipes = le16_to_cpu(cmd->num_sub_recipes); + + return status; +} + +/** + * ice_aq_map_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with + * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Recipe to profile association (0x0291) + */ +static enum ice_status +ice_aq_map_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + struct ice_sq_cd *cd) +{ + struct ice_aqc_recipe_to_profile *cmd; + struct ice_aq_desc desc; + + cmd = &desc.params.recipe_to_profile; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_recipe_to_profile); + cmd->profile_id = cpu_to_le16(profile_id); + /* Set the recipe ID bit in the bitmask to let the device know which + * profile we are associating the recipe to + */ + memcpy(cmd->recipe_assoc, r_bitmap, sizeof(cmd->recipe_assoc)); + + return ice_aq_send_cmd(hw, &desc, NULL, 0, cd); +} + +/** + * ice_aq_get_recipe_to_profile - Map recipe to packet profile + * @hw: pointer to the HW struct + * @profile_id: package profile ID to associate the recipe with + * @r_bitmap: Recipe bitmap filled in and need to be returned as response + * @cd: pointer to command details structure or NULL + * Associate profile ID with given recipe (0x0293) + */ +static enum ice_status +ice_aq_get_recipe_to_profile(struct ice_hw *hw, u32 profile_id, u8 *r_bitmap, + struct ice_sq_cd *cd) +{ + struct ice_aqc_recipe_to_profile *cmd; + struct ice_aq_desc desc; + enum ice_status status; + + cmd = &desc.params.recipe_to_profile; + ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_get_recipe_to_profile); + cmd->profile_id = cpu_to_le16(profile_id); + + status = ice_aq_send_cmd(hw, &desc, NULL, 0, cd); + if (!status) + memcpy(r_bitmap, 
cmd->recipe_assoc, sizeof(cmd->recipe_assoc)); + + return status; +} + +/** + * ice_alloc_recipe - add recipe resource + * @hw: pointer to the hardware structure + * @rid: recipe ID returned as response to AQ call + */ +static enum ice_status ice_alloc_recipe(struct ice_hw *hw, u16 *rid) +{ + struct ice_aqc_alloc_free_res_elem *sw_buf; + enum ice_status status; + u16 buf_len; + + buf_len = struct_size(sw_buf, elem, 1); + sw_buf = kzalloc(buf_len, GFP_KERNEL); + if (!sw_buf) + return ICE_ERR_NO_MEMORY; + + sw_buf->num_elems = cpu_to_le16(1); + sw_buf->res_type = cpu_to_le16((ICE_AQC_RES_TYPE_RECIPE << + ICE_AQC_RES_TYPE_S) | + ICE_AQC_RES_TYPE_FLAG_SHARED); + status = ice_aq_alloc_free_res(hw, 1, sw_buf, buf_len, + ice_aqc_opc_alloc_res, NULL); + if (!status) + *rid = le16_to_cpu(sw_buf->elem[0].e.sw_resp); + kfree(sw_buf); + + return status; +} + +/** + * ice_get_recp_to_prof_map - updates recipe to profile mapping + * @hw: pointer to hardware structure + * + * This function is used to populate recipe_to_profile matrix where index to + * this array is the recipe ID and the element is the mapping of which profiles + * is this recipe mapped to. + */ +static void ice_get_recp_to_prof_map(struct ice_hw *hw) +{ + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u16 i; + + for (i = 0; i < hw->switch_info->max_used_prof_index + 1; i++) { + u16 j; + + bitmap_zero(profile_to_recipe[i], ICE_MAX_NUM_RECIPES); + bitmap_zero(r_bitmap, ICE_MAX_NUM_RECIPES); + if (ice_aq_get_recipe_to_profile(hw, i, (u8 *)r_bitmap, NULL)) + continue; + bitmap_copy(profile_to_recipe[i], r_bitmap, + ICE_MAX_NUM_RECIPES); + for_each_set_bit(j, r_bitmap, ICE_MAX_NUM_RECIPES) + set_bit(i, recipe_to_profile[j]); + } +} + +/** + * ice_collect_result_idx - copy result index values + * @buf: buffer that contains the result index + * @recp: the recipe struct to copy data into + */ +static void +ice_collect_result_idx(struct ice_aqc_recipe_data_elem *buf, + struct ice_sw_recipe *recp) +{ + if (buf->content.result_indx & ICE_AQ_RECIPE_RESULT_EN) + set_bit(buf->content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN, + recp->res_idxs); +} + +/** + * ice_get_recp_frm_fw - update SW bookkeeping from FW recipe entries + * @hw: pointer to hardware structure + * @recps: struct that we need to populate + * @rid: recipe ID that we are populating + * @refresh_required: true if we should get recipe to profile mapping from FW + * + * This function is used to populate all the necessary entries into our + * bookkeeping so that we have a current list of all the recipes that are + * programmed in the firmware. + */ +static enum ice_status +ice_get_recp_frm_fw(struct ice_hw *hw, struct ice_sw_recipe *recps, u8 rid, + bool *refresh_required) +{ + DECLARE_BITMAP(result_bm, ICE_MAX_FV_WORDS); + struct ice_aqc_recipe_data_elem *tmp; + u16 num_recps = ICE_MAX_NUM_RECIPES; + struct ice_prot_lkup_ext *lkup_exts; + enum ice_status status; + u8 fv_word_idx = 0; + u16 sub_recps; + + bitmap_zero(result_bm, ICE_MAX_FV_WORDS); + + /* we need a buffer big enough to accommodate all the recipes */ + tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ICE_ERR_NO_MEMORY; + + tmp[0].recipe_indx = rid; + status = ice_aq_get_recipe(hw, tmp, &num_recps, rid, NULL); + /* non-zero status meaning recipe doesn't exist */ + if (status) + goto err_unroll; + + /* Get recipe to profile map so that we can get the fv from lkups that + * we read for a recipe from FW. 
Since we want to minimize the number of + * times we make this FW call, just make one call and cache the copy + * until a new recipe is added. This operation is only required the + * first time to get the changes from FW. Then to search existing + * entries we don't need to update the cache again until another recipe + * gets added. + */ + if (*refresh_required) { + ice_get_recp_to_prof_map(hw); + *refresh_required = false; + } + + /* Start populating all the entries for recps[rid] based on lkups from + * firmware. Note that we are only creating the root recipe in our + * database. + */ + lkup_exts = &recps[rid].lkup_exts; + + for (sub_recps = 0; sub_recps < num_recps; sub_recps++) { + struct ice_aqc_recipe_data_elem root_bufs = tmp[sub_recps]; + struct ice_recp_grp_entry *rg_entry; + u8 i, prof, idx, prot = 0; + bool is_root; + u16 off = 0; + + rg_entry = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*rg_entry), + GFP_KERNEL); + if (!rg_entry) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll; + } + + idx = root_bufs.recipe_indx; + is_root = root_bufs.content.rid & ICE_AQ_RECIPE_ID_IS_ROOT; + + /* Mark all result indices in this chain */ + if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN) + set_bit(root_bufs.content.result_indx & ~ICE_AQ_RECIPE_RESULT_EN, + result_bm); + + /* get the first profile that is associated with rid */ + prof = find_first_bit(recipe_to_profile[idx], + ICE_MAX_NUM_PROFILES); + for (i = 0; i < ICE_NUM_WORDS_RECIPE; i++) { + u8 lkup_indx = root_bufs.content.lkup_indx[i + 1]; + + rg_entry->fv_idx[i] = lkup_indx; + rg_entry->fv_mask[i] = + le16_to_cpu(root_bufs.content.mask[i + 1]); + + /* If the recipe is a chained recipe then all its + * child recipe's result will have a result index. + * To fill fv_words we should not use those result + * index, we only need the protocol ids and offsets. + * We will skip all the fv_idx which stores result + * index in them. We also need to skip any fv_idx which + * has ICE_AQ_RECIPE_LKUP_IGNORE or 0 since it isn't a + * valid offset value. 
+ */ + if (test_bit(rg_entry->fv_idx[i], hw->switch_info->prof_res_bm[prof]) || + rg_entry->fv_idx[i] & ICE_AQ_RECIPE_LKUP_IGNORE || + rg_entry->fv_idx[i] == 0) + continue; + + ice_find_prot_off(hw, ICE_BLK_SW, prof, + rg_entry->fv_idx[i], &prot, &off); + lkup_exts->fv_words[fv_word_idx].prot_id = prot; + lkup_exts->fv_words[fv_word_idx].off = off; + lkup_exts->field_mask[fv_word_idx] = + rg_entry->fv_mask[i]; + fv_word_idx++; + } + /* populate rg_list with the data from the child entry of this + * recipe + */ + list_add(&rg_entry->l_entry, &recps[rid].rg_list); + + /* Propagate some data to the recipe database */ + recps[idx].is_root = !!is_root; + recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority; + bitmap_zero(recps[idx].res_idxs, ICE_MAX_FV_WORDS); + if (root_bufs.content.result_indx & ICE_AQ_RECIPE_RESULT_EN) { + recps[idx].chain_idx = root_bufs.content.result_indx & + ~ICE_AQ_RECIPE_RESULT_EN; + set_bit(recps[idx].chain_idx, recps[idx].res_idxs); + } else { + recps[idx].chain_idx = ICE_INVAL_CHAIN_IND; + } + + if (!is_root) + continue; + + /* Only do the following for root recipes entries */ + memcpy(recps[idx].r_bitmap, root_bufs.recipe_bitmap, + sizeof(recps[idx].r_bitmap)); + recps[idx].root_rid = root_bufs.content.rid & + ~ICE_AQ_RECIPE_ID_IS_ROOT; + recps[idx].priority = root_bufs.content.act_ctrl_fwd_priority; + } + + /* Complete initialization of the root recipe entry */ + lkup_exts->n_val_words = fv_word_idx; + recps[rid].big_recp = (num_recps > 1); + recps[rid].n_grp_count = (u8)num_recps; + recps[rid].root_buf = devm_kmemdup(ice_hw_to_dev(hw), tmp, + recps[rid].n_grp_count * sizeof(*recps[rid].root_buf), + GFP_KERNEL); + if (!recps[rid].root_buf) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll; + } + + /* Copy result indexes */ + bitmap_copy(recps[rid].res_idxs, result_bm, ICE_MAX_FV_WORDS); + recps[rid].recp_created = true; + +err_unroll: + kfree(tmp); + return status; +} + /* ice_init_port_info - Initialize port_info with switch configuration data * @pi: pointer to port_info * @vsi_port_num: VSI number or port number @@ -1627,6 +2461,125 @@ exit: } /** + * ice_mac_fltr_exist - does this MAC filter exist for given VSI + * @hw: pointer to the hardware structure + * @mac: MAC address to be checked (for MAC filter) + * @vsi_handle: check MAC filter for this VSI + */ +bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle) +{ + struct ice_fltr_mgmt_list_entry *entry; + struct list_head *rule_head; + struct ice_switch_info *sw; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 hw_vsi_id; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return false; + + hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); + sw = hw->switch_info; + rule_head = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rules; + if (!rule_head) + return false; + + rule_lock = &sw->recp_list[ICE_SW_LKUP_MAC].filt_rule_lock; + mutex_lock(rule_lock); + list_for_each_entry(entry, rule_head, list_entry) { + struct ice_fltr_info *f_info = &entry->fltr_info; + u8 *mac_addr = &f_info->l_data.mac.mac_addr[0]; + + if (is_zero_ether_addr(mac_addr)) + continue; + + if (f_info->flag != ICE_FLTR_TX || + f_info->src_id != ICE_SRC_ID_VSI || + f_info->lkup_type != ICE_SW_LKUP_MAC || + f_info->fltr_act != ICE_FWD_TO_VSI || + hw_vsi_id != f_info->fwd_id.hw_vsi_id) + continue; + + if (ether_addr_equal(mac, mac_addr)) { + mutex_unlock(rule_lock); + return true; + } + } + mutex_unlock(rule_lock); + return false; +} + +/** + * ice_vlan_fltr_exist - does this VLAN filter exist for given VSI + * @hw: 
pointer to the hardware structure + * @vlan_id: VLAN ID + * @vsi_handle: check MAC filter for this VSI + */ +bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle) +{ + struct ice_fltr_mgmt_list_entry *entry; + struct list_head *rule_head; + struct ice_switch_info *sw; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 hw_vsi_id; + + if (vlan_id > ICE_MAX_VLAN_ID) + return false; + + if (!ice_is_vsi_valid(hw, vsi_handle)) + return false; + + hw_vsi_id = ice_get_hw_vsi_num(hw, vsi_handle); + sw = hw->switch_info; + rule_head = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rules; + if (!rule_head) + return false; + + rule_lock = &sw->recp_list[ICE_SW_LKUP_VLAN].filt_rule_lock; + mutex_lock(rule_lock); + list_for_each_entry(entry, rule_head, list_entry) { + struct ice_fltr_info *f_info = &entry->fltr_info; + u16 entry_vlan_id = f_info->l_data.vlan.vlan_id; + struct ice_vsi_list_map_info *map_info; + + if (entry_vlan_id > ICE_MAX_VLAN_ID) + continue; + + if (f_info->flag != ICE_FLTR_TX || + f_info->src_id != ICE_SRC_ID_VSI || + f_info->lkup_type != ICE_SW_LKUP_VLAN) + continue; + + /* Only allowed filter action are FWD_TO_VSI/_VSI_LIST */ + if (f_info->fltr_act != ICE_FWD_TO_VSI && + f_info->fltr_act != ICE_FWD_TO_VSI_LIST) + continue; + + if (f_info->fltr_act == ICE_FWD_TO_VSI) { + if (hw_vsi_id != f_info->fwd_id.hw_vsi_id) + continue; + } else if (f_info->fltr_act == ICE_FWD_TO_VSI_LIST) { + /* If filter_action is FWD_TO_VSI_LIST, make sure + * that VSI being checked is part of VSI list + */ + if (entry->vsi_count == 1 && + entry->vsi_list_info) { + map_info = entry->vsi_list_info; + if (!test_bit(vsi_handle, map_info->vsi_map)) + continue; + } + } + + if (vlan_id == entry_vlan_id) { + mutex_unlock(rule_lock); + return true; + } + } + mutex_unlock(rule_lock); + + return false; +} + +/** * ice_add_mac - Add a MAC address based filter rule * @hw: pointer to the hardware structure * @m_list: list of MAC addresses and forwarding information @@ -2037,6 +2990,27 @@ ice_rem_sw_rule_info(struct ice_hw *hw, struct list_head *rule_head) } /** + * ice_rem_adv_rule_info + * @hw: pointer to the hardware structure + * @rule_head: pointer to the switch list structure that we want to delete + */ +static void +ice_rem_adv_rule_info(struct ice_hw *hw, struct list_head *rule_head) +{ + struct ice_adv_fltr_mgmt_list_entry *tmp_entry; + struct ice_adv_fltr_mgmt_list_entry *lst_itr; + + if (list_empty(rule_head)) + return; + + list_for_each_entry_safe(lst_itr, tmp_entry, rule_head, list_entry) { + list_del(&lst_itr->list_entry); + devm_kfree(ice_hw_to_dev(hw), lst_itr->lkups); + devm_kfree(ice_hw_to_dev(hw), lst_itr); + } +} + +/** * ice_cfg_dflt_vsi - change state of VSI to set/clear default * @hw: pointer to the hardware structure * @vsi_handle: VSI handle to set as default @@ -2773,6 +3747,1621 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, return status; } +/* This is mapping table entry that maps every word within a given protocol + * structure to the real byte offset as per the specification of that + * protocol header. + * for example dst address is 3 words in ethertype header and corresponding + * bytes are 0, 2, 3 in the actual packet header and src address is at 4, 6, 8 + * IMPORTANT: Every structure part of "ice_prot_hdr" union should have a + * matching entry describing its field. This needs to be updated if new + * structure is added to that union. 
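The mapping table described above (and defined just below) is essentially a per-protocol array of byte offsets, one entry per 16-bit match word. A simplified, standalone illustration of the idea, with made-up protocol names rather than the driver's table:

#include <stdint.h>
#include <stdio.h>

/* Word-index -> byte-offset map for two toy headers.  0xFF marks a word
 * slot the header does not provide.
 */
enum toy_proto { TOY_MAC, TOY_VLAN, TOY_PROTO_LAST };

static const uint8_t toy_prot_ext[TOY_PROTO_LAST][8] = {
    /* dst addr words at 0/2/4, src addr words at 6/8/10, type at 12 */
    [TOY_MAC]  = { 0, 2, 4, 6, 8, 10, 12, 0xFF },
    /* VLAN: the TCI word sits at byte 2, the TPID word at byte 0 */
    [TOY_VLAN] = { 2, 0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF },
};

/* Translate (protocol, word index) into the byte offset inside the header. */
static int toy_word_to_off(enum toy_proto p, unsigned int word)
{
    if (p >= TOY_PROTO_LAST || word >= 8 || toy_prot_ext[p][word] == 0xFF)
        return -1;
    return toy_prot_ext[p][word];
}

int main(void)
{
    printf("VLAN word 0 lives at byte offset %d\n",
           toy_word_to_off(TOY_VLAN, 0));   /* prints 2 */
    return 0;
}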
+ */ +static const struct ice_prot_ext_tbl_entry ice_prot_ext[ICE_PROTOCOL_LAST] = { + { ICE_MAC_OFOS, { 0, 2, 4, 6, 8, 10, 12 } }, + { ICE_MAC_IL, { 0, 2, 4, 6, 8, 10, 12 } }, + { ICE_ETYPE_OL, { 0 } }, + { ICE_VLAN_OFOS, { 2, 0 } }, + { ICE_IPV4_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, + { ICE_IPV4_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18 } }, + { ICE_IPV6_OFOS, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, + 26, 28, 30, 32, 34, 36, 38 } }, + { ICE_IPV6_IL, { 0, 2, 4, 6, 8, 10, 12, 14, 16, 18, 20, 22, 24, + 26, 28, 30, 32, 34, 36, 38 } }, + { ICE_TCP_IL, { 0, 2 } }, + { ICE_UDP_OF, { 0, 2 } }, + { ICE_UDP_ILOS, { 0, 2 } }, + { ICE_VXLAN, { 8, 10, 12, 14 } }, + { ICE_GENEVE, { 8, 10, 12, 14 } }, + { ICE_NVGRE, { 0, 2, 4, 6 } }, +}; + +static struct ice_protocol_entry ice_prot_id_tbl[ICE_PROTOCOL_LAST] = { + { ICE_MAC_OFOS, ICE_MAC_OFOS_HW }, + { ICE_MAC_IL, ICE_MAC_IL_HW }, + { ICE_ETYPE_OL, ICE_ETYPE_OL_HW }, + { ICE_VLAN_OFOS, ICE_VLAN_OL_HW }, + { ICE_IPV4_OFOS, ICE_IPV4_OFOS_HW }, + { ICE_IPV4_IL, ICE_IPV4_IL_HW }, + { ICE_IPV6_OFOS, ICE_IPV6_OFOS_HW }, + { ICE_IPV6_IL, ICE_IPV6_IL_HW }, + { ICE_TCP_IL, ICE_TCP_IL_HW }, + { ICE_UDP_OF, ICE_UDP_OF_HW }, + { ICE_UDP_ILOS, ICE_UDP_ILOS_HW }, + { ICE_VXLAN, ICE_UDP_OF_HW }, + { ICE_GENEVE, ICE_UDP_OF_HW }, + { ICE_NVGRE, ICE_GRE_OF_HW }, +}; + +/** + * ice_find_recp - find a recipe + * @hw: pointer to the hardware structure + * @lkup_exts: extension sequence to match + * + * Returns index of matching recipe, or ICE_MAX_NUM_RECIPES if not found. + */ +static u16 ice_find_recp(struct ice_hw *hw, struct ice_prot_lkup_ext *lkup_exts) +{ + bool refresh_required = true; + struct ice_sw_recipe *recp; + u8 i; + + /* Walk through existing recipes to find a match */ + recp = hw->switch_info->recp_list; + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { + /* If recipe was not created for this ID, in SW bookkeeping, + * check if FW has an entry for this recipe. If the FW has an + * entry update it in our SW bookkeeping and continue with the + * matching. + */ + if (!recp[i].recp_created) + if (ice_get_recp_frm_fw(hw, + hw->switch_info->recp_list, i, + &refresh_required)) + continue; + + /* Skip inverse action recipes */ + if (recp[i].root_buf && recp[i].root_buf->content.act_ctrl & + ICE_AQ_RECIPE_ACT_INV_ACT) + continue; + + /* if number of words we are looking for match */ + if (lkup_exts->n_val_words == recp[i].lkup_exts.n_val_words) { + struct ice_fv_word *ar = recp[i].lkup_exts.fv_words; + struct ice_fv_word *be = lkup_exts->fv_words; + u16 *cr = recp[i].lkup_exts.field_mask; + u16 *de = lkup_exts->field_mask; + bool found = true; + u8 pe, qr; + + /* ar, cr, and qr are related to the recipe words, while + * be, de, and pe are related to the lookup words + */ + for (pe = 0; pe < lkup_exts->n_val_words; pe++) { + for (qr = 0; qr < recp[i].lkup_exts.n_val_words; + qr++) { + if (ar[qr].off == be[pe].off && + ar[qr].prot_id == be[pe].prot_id && + cr[qr] == de[pe]) + /* Found the "pe"th word in the + * given recipe + */ + break; + } + /* After walking through all the words in the + * "i"th recipe if "p"th word was not found then + * this recipe is not what we are looking for. 
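The nested loops above compare word sets order-independently: a recipe matches when every requested (protocol ID, offset, mask) triple has a counterpart somewhere in the recipe's own word list and the word counts agree. A standalone sketch of that comparison:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct match_word {
    uint8_t  prot_id;   /* protocol ID */
    uint16_t off;       /* byte offset within that protocol header */
    uint16_t mask;      /* 16-bit field mask */
};

/* True when both word sets hold the same triples, independent of order.
 * Mirrors the driver's nested loops: every word of 'a' must be found
 * somewhere in 'b', and the counts must be equal.
 */
static bool word_sets_match(const struct match_word *a, unsigned int na,
                            const struct match_word *b, unsigned int nb)
{
    if (na != nb)
        return false;

    for (unsigned int i = 0; i < na; i++) {
        unsigned int j;

        for (j = 0; j < nb; j++)
            if (a[i].prot_id == b[j].prot_id &&
                a[i].off == b[j].off &&
                a[i].mask == b[j].mask)
                break;
        if (j == nb)
            return false;   /* word i has no counterpart */
    }
    return true;
}

int main(void)
{
    struct match_word rule[] = { { 1, 0, 0xFFFF }, { 4, 12, 0x00FF } };
    struct match_word recp[] = { { 4, 12, 0x00FF }, { 1, 0, 0xFFFF } };

    printf("match: %d\n", word_sets_match(rule, 2, recp, 2));   /* 1 */
    return 0;
}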
+ * So break out from this loop and try the next + * recipe + */ + if (qr >= recp[i].lkup_exts.n_val_words) { + found = false; + break; + } + } + /* If for "i"th recipe the found was never set to false + * then it means we found our match + */ + if (found) + return i; /* Return the recipe ID */ + } + } + return ICE_MAX_NUM_RECIPES; +} + +/** + * ice_prot_type_to_id - get protocol ID from protocol type + * @type: protocol type + * @id: pointer to variable that will receive the ID + * + * Returns true if found, false otherwise + */ +static bool ice_prot_type_to_id(enum ice_protocol_type type, u8 *id) +{ + u8 i; + + for (i = 0; i < ARRAY_SIZE(ice_prot_id_tbl); i++) + if (ice_prot_id_tbl[i].type == type) { + *id = ice_prot_id_tbl[i].protocol_id; + return true; + } + return false; +} + +/** + * ice_fill_valid_words - count valid words + * @rule: advanced rule with lookup information + * @lkup_exts: byte offset extractions of the words that are valid + * + * calculate valid words in a lookup rule using mask value + */ +static u8 +ice_fill_valid_words(struct ice_adv_lkup_elem *rule, + struct ice_prot_lkup_ext *lkup_exts) +{ + u8 j, word, prot_id, ret_val; + + if (!ice_prot_type_to_id(rule->type, &prot_id)) + return 0; + + word = lkup_exts->n_val_words; + + for (j = 0; j < sizeof(rule->m_u) / sizeof(u16); j++) + if (((u16 *)&rule->m_u)[j] && + rule->type < ARRAY_SIZE(ice_prot_ext)) { + /* No more space to accommodate */ + if (word >= ICE_MAX_CHAIN_WORDS) + return 0; + lkup_exts->fv_words[word].off = + ice_prot_ext[rule->type].offs[j]; + lkup_exts->fv_words[word].prot_id = + ice_prot_id_tbl[rule->type].protocol_id; + lkup_exts->field_mask[word] = + be16_to_cpu(((__force __be16 *)&rule->m_u)[j]); + word++; + } + + ret_val = word - lkup_exts->n_val_words; + lkup_exts->n_val_words = word; + + return ret_val; +} + +/** + * ice_create_first_fit_recp_def - Create a recipe grouping + * @hw: pointer to the hardware structure + * @lkup_exts: an array of protocol header extractions + * @rg_list: pointer to a list that stores new recipe groups + * @recp_cnt: pointer to a variable that stores returned number of recipe groups + * + * Using first fit algorithm, take all the words that are still not done + * and start grouping them in 4-word groups. Each group makes up one + * recipe. + */ +static enum ice_status +ice_create_first_fit_recp_def(struct ice_hw *hw, + struct ice_prot_lkup_ext *lkup_exts, + struct list_head *rg_list, + u8 *recp_cnt) +{ + struct ice_pref_recipe_group *grp = NULL; + u8 j; + + *recp_cnt = 0; + + /* Walk through every word in the rule to check if it is not done. If so + * then this word needs to be part of a new recipe. 
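The first-fit grouping described above packs every word that is not yet handled into groups of at most four, one group per hardware recipe. A standalone sketch of that packing, reduced to plain arrays:

#include <stdio.h>

#define WORDS_PER_RECIPE 4  /* stand-in for ICE_NUM_WORDS_RECIPE */

/* Pack 'n_words' match words into groups of up to WORDS_PER_RECIPE each,
 * recording which group every word lands in.  Returns the group count.
 */
static unsigned int first_fit_group(unsigned int n_words,
                                    unsigned int group_of[])
{
    unsigned int groups = 0, filled = WORDS_PER_RECIPE;

    for (unsigned int i = 0; i < n_words; i++) {
        if (filled == WORDS_PER_RECIPE) {
            groups++;       /* current group is full: open a new one */
            filled = 0;
        }
        group_of[i] = groups - 1;
        filled++;
    }
    return groups;
}

int main(void)
{
    unsigned int group_of[6];
    unsigned int n = first_fit_group(6, group_of);

    /* 6 words -> 2 groups: words 0-3 in group 0, words 4-5 in group 1 */
    printf("%u groups, word 5 in group %u\n", n, group_of[5]);
    return 0;
}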
+ */ + for (j = 0; j < lkup_exts->n_val_words; j++) + if (!test_bit(j, lkup_exts->done)) { + if (!grp || + grp->n_val_pairs == ICE_NUM_WORDS_RECIPE) { + struct ice_recp_grp_entry *entry; + + entry = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*entry), + GFP_KERNEL); + if (!entry) + return ICE_ERR_NO_MEMORY; + list_add(&entry->l_entry, rg_list); + grp = &entry->r_group; + (*recp_cnt)++; + } + + grp->pairs[grp->n_val_pairs].prot_id = + lkup_exts->fv_words[j].prot_id; + grp->pairs[grp->n_val_pairs].off = + lkup_exts->fv_words[j].off; + grp->mask[grp->n_val_pairs] = lkup_exts->field_mask[j]; + grp->n_val_pairs++; + } + + return 0; +} + +/** + * ice_fill_fv_word_index - fill in the field vector indices for a recipe group + * @hw: pointer to the hardware structure + * @fv_list: field vector with the extraction sequence information + * @rg_list: recipe groupings with protocol-offset pairs + * + * Helper function to fill in the field vector indices for protocol-offset + * pairs. These indexes are then ultimately programmed into a recipe. + */ +static enum ice_status +ice_fill_fv_word_index(struct ice_hw *hw, struct list_head *fv_list, + struct list_head *rg_list) +{ + struct ice_sw_fv_list_entry *fv; + struct ice_recp_grp_entry *rg; + struct ice_fv_word *fv_ext; + + if (list_empty(fv_list)) + return 0; + + fv = list_first_entry(fv_list, struct ice_sw_fv_list_entry, + list_entry); + fv_ext = fv->fv_ptr->ew; + + list_for_each_entry(rg, rg_list, l_entry) { + u8 i; + + for (i = 0; i < rg->r_group.n_val_pairs; i++) { + struct ice_fv_word *pr; + bool found = false; + u16 mask; + u8 j; + + pr = &rg->r_group.pairs[i]; + mask = rg->r_group.mask[i]; + + for (j = 0; j < hw->blk[ICE_BLK_SW].es.fvw; j++) + if (fv_ext[j].prot_id == pr->prot_id && + fv_ext[j].off == pr->off) { + found = true; + + /* Store index of field vector */ + rg->fv_idx[i] = j; + rg->fv_mask[i] = mask; + break; + } + + /* Protocol/offset could not be found, caller gave an + * invalid pair + */ + if (!found) + return ICE_ERR_PARAM; + } + } + + return 0; +} + +/** + * ice_find_free_recp_res_idx - find free result indexes for recipe + * @hw: pointer to hardware structure + * @profiles: bitmap of profiles that will be associated with the new recipe + * @free_idx: pointer to variable to receive the free index bitmap + * + * The algorithm used here is: + * 1. When creating a new recipe, create a set P which contains all + * Profiles that will be associated with our new recipe + * + * 2. For each Profile p in set P: + * a. Add all recipes associated with Profile p into set R + * b. Optional : PossibleIndexes &= profile[p].possibleIndexes + * [initially PossibleIndexes should be 0xFFFFFFFFFFFFFFFF] + * i. Or just assume they all have the same possible indexes: + * 44, 45, 46, 47 + * i.e., PossibleIndexes = 0x0000F00000000000 + * + * 3. For each Recipe r in set R: + * a. UsedIndexes |= (bitwise or ) recipe[r].res_indexes + * b. FreeIndexes = UsedIndexes ^ PossibleIndexes + * + * FreeIndexes will contain the bits indicating the indexes free for use, + * then the code needs to update the recipe[r].used_result_idx_bits to + * indicate which indexes were selected for use by this recipe. 
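A standalone sketch of the index bookkeeping spelled out in the comment above, using one 64-bit word in place of the driver's bitmaps: intersect the possible indexes across all profiles, union the used indexes across the colliding recipes, and whatever is possible but not used is free.

#include <stdint.h>
#include <stdio.h>

/* Compute the free result indexes from per-profile "possible" masks and
 * per-recipe "used" masks (one bitmap per entry).
 */
static uint64_t free_result_idx(const uint64_t *possible_per_prof, int nprof,
                                const uint64_t *used_per_recipe, int nrecp)
{
    uint64_t possible = ~0ull, used = 0;

    for (int p = 0; p < nprof; p++)
        possible &= possible_per_prof[p];
    for (int r = 0; r < nrecp; r++)
        used |= used_per_recipe[r];

    /* The driver computes used ^ possible; when every used index comes
     * from the possible set, that XOR and this AND-NOT give the same
     * bitmap.
     */
    return possible & ~used;
}

int main(void)
{
    uint64_t possible[] = { 0xF00000000000ull };    /* e.g. indexes 44..47 */
    uint64_t used[]     = { 1ull << 44, 1ull << 46 };
    uint64_t freeidx = free_result_idx(possible, 1, used, 2);

    printf("free index bitmap: 0x%llx\n",
           (unsigned long long)freeidx);    /* bits 45 and 47 set */
    return 0;
}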
+ */ +static u16 +ice_find_free_recp_res_idx(struct ice_hw *hw, const unsigned long *profiles, + unsigned long *free_idx) +{ + DECLARE_BITMAP(possible_idx, ICE_MAX_FV_WORDS); + DECLARE_BITMAP(recipes, ICE_MAX_NUM_RECIPES); + DECLARE_BITMAP(used_idx, ICE_MAX_FV_WORDS); + u16 bit; + + bitmap_zero(possible_idx, ICE_MAX_FV_WORDS); + bitmap_zero(recipes, ICE_MAX_NUM_RECIPES); + bitmap_zero(used_idx, ICE_MAX_FV_WORDS); + bitmap_zero(free_idx, ICE_MAX_FV_WORDS); + + bitmap_set(possible_idx, 0, ICE_MAX_FV_WORDS); + + /* For each profile we are going to associate the recipe with, add the + * recipes that are associated with that profile. This will give us + * the set of recipes that our recipe may collide with. Also, determine + * what possible result indexes are usable given this set of profiles. + */ + for_each_set_bit(bit, profiles, ICE_MAX_NUM_PROFILES) { + bitmap_or(recipes, recipes, profile_to_recipe[bit], + ICE_MAX_NUM_RECIPES); + bitmap_and(possible_idx, possible_idx, + hw->switch_info->prof_res_bm[bit], + ICE_MAX_FV_WORDS); + } + + /* For each recipe that our new recipe may collide with, determine + * which indexes have been used. + */ + for_each_set_bit(bit, recipes, ICE_MAX_NUM_RECIPES) + bitmap_or(used_idx, used_idx, + hw->switch_info->recp_list[bit].res_idxs, + ICE_MAX_FV_WORDS); + + bitmap_xor(free_idx, used_idx, possible_idx, ICE_MAX_FV_WORDS); + + /* return number of free indexes */ + return (u16)bitmap_weight(free_idx, ICE_MAX_FV_WORDS); +} + +/** + * ice_add_sw_recipe - function to call AQ calls to create switch recipe + * @hw: pointer to hardware structure + * @rm: recipe management list entry + * @profiles: bitmap of profiles that will be associated. + */ +static enum ice_status +ice_add_sw_recipe(struct ice_hw *hw, struct ice_sw_recipe *rm, + unsigned long *profiles) +{ + DECLARE_BITMAP(result_idx_bm, ICE_MAX_FV_WORDS); + struct ice_aqc_recipe_data_elem *tmp; + struct ice_aqc_recipe_data_elem *buf; + struct ice_recp_grp_entry *entry; + enum ice_status status; + u16 free_res_idx; + u16 recipe_count; + u8 chain_idx; + u8 recps = 0; + + /* When more than one recipe are required, another recipe is needed to + * chain them together. Matching a tunnel metadata ID takes up one of + * the match fields in the chaining recipe reducing the number of + * chained recipes by one. + */ + /* check number of free result indices */ + bitmap_zero(result_idx_bm, ICE_MAX_FV_WORDS); + free_res_idx = ice_find_free_recp_res_idx(hw, profiles, result_idx_bm); + + ice_debug(hw, ICE_DBG_SW, "Result idx slots: %d, need %d\n", + free_res_idx, rm->n_grp_count); + + if (rm->n_grp_count > 1) { + if (rm->n_grp_count > free_res_idx) + return ICE_ERR_MAX_LIMIT; + + rm->n_grp_count++; + } + + if (rm->n_grp_count > ICE_MAX_CHAIN_RECIPE) + return ICE_ERR_MAX_LIMIT; + + tmp = kcalloc(ICE_MAX_NUM_RECIPES, sizeof(*tmp), GFP_KERNEL); + if (!tmp) + return ICE_ERR_NO_MEMORY; + + buf = devm_kcalloc(ice_hw_to_dev(hw), rm->n_grp_count, sizeof(*buf), + GFP_KERNEL); + if (!buf) { + status = ICE_ERR_NO_MEMORY; + goto err_mem; + } + + bitmap_zero(rm->r_bitmap, ICE_MAX_NUM_RECIPES); + recipe_count = ICE_MAX_NUM_RECIPES; + status = ice_aq_get_recipe(hw, tmp, &recipe_count, ICE_SW_LKUP_MAC, + NULL); + if (status || recipe_count == 0) + goto err_unroll; + + /* Allocate the recipe resources, and configure them according to the + * match fields from protocol headers and extracted field vectors. 
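The loop that follows fills each recipe's five lookup slots: slot 0 always matches the switch ID, the next slots carry the group's field-vector indexes and masks, and unused slots get the "ignore" value with a zero mask. A simplified standalone sketch of that slot layout; the constants are stand-ins, not the AQ definitions:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define SLOT_SW_ID   2u      /* stand-in for ICE_AQ_SW_ID_LKUP_IDX */
#define SW_ID_MASK   0x00FFu /* stand-in for ICE_AQ_SW_ID_LKUP_MASK */
#define SLOT_IGNORE  0x80u   /* stand-in for the "ignore" lookup index */
#define N_WORD_SLOTS 4u

struct recipe_content {
    uint8_t  lkup_indx[N_WORD_SLOTS + 1];
    uint16_t mask[N_WORD_SLOTS + 1];
};

/* Fill one recipe: slot 0 = switch ID, slots 1..n = this group's words,
 * remaining slots ignored with a zero mask.
 */
static void fill_recipe(struct recipe_content *rc,
                        const uint8_t *fv_idx, const uint16_t *fv_mask,
                        unsigned int n_words)
{
    memset(rc, 0, sizeof(*rc));
    rc->lkup_indx[0] = SLOT_SW_ID;
    rc->mask[0] = SW_ID_MASK;

    for (unsigned int i = 1; i <= N_WORD_SLOTS; i++) {
        rc->lkup_indx[i] = SLOT_IGNORE;
        rc->mask[i] = 0;
    }
    for (unsigned int i = 0; i < n_words && i < N_WORD_SLOTS; i++) {
        rc->lkup_indx[i + 1] = fv_idx[i];
        rc->mask[i + 1] = fv_mask[i];
    }
}

int main(void)
{
    uint8_t idx[] = { 17, 18 };
    uint16_t msk[] = { 0xFFFF, 0x00FF };
    struct recipe_content rc;

    fill_recipe(&rc, idx, msk, 2);
    printf("slot1 idx %u mask 0x%04x, slot3 idx 0x%02x\n",
           rc.lkup_indx[1], rc.mask[1], rc.lkup_indx[3]);
    return 0;
}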
+ */ + chain_idx = find_first_bit(result_idx_bm, ICE_MAX_FV_WORDS); + list_for_each_entry(entry, &rm->rg_list, l_entry) { + u8 i; + + status = ice_alloc_recipe(hw, &entry->rid); + if (status) + goto err_unroll; + + /* Clear the result index of the located recipe, as this will be + * updated, if needed, later in the recipe creation process. + */ + tmp[0].content.result_indx = 0; + + buf[recps] = tmp[0]; + buf[recps].recipe_indx = (u8)entry->rid; + /* if the recipe is a non-root recipe RID should be programmed + * as 0 for the rules to be applied correctly. + */ + buf[recps].content.rid = 0; + memset(&buf[recps].content.lkup_indx, 0, + sizeof(buf[recps].content.lkup_indx)); + + /* All recipes use look-up index 0 to match switch ID. */ + buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX; + buf[recps].content.mask[0] = + cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK); + /* Setup lkup_indx 1..4 to INVALID/ignore and set the mask + * to be 0 + */ + for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) { + buf[recps].content.lkup_indx[i] = 0x80; + buf[recps].content.mask[i] = 0; + } + + for (i = 0; i < entry->r_group.n_val_pairs; i++) { + buf[recps].content.lkup_indx[i + 1] = entry->fv_idx[i]; + buf[recps].content.mask[i + 1] = + cpu_to_le16(entry->fv_mask[i]); + } + + if (rm->n_grp_count > 1) { + /* Checks to see if there really is a valid result index + * that can be used. + */ + if (chain_idx >= ICE_MAX_FV_WORDS) { + ice_debug(hw, ICE_DBG_SW, "No chain index available\n"); + status = ICE_ERR_MAX_LIMIT; + goto err_unroll; + } + + entry->chain_idx = chain_idx; + buf[recps].content.result_indx = + ICE_AQ_RECIPE_RESULT_EN | + ((chain_idx << ICE_AQ_RECIPE_RESULT_DATA_S) & + ICE_AQ_RECIPE_RESULT_DATA_M); + clear_bit(chain_idx, result_idx_bm); + chain_idx = find_first_bit(result_idx_bm, + ICE_MAX_FV_WORDS); + } + + /* fill recipe dependencies */ + bitmap_zero((unsigned long *)buf[recps].recipe_bitmap, + ICE_MAX_NUM_RECIPES); + set_bit(buf[recps].recipe_indx, + (unsigned long *)buf[recps].recipe_bitmap); + buf[recps].content.act_ctrl_fwd_priority = rm->priority; + recps++; + } + + if (rm->n_grp_count == 1) { + rm->root_rid = buf[0].recipe_indx; + set_bit(buf[0].recipe_indx, rm->r_bitmap); + buf[0].content.rid = rm->root_rid | ICE_AQ_RECIPE_ID_IS_ROOT; + if (sizeof(buf[0].recipe_bitmap) >= sizeof(rm->r_bitmap)) { + memcpy(buf[0].recipe_bitmap, rm->r_bitmap, + sizeof(buf[0].recipe_bitmap)); + } else { + status = ICE_ERR_BAD_PTR; + goto err_unroll; + } + /* Applicable only for ROOT_RECIPE, set the fwd_priority for + * the recipe which is getting created if specified + * by user. Usually any advanced switch filter, which results + * into new extraction sequence, ended up creating a new recipe + * of type ROOT and usually recipes are associated with profiles + * Switch rule referreing newly created recipe, needs to have + * either/or 'fwd' or 'join' priority, otherwise switch rule + * evaluation will not happen correctly. In other words, if + * switch rule to be evaluated on priority basis, then recipe + * needs to have priority, otherwise it will be evaluated last. 
+ */ + buf[0].content.act_ctrl_fwd_priority = rm->priority; + } else { + struct ice_recp_grp_entry *last_chain_entry; + u16 rid, i; + + /* Allocate the last recipe that will chain the outcomes of the + * other recipes together + */ + status = ice_alloc_recipe(hw, &rid); + if (status) + goto err_unroll; + + buf[recps].recipe_indx = (u8)rid; + buf[recps].content.rid = (u8)rid; + buf[recps].content.rid |= ICE_AQ_RECIPE_ID_IS_ROOT; + /* the new entry created should also be part of rg_list to + * make sure we have complete recipe + */ + last_chain_entry = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(*last_chain_entry), + GFP_KERNEL); + if (!last_chain_entry) { + status = ICE_ERR_NO_MEMORY; + goto err_unroll; + } + last_chain_entry->rid = rid; + memset(&buf[recps].content.lkup_indx, 0, + sizeof(buf[recps].content.lkup_indx)); + /* All recipes use look-up index 0 to match switch ID. */ + buf[recps].content.lkup_indx[0] = ICE_AQ_SW_ID_LKUP_IDX; + buf[recps].content.mask[0] = + cpu_to_le16(ICE_AQ_SW_ID_LKUP_MASK); + for (i = 1; i <= ICE_NUM_WORDS_RECIPE; i++) { + buf[recps].content.lkup_indx[i] = + ICE_AQ_RECIPE_LKUP_IGNORE; + buf[recps].content.mask[i] = 0; + } + + i = 1; + /* update r_bitmap with the recp that is used for chaining */ + set_bit(rid, rm->r_bitmap); + /* this is the recipe that chains all the other recipes so it + * should not have a chaining ID to indicate the same + */ + last_chain_entry->chain_idx = ICE_INVAL_CHAIN_IND; + list_for_each_entry(entry, &rm->rg_list, l_entry) { + last_chain_entry->fv_idx[i] = entry->chain_idx; + buf[recps].content.lkup_indx[i] = entry->chain_idx; + buf[recps].content.mask[i++] = cpu_to_le16(0xFFFF); + set_bit(entry->rid, rm->r_bitmap); + } + list_add(&last_chain_entry->l_entry, &rm->rg_list); + if (sizeof(buf[recps].recipe_bitmap) >= + sizeof(rm->r_bitmap)) { + memcpy(buf[recps].recipe_bitmap, rm->r_bitmap, + sizeof(buf[recps].recipe_bitmap)); + } else { + status = ICE_ERR_BAD_PTR; + goto err_unroll; + } + buf[recps].content.act_ctrl_fwd_priority = rm->priority; + + recps++; + rm->root_rid = (u8)rid; + } + status = ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + goto err_unroll; + + status = ice_aq_add_recipe(hw, buf, rm->n_grp_count, NULL); + ice_release_change_lock(hw); + if (status) + goto err_unroll; + + /* Every recipe that just got created add it to the recipe + * book keeping list + */ + list_for_each_entry(entry, &rm->rg_list, l_entry) { + struct ice_switch_info *sw = hw->switch_info; + bool is_root, idx_found = false; + struct ice_sw_recipe *recp; + u16 idx, buf_idx = 0; + + /* find buffer index for copying some data */ + for (idx = 0; idx < rm->n_grp_count; idx++) + if (buf[idx].recipe_indx == entry->rid) { + buf_idx = idx; + idx_found = true; + } + + if (!idx_found) { + status = ICE_ERR_OUT_OF_RANGE; + goto err_unroll; + } + + recp = &sw->recp_list[entry->rid]; + is_root = (rm->root_rid == entry->rid); + recp->is_root = is_root; + + recp->root_rid = entry->rid; + recp->big_recp = (is_root && rm->n_grp_count > 1); + + memcpy(&recp->ext_words, entry->r_group.pairs, + entry->r_group.n_val_pairs * sizeof(struct ice_fv_word)); + + memcpy(recp->r_bitmap, buf[buf_idx].recipe_bitmap, + sizeof(recp->r_bitmap)); + + /* Copy non-result fv index values and masks to recipe. This + * call will also update the result recipe bitmask. 
+ */ + ice_collect_result_idx(&buf[buf_idx], recp); + + /* for non-root recipes, also copy to the root, this allows + * easier matching of a complete chained recipe + */ + if (!is_root) + ice_collect_result_idx(&buf[buf_idx], + &sw->recp_list[rm->root_rid]); + + recp->n_ext_words = entry->r_group.n_val_pairs; + recp->chain_idx = entry->chain_idx; + recp->priority = buf[buf_idx].content.act_ctrl_fwd_priority; + recp->n_grp_count = rm->n_grp_count; + recp->tun_type = rm->tun_type; + recp->recp_created = true; + } + rm->root_buf = buf; + kfree(tmp); + return status; + +err_unroll: +err_mem: + kfree(tmp); + devm_kfree(ice_hw_to_dev(hw), buf); + return status; +} + +/** + * ice_create_recipe_group - creates recipe group + * @hw: pointer to hardware structure + * @rm: recipe management list entry + * @lkup_exts: lookup elements + */ +static enum ice_status +ice_create_recipe_group(struct ice_hw *hw, struct ice_sw_recipe *rm, + struct ice_prot_lkup_ext *lkup_exts) +{ + enum ice_status status; + u8 recp_count = 0; + + rm->n_grp_count = 0; + + /* Create recipes for words that are marked not done by packing them + * as best fit. + */ + status = ice_create_first_fit_recp_def(hw, lkup_exts, + &rm->rg_list, &recp_count); + if (!status) { + rm->n_grp_count += recp_count; + rm->n_ext_words = lkup_exts->n_val_words; + memcpy(&rm->ext_words, lkup_exts->fv_words, + sizeof(rm->ext_words)); + memcpy(rm->word_masks, lkup_exts->field_mask, + sizeof(rm->word_masks)); + } + + return status; +} + +/** + * ice_get_fv - get field vectors/extraction sequences for spec. lookup types + * @hw: pointer to hardware structure + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @bm: bitmap of field vectors to consider + * @fv_list: pointer to a list that holds the returned field vectors + */ +static enum ice_status +ice_get_fv(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, + unsigned long *bm, struct list_head *fv_list) +{ + enum ice_status status; + u8 *prot_ids; + u16 i; + + prot_ids = kcalloc(lkups_cnt, sizeof(*prot_ids), GFP_KERNEL); + if (!prot_ids) + return ICE_ERR_NO_MEMORY; + + for (i = 0; i < lkups_cnt; i++) + if (!ice_prot_type_to_id(lkups[i].type, &prot_ids[i])) { + status = ICE_ERR_CFG; + goto free_mem; + } + + /* Find field vectors that include all specified protocol types */ + status = ice_get_sw_fv_list(hw, prot_ids, lkups_cnt, bm, fv_list); + +free_mem: + kfree(prot_ids); + return status; +} + +/** + * ice_tun_type_match_word - determine if tun type needs a match mask + * @tun_type: tunnel type + * @mask: mask to be used for the tunnel + */ +static bool ice_tun_type_match_word(enum ice_sw_tunnel_type tun_type, u16 *mask) +{ + switch (tun_type) { + case ICE_SW_TUN_GENEVE: + case ICE_SW_TUN_VXLAN: + case ICE_SW_TUN_NVGRE: + *mask = ICE_TUN_FLAG_MASK; + return true; + + default: + *mask = 0; + return false; + } +} + +/** + * ice_add_special_words - Add words that are not protocols, such as metadata + * @rinfo: other information regarding the rule e.g. priority and action info + * @lkup_exts: lookup word structure + */ +static enum ice_status +ice_add_special_words(struct ice_adv_rule_info *rinfo, + struct ice_prot_lkup_ext *lkup_exts) +{ + u16 mask; + + /* If this is a tunneled packet, then add recipe index to match the + * tunnel bit in the packet metadata flags. 
+ */ + if (ice_tun_type_match_word(rinfo->tun_type, &mask)) { + if (lkup_exts->n_val_words < ICE_MAX_CHAIN_WORDS) { + u8 word = lkup_exts->n_val_words++; + + lkup_exts->fv_words[word].prot_id = ICE_META_DATA_ID_HW; + lkup_exts->fv_words[word].off = ICE_TUN_FLAG_MDID_OFF; + lkup_exts->field_mask[word] = mask; + } else { + return ICE_ERR_MAX_LIMIT; + } + } + + return 0; +} + +/* ice_get_compat_fv_bitmap - Get compatible field vector bitmap for rule + * @hw: pointer to hardware structure + * @rinfo: other information regarding the rule e.g. priority and action info + * @bm: pointer to memory for returning the bitmap of field vectors + */ +static void +ice_get_compat_fv_bitmap(struct ice_hw *hw, struct ice_adv_rule_info *rinfo, + unsigned long *bm) +{ + enum ice_prof_type prof_type; + + bitmap_zero(bm, ICE_MAX_NUM_PROFILES); + + switch (rinfo->tun_type) { + case ICE_NON_TUN: + prof_type = ICE_PROF_NON_TUN; + break; + case ICE_ALL_TUNNELS: + prof_type = ICE_PROF_TUN_ALL; + break; + case ICE_SW_TUN_GENEVE: + case ICE_SW_TUN_VXLAN: + prof_type = ICE_PROF_TUN_UDP; + break; + case ICE_SW_TUN_NVGRE: + prof_type = ICE_PROF_TUN_GRE; + break; + default: + prof_type = ICE_PROF_ALL; + break; + } + + ice_get_sw_fv_bitmap(hw, prof_type, bm); +} + +/** + * ice_add_adv_recipe - Add an advanced recipe that is not part of the default + * @hw: pointer to hardware structure + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @rinfo: other information regarding the rule e.g. priority and action info + * @rid: return the recipe ID of the recipe created + */ +static enum ice_status +ice_add_adv_recipe(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo, u16 *rid) +{ + DECLARE_BITMAP(fv_bitmap, ICE_MAX_NUM_PROFILES); + DECLARE_BITMAP(profiles, ICE_MAX_NUM_PROFILES); + struct ice_prot_lkup_ext *lkup_exts; + struct ice_recp_grp_entry *r_entry; + struct ice_sw_fv_list_entry *fvit; + struct ice_recp_grp_entry *r_tmp; + struct ice_sw_fv_list_entry *tmp; + enum ice_status status = 0; + struct ice_sw_recipe *rm; + u8 i; + + if (!lkups_cnt) + return ICE_ERR_PARAM; + + lkup_exts = kzalloc(sizeof(*lkup_exts), GFP_KERNEL); + if (!lkup_exts) + return ICE_ERR_NO_MEMORY; + + /* Determine the number of words to be matched and if it exceeds a + * recipe's restrictions + */ + for (i = 0; i < lkups_cnt; i++) { + u16 count; + + if (lkups[i].type >= ICE_PROTOCOL_LAST) { + status = ICE_ERR_CFG; + goto err_free_lkup_exts; + } + + count = ice_fill_valid_words(&lkups[i], lkup_exts); + if (!count) { + status = ICE_ERR_CFG; + goto err_free_lkup_exts; + } + } + + rm = kzalloc(sizeof(*rm), GFP_KERNEL); + if (!rm) { + status = ICE_ERR_NO_MEMORY; + goto err_free_lkup_exts; + } + + /* Get field vectors that contain fields extracted from all the protocol + * headers being programmed. + */ + INIT_LIST_HEAD(&rm->fv_list); + INIT_LIST_HEAD(&rm->rg_list); + + /* Get bitmap of field vectors (profiles) that are compatible with the + * rule request; only these will be searched in the subsequent call to + * ice_get_fv. 
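ice_get_compat_fv_bitmap() above narrows the set of candidate profiles by tunnel type before any field vectors are fetched. A standalone sketch of that narrowing against a toy profile table tagged by class; the names are illustrative, not the driver's:

#include <stdint.h>
#include <stdio.h>

enum toy_tun { TUN_NONE, TUN_VXLAN, TUN_GENEVE, TUN_NVGRE };
enum toy_prof_class { PROF_NON_TUN, PROF_TUN_UDP, PROF_TUN_GRE, PROF_ANY };

/* Map a requested tunnel type onto the class of profiles worth searching. */
static enum toy_prof_class compat_class(enum toy_tun tun)
{
    switch (tun) {
    case TUN_NONE:   return PROF_NON_TUN;
    case TUN_VXLAN:
    case TUN_GENEVE: return PROF_TUN_UDP;   /* UDP-based tunnels */
    case TUN_NVGRE:  return PROF_TUN_GRE;   /* GRE-based tunnels */
    default:         return PROF_ANY;
    }
}

int main(void)
{
    /* toy "profile table": index = profile ID, value = its class */
    enum toy_prof_class profiles[] = {
        PROF_NON_TUN, PROF_TUN_UDP, PROF_TUN_GRE, PROF_TUN_UDP,
    };
    enum toy_prof_class want = compat_class(TUN_VXLAN);
    uint32_t bm = 0;

    for (unsigned int i = 0; i < 4; i++)
        if (want == PROF_ANY || profiles[i] == want)
            bm |= 1u << i;

    printf("compatible profile bitmap: 0x%x\n", bm);    /* 0xa */
    return 0;
}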
+ */ + ice_get_compat_fv_bitmap(hw, rinfo, fv_bitmap); + + status = ice_get_fv(hw, lkups, lkups_cnt, fv_bitmap, &rm->fv_list); + if (status) + goto err_unroll; + + /* Create any special protocol/offset pairs, such as looking at tunnel + * bits by extracting metadata + */ + status = ice_add_special_words(rinfo, lkup_exts); + if (status) + goto err_free_lkup_exts; + + /* Group match words into recipes using preferred recipe grouping + * criteria. + */ + status = ice_create_recipe_group(hw, rm, lkup_exts); + if (status) + goto err_unroll; + + /* set the recipe priority if specified */ + rm->priority = (u8)rinfo->priority; + + /* Find offsets from the field vector. Pick the first one for all the + * recipes. + */ + status = ice_fill_fv_word_index(hw, &rm->fv_list, &rm->rg_list); + if (status) + goto err_unroll; + + /* get bitmap of all profiles the recipe will be associated with */ + bitmap_zero(profiles, ICE_MAX_NUM_PROFILES); + list_for_each_entry(fvit, &rm->fv_list, list_entry) { + ice_debug(hw, ICE_DBG_SW, "profile: %d\n", fvit->profile_id); + set_bit((u16)fvit->profile_id, profiles); + } + + /* Look for a recipe which matches our requested fv / mask list */ + *rid = ice_find_recp(hw, lkup_exts); + if (*rid < ICE_MAX_NUM_RECIPES) + /* Success if found a recipe that match the existing criteria */ + goto err_unroll; + + /* Recipe we need does not exist, add a recipe */ + status = ice_add_sw_recipe(hw, rm, profiles); + if (status) + goto err_unroll; + + /* Associate all the recipes created with all the profiles in the + * common field vector. + */ + list_for_each_entry(fvit, &rm->fv_list, list_entry) { + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + u16 j; + + status = ice_aq_get_recipe_to_profile(hw, fvit->profile_id, + (u8 *)r_bitmap, NULL); + if (status) + goto err_unroll; + + bitmap_or(r_bitmap, r_bitmap, rm->r_bitmap, + ICE_MAX_NUM_RECIPES); + status = ice_acquire_change_lock(hw, ICE_RES_WRITE); + if (status) + goto err_unroll; + + status = ice_aq_map_recipe_to_profile(hw, fvit->profile_id, + (u8 *)r_bitmap, + NULL); + ice_release_change_lock(hw); + + if (status) + goto err_unroll; + + /* Update profile to recipe bitmap array */ + bitmap_copy(profile_to_recipe[fvit->profile_id], r_bitmap, + ICE_MAX_NUM_RECIPES); + + /* Update recipe to profile bitmap array */ + for_each_set_bit(j, rm->r_bitmap, ICE_MAX_NUM_RECIPES) + set_bit((u16)fvit->profile_id, recipe_to_profile[j]); + } + + *rid = rm->root_rid; + memcpy(&hw->switch_info->recp_list[*rid].lkup_exts, lkup_exts, + sizeof(*lkup_exts)); +err_unroll: + list_for_each_entry_safe(r_entry, r_tmp, &rm->rg_list, l_entry) { + list_del(&r_entry->l_entry); + devm_kfree(ice_hw_to_dev(hw), r_entry); + } + + list_for_each_entry_safe(fvit, tmp, &rm->fv_list, list_entry) { + list_del(&fvit->list_entry); + devm_kfree(ice_hw_to_dev(hw), fvit); + } + + if (rm->root_buf) + devm_kfree(ice_hw_to_dev(hw), rm->root_buf); + + kfree(rm); + +err_free_lkup_exts: + kfree(lkup_exts); + + return status; +} + +/** + * ice_find_dummy_packet - find dummy packet + * + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @tun_type: tunnel type + * @pkt: dummy packet to fill according to filter match criteria + * @pkt_len: packet length of dummy packet + * @offsets: pointer to receive the pointer to the offsets for the packet + */ +static void +ice_find_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, + enum ice_sw_tunnel_type tun_type, + const u8 **pkt, u16 *pkt_len, 
+ const struct ice_dummy_pkt_offsets **offsets) +{ + bool tcp = false, udp = false, ipv6 = false, vlan = false; + u16 i; + + for (i = 0; i < lkups_cnt; i++) { + if (lkups[i].type == ICE_UDP_ILOS) + udp = true; + else if (lkups[i].type == ICE_TCP_IL) + tcp = true; + else if (lkups[i].type == ICE_IPV6_OFOS) + ipv6 = true; + else if (lkups[i].type == ICE_VLAN_OFOS) + vlan = true; + else if (lkups[i].type == ICE_ETYPE_OL && + lkups[i].h_u.ethertype.ethtype_id == + cpu_to_be16(ICE_IPV6_ETHER_ID) && + lkups[i].m_u.ethertype.ethtype_id == + cpu_to_be16(0xFFFF)) + ipv6 = true; + } + + if (tun_type == ICE_SW_TUN_NVGRE) { + if (tcp) { + *pkt = dummy_gre_tcp_packet; + *pkt_len = sizeof(dummy_gre_tcp_packet); + *offsets = dummy_gre_tcp_packet_offsets; + return; + } + + *pkt = dummy_gre_udp_packet; + *pkt_len = sizeof(dummy_gre_udp_packet); + *offsets = dummy_gre_udp_packet_offsets; + return; + } + + if (tun_type == ICE_SW_TUN_VXLAN || + tun_type == ICE_SW_TUN_GENEVE) { + if (tcp) { + *pkt = dummy_udp_tun_tcp_packet; + *pkt_len = sizeof(dummy_udp_tun_tcp_packet); + *offsets = dummy_udp_tun_tcp_packet_offsets; + return; + } + + *pkt = dummy_udp_tun_udp_packet; + *pkt_len = sizeof(dummy_udp_tun_udp_packet); + *offsets = dummy_udp_tun_udp_packet_offsets; + return; + } + + if (udp && !ipv6) { + if (vlan) { + *pkt = dummy_vlan_udp_packet; + *pkt_len = sizeof(dummy_vlan_udp_packet); + *offsets = dummy_vlan_udp_packet_offsets; + return; + } + *pkt = dummy_udp_packet; + *pkt_len = sizeof(dummy_udp_packet); + *offsets = dummy_udp_packet_offsets; + return; + } else if (udp && ipv6) { + if (vlan) { + *pkt = dummy_vlan_udp_ipv6_packet; + *pkt_len = sizeof(dummy_vlan_udp_ipv6_packet); + *offsets = dummy_vlan_udp_ipv6_packet_offsets; + return; + } + *pkt = dummy_udp_ipv6_packet; + *pkt_len = sizeof(dummy_udp_ipv6_packet); + *offsets = dummy_udp_ipv6_packet_offsets; + return; + } else if ((tcp && ipv6) || ipv6) { + if (vlan) { + *pkt = dummy_vlan_tcp_ipv6_packet; + *pkt_len = sizeof(dummy_vlan_tcp_ipv6_packet); + *offsets = dummy_vlan_tcp_ipv6_packet_offsets; + return; + } + *pkt = dummy_tcp_ipv6_packet; + *pkt_len = sizeof(dummy_tcp_ipv6_packet); + *offsets = dummy_tcp_ipv6_packet_offsets; + return; + } + + if (vlan) { + *pkt = dummy_vlan_tcp_packet; + *pkt_len = sizeof(dummy_vlan_tcp_packet); + *offsets = dummy_vlan_tcp_packet_offsets; + } else { + *pkt = dummy_tcp_packet; + *pkt_len = sizeof(dummy_tcp_packet); + *offsets = dummy_tcp_packet_offsets; + } +} + +/** + * ice_fill_adv_dummy_packet - fill a dummy packet with given match criteria + * + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @s_rule: stores rule information from the match criteria + * @dummy_pkt: dummy packet to fill according to filter match criteria + * @pkt_len: packet length of dummy packet + * @offsets: offset info for the dummy packet + */ +static enum ice_status +ice_fill_adv_dummy_packet(struct ice_adv_lkup_elem *lkups, u16 lkups_cnt, + struct ice_aqc_sw_rules_elem *s_rule, + const u8 *dummy_pkt, u16 pkt_len, + const struct ice_dummy_pkt_offsets *offsets) +{ + u8 *pkt; + u16 i; + + /* Start with a packet with a pre-defined/dummy content. Then, fill + * in the header values to be looked up or matched. 
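The per-word merge performed a little further down writes only the bits the caller's mask marks as significant, leaving the rest of the pre-built dummy packet untouched. The core of that merge as a standalone sketch:

#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Merge header values into 'pkt' at 'offset', one 16-bit word at a time,
 * writing only the bits set in the caller's mask for that word.
 */
static void masked_merge(uint8_t *pkt, unsigned int offset,
                         const uint16_t *val, const uint16_t *mask,
                         unsigned int nbytes)
{
    for (unsigned int j = 0; j < nbytes / sizeof(uint16_t); j++) {
        uint16_t cur;

        if (!mask[j])
            continue;   /* nothing significant in this word */
        memcpy(&cur, pkt + offset + 2 * j, sizeof(cur));
        cur = (cur & ~mask[j]) | (val[j] & mask[j]);
        memcpy(pkt + offset + 2 * j, &cur, sizeof(cur));
    }
}

int main(void)
{
    uint8_t pkt[4] = { 0x11, 0x22, 0x33, 0x44 };    /* dummy packet bytes */
    uint16_t val[]  = { 0xAAAA, 0xBBBB };
    uint16_t mask[] = { 0x00FF, 0x0000 };   /* only part of word 0 matters */

    masked_merge(pkt, 0, val, mask, sizeof(pkt));
    /* only the bytes under the mask change */
    printf("%02x %02x %02x %02x\n", pkt[0], pkt[1], pkt[2], pkt[3]);
    return 0;
}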
+ */ + pkt = s_rule->pdata.lkup_tx_rx.hdr; + + memcpy(pkt, dummy_pkt, pkt_len); + + for (i = 0; i < lkups_cnt; i++) { + enum ice_protocol_type type; + u16 offset = 0, len = 0, j; + bool found = false; + + /* find the start of this layer; it should be found since this + * was already checked when search for the dummy packet + */ + type = lkups[i].type; + for (j = 0; offsets[j].type != ICE_PROTOCOL_LAST; j++) { + if (type == offsets[j].type) { + offset = offsets[j].offset; + found = true; + break; + } + } + /* this should never happen in a correct calling sequence */ + if (!found) + return ICE_ERR_PARAM; + + switch (lkups[i].type) { + case ICE_MAC_OFOS: + case ICE_MAC_IL: + len = sizeof(struct ice_ether_hdr); + break; + case ICE_ETYPE_OL: + len = sizeof(struct ice_ethtype_hdr); + break; + case ICE_VLAN_OFOS: + len = sizeof(struct ice_vlan_hdr); + break; + case ICE_IPV4_OFOS: + case ICE_IPV4_IL: + len = sizeof(struct ice_ipv4_hdr); + break; + case ICE_IPV6_OFOS: + case ICE_IPV6_IL: + len = sizeof(struct ice_ipv6_hdr); + break; + case ICE_TCP_IL: + case ICE_UDP_OF: + case ICE_UDP_ILOS: + len = sizeof(struct ice_l4_hdr); + break; + case ICE_SCTP_IL: + len = sizeof(struct ice_sctp_hdr); + break; + case ICE_NVGRE: + len = sizeof(struct ice_nvgre_hdr); + break; + case ICE_VXLAN: + case ICE_GENEVE: + len = sizeof(struct ice_udp_tnl_hdr); + break; + default: + return ICE_ERR_PARAM; + } + + /* the length should be a word multiple */ + if (len % ICE_BYTES_PER_WORD) + return ICE_ERR_CFG; + + /* We have the offset to the header start, the length, the + * caller's header values and mask. Use this information to + * copy the data into the dummy packet appropriately based on + * the mask. Note that we need to only write the bits as + * indicated by the mask to make sure we don't improperly write + * over any significant packet data. + */ + for (j = 0; j < len / sizeof(u16); j++) + if (((u16 *)&lkups[i].m_u)[j]) + ((u16 *)(pkt + offset))[j] = + (((u16 *)(pkt + offset))[j] & + ~((u16 *)&lkups[i].m_u)[j]) | + (((u16 *)&lkups[i].h_u)[j] & + ((u16 *)&lkups[i].m_u)[j]); + } + + s_rule->pdata.lkup_tx_rx.hdr_len = cpu_to_le16(pkt_len); + + return 0; +} + +/** + * ice_fill_adv_packet_tun - fill dummy packet with udp tunnel port + * @hw: pointer to the hardware structure + * @tun_type: tunnel type + * @pkt: dummy packet to fill in + * @offsets: offset info for the dummy packet + */ +static enum ice_status +ice_fill_adv_packet_tun(struct ice_hw *hw, enum ice_sw_tunnel_type tun_type, + u8 *pkt, const struct ice_dummy_pkt_offsets *offsets) +{ + u16 open_port, i; + + switch (tun_type) { + case ICE_SW_TUN_VXLAN: + case ICE_SW_TUN_GENEVE: + if (!ice_get_open_tunnel_port(hw, &open_port)) + return ICE_ERR_CFG; + break; + + default: + /* Nothing needs to be done for this tunnel type */ + return 0; + } + + /* Find the outer UDP protocol header and insert the port number */ + for (i = 0; offsets[i].type != ICE_PROTOCOL_LAST; i++) { + if (offsets[i].type == ICE_UDP_OF) { + struct ice_l4_hdr *hdr; + u16 offset; + + offset = offsets[i].offset; + hdr = (struct ice_l4_hdr *)&pkt[offset]; + hdr->dst_port = cpu_to_be16(open_port); + + return 0; + } + } + + return ICE_ERR_CFG; +} + +/** + * ice_find_adv_rule_entry - Search a rule entry + * @hw: pointer to the hardware structure + * @lkups: lookup elements or match criteria for the advanced recipe, one + * structure per protocol header + * @lkups_cnt: number of protocols + * @recp_id: recipe ID for which we are finding the rule + * @rinfo: other information regarding the rule e.g. 
priority and action info + * + * Helper function to search for a given advance rule entry + * Returns pointer to entry storing the rule if found + */ +static struct ice_adv_fltr_mgmt_list_entry * +ice_find_adv_rule_entry(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, u16 recp_id, + struct ice_adv_rule_info *rinfo) +{ + struct ice_adv_fltr_mgmt_list_entry *list_itr; + struct ice_switch_info *sw = hw->switch_info; + int i; + + list_for_each_entry(list_itr, &sw->recp_list[recp_id].filt_rules, + list_entry) { + bool lkups_matched = true; + + if (lkups_cnt != list_itr->lkups_cnt) + continue; + for (i = 0; i < list_itr->lkups_cnt; i++) + if (memcmp(&list_itr->lkups[i], &lkups[i], + sizeof(*lkups))) { + lkups_matched = false; + break; + } + if (rinfo->sw_act.flag == list_itr->rule_info.sw_act.flag && + rinfo->tun_type == list_itr->rule_info.tun_type && + lkups_matched) + return list_itr; + } + return NULL; +} + +/** + * ice_adv_add_update_vsi_list + * @hw: pointer to the hardware structure + * @m_entry: pointer to current adv filter management list entry + * @cur_fltr: filter information from the book keeping entry + * @new_fltr: filter information with the new VSI to be added + * + * Call AQ command to add or update previously created VSI list with new VSI. + * + * Helper function to do book keeping associated with adding filter information + * The algorithm to do the booking keeping is described below : + * When a VSI needs to subscribe to a given advanced filter + * if only one VSI has been added till now + * Allocate a new VSI list and add two VSIs + * to this list using switch rule command + * Update the previously created switch rule with the + * newly created VSI list ID + * if a VSI list was previously created + * Add the new VSI to the previously created VSI list set + * using the update switch rule command + */ +static enum ice_status +ice_adv_add_update_vsi_list(struct ice_hw *hw, + struct ice_adv_fltr_mgmt_list_entry *m_entry, + struct ice_adv_rule_info *cur_fltr, + struct ice_adv_rule_info *new_fltr) +{ + enum ice_status status; + u16 vsi_list_id = 0; + + if (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_Q || + cur_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP || + cur_fltr->sw_act.fltr_act == ICE_DROP_PACKET) + return ICE_ERR_NOT_IMPL; + + if ((new_fltr->sw_act.fltr_act == ICE_FWD_TO_Q || + new_fltr->sw_act.fltr_act == ICE_FWD_TO_QGRP) && + (cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI || + cur_fltr->sw_act.fltr_act == ICE_FWD_TO_VSI_LIST)) + return ICE_ERR_NOT_IMPL; + + if (m_entry->vsi_count < 2 && !m_entry->vsi_list_info) { + /* Only one entry existed in the mapping and it was not already + * a part of a VSI list. So, create a VSI list with the old and + * new VSIs. 
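The bookkeeping described in the comment above boils down to a small state transition: the first subscriber keeps a plain forward-to-VSI rule, the second subscriber converts it into a VSI-list rule holding both VSIs, and later subscribers simply join the existing list. A simplified standalone sketch of that transition, with a bitmap standing in for the VSI list:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct toy_rule {
    unsigned int vsi_count;
    bool has_list;
    uint64_t vsi_list;          /* bitmap of subscribed VSI handles */
    unsigned int single_vsi;    /* valid while has_list == false */
};

/* Subscribe 'vsi' to 'r'.  Returns false if it was already subscribed. */
static bool subscribe_vsi(struct toy_rule *r, unsigned int vsi)
{
    if (!r->has_list && r->vsi_count == 1) {
        if (r->single_vsi == vsi)
            return false;   /* rule already exists for this VSI */
        /* second VSI: create a list holding both, repoint the rule */
        r->vsi_list = (1ull << r->single_vsi) | (1ull << vsi);
        r->has_list = true;
    } else if (r->has_list) {
        if (r->vsi_list & (1ull << vsi))
            return false;   /* already on the list */
        r->vsi_list |= 1ull << vsi;
    } else {
        r->single_vsi = vsi;    /* first subscriber */
    }
    r->vsi_count++;
    return true;
}

int main(void)
{
    struct toy_rule r = { 0 };

    subscribe_vsi(&r, 3);   /* plain forward-to-VSI rule */
    subscribe_vsi(&r, 7);   /* converts to a two-entry VSI list */
    subscribe_vsi(&r, 9);   /* joins the existing list */
    printf("count=%u list=0x%llx\n", r.vsi_count,
           (unsigned long long)r.vsi_list);
    return 0;
}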
+ */ + struct ice_fltr_info tmp_fltr; + u16 vsi_handle_arr[2]; + + /* A rule already exists with the new VSI being added */ + if (cur_fltr->sw_act.fwd_id.hw_vsi_id == + new_fltr->sw_act.fwd_id.hw_vsi_id) + return ICE_ERR_ALREADY_EXISTS; + + vsi_handle_arr[0] = cur_fltr->sw_act.vsi_handle; + vsi_handle_arr[1] = new_fltr->sw_act.vsi_handle; + status = ice_create_vsi_list_rule(hw, &vsi_handle_arr[0], 2, + &vsi_list_id, + ICE_SW_LKUP_LAST); + if (status) + return status; + + memset(&tmp_fltr, 0, sizeof(tmp_fltr)); + tmp_fltr.flag = m_entry->rule_info.sw_act.flag; + tmp_fltr.fltr_rule_id = cur_fltr->fltr_rule_id; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI_LIST; + tmp_fltr.fwd_id.vsi_list_id = vsi_list_id; + tmp_fltr.lkup_type = ICE_SW_LKUP_LAST; + + /* Update the previous switch rule of "forward to VSI" to + * "fwd to VSI list" + */ + status = ice_update_pkt_fwd_rule(hw, &tmp_fltr); + if (status) + return status; + + cur_fltr->sw_act.fwd_id.vsi_list_id = vsi_list_id; + cur_fltr->sw_act.fltr_act = ICE_FWD_TO_VSI_LIST; + m_entry->vsi_list_info = + ice_create_vsi_list_map(hw, &vsi_handle_arr[0], 2, + vsi_list_id); + } else { + u16 vsi_handle = new_fltr->sw_act.vsi_handle; + + if (!m_entry->vsi_list_info) + return ICE_ERR_CFG; + + /* A rule already exists with the new VSI being added */ + if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map)) + return 0; + + /* Update the previously created VSI list set with + * the new VSI ID passed in + */ + vsi_list_id = cur_fltr->sw_act.fwd_id.vsi_list_id; + + status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, + vsi_list_id, false, + ice_aqc_opc_update_sw_rules, + ICE_SW_LKUP_LAST); + /* update VSI list mapping info with new VSI ID */ + if (!status) + set_bit(vsi_handle, m_entry->vsi_list_info->vsi_map); + } + if (!status) + m_entry->vsi_count++; + return status; +} + +/** + * ice_add_adv_rule - helper function to create an advanced switch rule + * @hw: pointer to the hardware structure + * @lkups: information on the words that needs to be looked up. All words + * together makes one recipe + * @lkups_cnt: num of entries in the lkups array + * @rinfo: other information related to the rule that needs to be programmed + * @added_entry: this will return recipe_id, rule_id and vsi_handle. should be + * ignored is case of error. + * + * This function can program only 1 rule at a time. The lkups is used to + * describe the all the words that forms the "lookup" portion of the recipe. + * These words can span multiple protocols. Callers to this function need to + * pass in a list of protocol headers with lookup information along and mask + * that determines which words are valid from the given protocol header. + * rinfo describes other information related to this rule such as forwarding + * IDs, priority of this rule, etc. 
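ice_add_adv_rule() (which follows) first derives how many 16-bit words the caller actually wants to match by counting the non-zero words in every lookup mask, rejecting requests that match nothing or exceed the chain-word limit. A standalone sketch of that count:

#include <stdint.h>
#include <stdio.h>

#define MAX_CHAIN_WORDS 32  /* stand-in for ICE_MAX_CHAIN_WORDS */

/* Count the non-zero 16-bit words across all lookup masks.  A count of
 * zero (nothing to match) or one above the limit makes the request
 * invalid.
 */
static int count_match_words(const uint16_t *masks, unsigned int n_words)
{
    unsigned int cnt = 0;

    for (unsigned int i = 0; i < n_words; i++)
        if (masks[i])
            cnt++;

    if (!cnt || cnt > MAX_CHAIN_WORDS)
        return -1;  /* invalid request */
    return (int)cnt;
}

int main(void)
{
    /* e.g. an Ethernet destination MAC: three fully masked 16-bit words */
    uint16_t masks[] = { 0xFFFF, 0xFFFF, 0xFFFF, 0, 0, 0, 0 };

    printf("words to match: %d\n", count_match_words(masks, 7));
    return 0;
}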
+ */ +enum ice_status +ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo, + struct ice_rule_query_data *added_entry) +{ + struct ice_adv_fltr_mgmt_list_entry *m_entry, *adv_fltr = NULL; + u16 rid = 0, i, pkt_len, rule_buf_sz, vsi_handle; + const struct ice_dummy_pkt_offsets *pkt_offsets; + struct ice_aqc_sw_rules_elem *s_rule = NULL; + struct list_head *rule_head; + struct ice_switch_info *sw; + enum ice_status status; + const u8 *pkt = NULL; + u16 word_cnt; + u32 act = 0; + u8 q_rgn; + + /* Initialize profile to result index bitmap */ + if (!hw->switch_info->prof_res_bm_init) { + hw->switch_info->prof_res_bm_init = 1; + ice_init_prof_result_bm(hw); + } + + if (!lkups_cnt) + return ICE_ERR_PARAM; + + /* get # of words we need to match */ + word_cnt = 0; + for (i = 0; i < lkups_cnt; i++) { + u16 j, *ptr; + + ptr = (u16 *)&lkups[i].m_u; + for (j = 0; j < sizeof(lkups->m_u) / sizeof(u16); j++) + if (ptr[j] != 0) + word_cnt++; + } + + if (!word_cnt || word_cnt > ICE_MAX_CHAIN_WORDS) + return ICE_ERR_PARAM; + + /* make sure that we can locate a dummy packet */ + ice_find_dummy_packet(lkups, lkups_cnt, rinfo->tun_type, &pkt, &pkt_len, + &pkt_offsets); + if (!pkt) { + status = ICE_ERR_PARAM; + goto err_ice_add_adv_rule; + } + + if (!(rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI || + rinfo->sw_act.fltr_act == ICE_FWD_TO_Q || + rinfo->sw_act.fltr_act == ICE_FWD_TO_QGRP || + rinfo->sw_act.fltr_act == ICE_DROP_PACKET)) + return ICE_ERR_CFG; + + vsi_handle = rinfo->sw_act.vsi_handle; + if (!ice_is_vsi_valid(hw, vsi_handle)) + return ICE_ERR_PARAM; + + if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) + rinfo->sw_act.fwd_id.hw_vsi_id = + ice_get_hw_vsi_num(hw, vsi_handle); + if (rinfo->sw_act.flag & ICE_FLTR_TX) + rinfo->sw_act.src = ice_get_hw_vsi_num(hw, vsi_handle); + + status = ice_add_adv_recipe(hw, lkups, lkups_cnt, rinfo, &rid); + if (status) + return status; + m_entry = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo); + if (m_entry) { + /* we have to add VSI to VSI_LIST and increment vsi_count. + * Also Update VSI list so that we can change forwarding rule + * if the rule already exists, we will check if it exists with + * same vsi_id, if not then add it to the VSI list if it already + * exists if not then create a VSI list and add the existing VSI + * ID and the new VSI ID to the list + * We will add that VSI to the list + */ + status = ice_adv_add_update_vsi_list(hw, m_entry, + &m_entry->rule_info, + rinfo); + if (added_entry) { + added_entry->rid = rid; + added_entry->rule_id = m_entry->rule_info.fltr_rule_id; + added_entry->vsi_handle = rinfo->sw_act.vsi_handle; + } + return status; + } + rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE + pkt_len; + s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + if (!rinfo->flags_info.act_valid) { + act |= ICE_SINGLE_ACT_LAN_ENABLE; + act |= ICE_SINGLE_ACT_LB_ENABLE; + } else { + act |= rinfo->flags_info.act & (ICE_SINGLE_ACT_LAN_ENABLE | + ICE_SINGLE_ACT_LB_ENABLE); + } + + switch (rinfo->sw_act.fltr_act) { + case ICE_FWD_TO_VSI: + act |= (rinfo->sw_act.fwd_id.hw_vsi_id << + ICE_SINGLE_ACT_VSI_ID_S) & ICE_SINGLE_ACT_VSI_ID_M; + act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_VALID_BIT; + break; + case ICE_FWD_TO_Q: + act |= ICE_SINGLE_ACT_TO_Q; + act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & + ICE_SINGLE_ACT_Q_INDEX_M; + break; + case ICE_FWD_TO_QGRP: + q_rgn = rinfo->sw_act.qgrp_size > 0 ? 
+ (u8)ilog2(rinfo->sw_act.qgrp_size) : 0; + act |= ICE_SINGLE_ACT_TO_Q; + act |= (rinfo->sw_act.fwd_id.q_id << ICE_SINGLE_ACT_Q_INDEX_S) & + ICE_SINGLE_ACT_Q_INDEX_M; + act |= (q_rgn << ICE_SINGLE_ACT_Q_REGION_S) & + ICE_SINGLE_ACT_Q_REGION_M; + break; + case ICE_DROP_PACKET: + act |= ICE_SINGLE_ACT_VSI_FORWARDING | ICE_SINGLE_ACT_DROP | + ICE_SINGLE_ACT_VALID_BIT; + break; + default: + status = ICE_ERR_CFG; + goto err_ice_add_adv_rule; + } + + /* set the rule LOOKUP type based on caller specified 'Rx' + * instead of hardcoding it to be either LOOKUP_TX/RX + * + * for 'Rx' set the source to be the port number + * for 'Tx' set the source to be the source HW VSI number (determined + * by caller) + */ + if (rinfo->rx) { + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_RX); + s_rule->pdata.lkup_tx_rx.src = + cpu_to_le16(hw->port_info->lport); + } else { + s_rule->type = cpu_to_le16(ICE_AQC_SW_RULES_T_LKUP_TX); + s_rule->pdata.lkup_tx_rx.src = cpu_to_le16(rinfo->sw_act.src); + } + + s_rule->pdata.lkup_tx_rx.recipe_id = cpu_to_le16(rid); + s_rule->pdata.lkup_tx_rx.act = cpu_to_le32(act); + + status = ice_fill_adv_dummy_packet(lkups, lkups_cnt, s_rule, pkt, + pkt_len, pkt_offsets); + if (status) + goto err_ice_add_adv_rule; + + if (rinfo->tun_type != ICE_NON_TUN) { + status = ice_fill_adv_packet_tun(hw, rinfo->tun_type, + s_rule->pdata.lkup_tx_rx.hdr, + pkt_offsets); + if (status) + goto err_ice_add_adv_rule; + } + + status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, + rule_buf_sz, 1, ice_aqc_opc_add_sw_rules, + NULL); + if (status) + goto err_ice_add_adv_rule; + adv_fltr = devm_kzalloc(ice_hw_to_dev(hw), + sizeof(struct ice_adv_fltr_mgmt_list_entry), + GFP_KERNEL); + if (!adv_fltr) { + status = ICE_ERR_NO_MEMORY; + goto err_ice_add_adv_rule; + } + + adv_fltr->lkups = devm_kmemdup(ice_hw_to_dev(hw), lkups, + lkups_cnt * sizeof(*lkups), GFP_KERNEL); + if (!adv_fltr->lkups) { + status = ICE_ERR_NO_MEMORY; + goto err_ice_add_adv_rule; + } + + adv_fltr->lkups_cnt = lkups_cnt; + adv_fltr->rule_info = *rinfo; + adv_fltr->rule_info.fltr_rule_id = + le16_to_cpu(s_rule->pdata.lkup_tx_rx.index); + sw = hw->switch_info; + sw->recp_list[rid].adv_rule = true; + rule_head = &sw->recp_list[rid].filt_rules; + + if (rinfo->sw_act.fltr_act == ICE_FWD_TO_VSI) + adv_fltr->vsi_count = 1; + + /* Add rule entry to book keeping list */ + list_add(&adv_fltr->list_entry, rule_head); + if (added_entry) { + added_entry->rid = rid; + added_entry->rule_id = adv_fltr->rule_info.fltr_rule_id; + added_entry->vsi_handle = rinfo->sw_act.vsi_handle; + } +err_ice_add_adv_rule: + if (status && adv_fltr) { + devm_kfree(ice_hw_to_dev(hw), adv_fltr->lkups); + devm_kfree(ice_hw_to_dev(hw), adv_fltr); + } + + kfree(s_rule); + + return status; +} + /** * ice_replay_vsi_fltr - Replay filters for requested VSI * @hw: pointer to the hardware structure @@ -2831,6 +5420,236 @@ end: } /** + * ice_adv_rem_update_vsi_list + * @hw: pointer to the hardware structure + * @vsi_handle: VSI handle of the VSI to remove + * @fm_list: filter management entry for which the VSI list management needs to + * be done + */ +static enum ice_status +ice_adv_rem_update_vsi_list(struct ice_hw *hw, u16 vsi_handle, + struct ice_adv_fltr_mgmt_list_entry *fm_list) +{ + struct ice_vsi_list_map_info *vsi_list_info; + enum ice_sw_lkup_type lkup_type; + enum ice_status status; + u16 vsi_list_id; + + if (fm_list->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST || + fm_list->vsi_count == 0) + return ICE_ERR_PARAM; + + /* A rule with the VSI being removed 
does not exist */ + if (!test_bit(vsi_handle, fm_list->vsi_list_info->vsi_map)) + return ICE_ERR_DOES_NOT_EXIST; + + lkup_type = ICE_SW_LKUP_LAST; + vsi_list_id = fm_list->rule_info.sw_act.fwd_id.vsi_list_id; + status = ice_update_vsi_list_rule(hw, &vsi_handle, 1, vsi_list_id, true, + ice_aqc_opc_update_sw_rules, + lkup_type); + if (status) + return status; + + fm_list->vsi_count--; + clear_bit(vsi_handle, fm_list->vsi_list_info->vsi_map); + vsi_list_info = fm_list->vsi_list_info; + if (fm_list->vsi_count == 1) { + struct ice_fltr_info tmp_fltr; + u16 rem_vsi_handle; + + rem_vsi_handle = find_first_bit(vsi_list_info->vsi_map, + ICE_MAX_VSI); + if (!ice_is_vsi_valid(hw, rem_vsi_handle)) + return ICE_ERR_OUT_OF_RANGE; + + /* Make sure VSI list is empty before removing it below */ + status = ice_update_vsi_list_rule(hw, &rem_vsi_handle, 1, + vsi_list_id, true, + ice_aqc_opc_update_sw_rules, + lkup_type); + if (status) + return status; + + memset(&tmp_fltr, 0, sizeof(tmp_fltr)); + tmp_fltr.flag = fm_list->rule_info.sw_act.flag; + tmp_fltr.fltr_rule_id = fm_list->rule_info.fltr_rule_id; + fm_list->rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + tmp_fltr.fltr_act = ICE_FWD_TO_VSI; + tmp_fltr.fwd_id.hw_vsi_id = + ice_get_hw_vsi_num(hw, rem_vsi_handle); + fm_list->rule_info.sw_act.fwd_id.hw_vsi_id = + ice_get_hw_vsi_num(hw, rem_vsi_handle); + fm_list->rule_info.sw_act.vsi_handle = rem_vsi_handle; + + /* Update the previous switch rule of "MAC forward to VSI" to + * "MAC fwd to VSI list" + */ + status = ice_update_pkt_fwd_rule(hw, &tmp_fltr); + if (status) { + ice_debug(hw, ICE_DBG_SW, "Failed to update pkt fwd rule to FWD_TO_VSI on HW VSI %d, error %d\n", + tmp_fltr.fwd_id.hw_vsi_id, status); + return status; + } + fm_list->vsi_list_info->ref_cnt--; + + /* Remove the VSI list since it is no longer used */ + status = ice_remove_vsi_list_rule(hw, vsi_list_id, lkup_type); + if (status) { + ice_debug(hw, ICE_DBG_SW, "Failed to remove VSI list %d, error %d\n", + vsi_list_id, status); + return status; + } + + list_del(&vsi_list_info->list_entry); + devm_kfree(ice_hw_to_dev(hw), vsi_list_info); + fm_list->vsi_list_info = NULL; + } + + return status; +} + +/** + * ice_rem_adv_rule - removes existing advanced switch rule + * @hw: pointer to the hardware structure + * @lkups: information on the words that needs to be looked up. All words + * together makes one recipe + * @lkups_cnt: num of entries in the lkups array + * @rinfo: Its the pointer to the rule information for the rule + * + * This function can be used to remove 1 rule at a time. The lkups is + * used to describe all the words that forms the "lookup" portion of the + * rule. These words can span multiple protocols. Callers to this function + * need to pass in a list of protocol headers with lookup information along + * and mask that determines which words are valid from the given protocol + * header. rinfo describes other information related to this rule such as + * forwarding IDs, priority of this rule, etc. 
+ */ +static enum ice_status +ice_rem_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo) +{ + struct ice_adv_fltr_mgmt_list_entry *list_elem; + struct ice_prot_lkup_ext lkup_exts; + enum ice_status status = 0; + bool remove_rule = false; + struct mutex *rule_lock; /* Lock to protect filter rule list */ + u16 i, rid, vsi_handle; + + memset(&lkup_exts, 0, sizeof(lkup_exts)); + for (i = 0; i < lkups_cnt; i++) { + u16 count; + + if (lkups[i].type >= ICE_PROTOCOL_LAST) + return ICE_ERR_CFG; + + count = ice_fill_valid_words(&lkups[i], &lkup_exts); + if (!count) + return ICE_ERR_CFG; + } + + /* Create any special protocol/offset pairs, such as looking at tunnel + * bits by extracting metadata + */ + status = ice_add_special_words(rinfo, &lkup_exts); + if (status) + return status; + + rid = ice_find_recp(hw, &lkup_exts); + /* If did not find a recipe that match the existing criteria */ + if (rid == ICE_MAX_NUM_RECIPES) + return ICE_ERR_PARAM; + + rule_lock = &hw->switch_info->recp_list[rid].filt_rule_lock; + list_elem = ice_find_adv_rule_entry(hw, lkups, lkups_cnt, rid, rinfo); + /* the rule is already removed */ + if (!list_elem) + return 0; + mutex_lock(rule_lock); + if (list_elem->rule_info.sw_act.fltr_act != ICE_FWD_TO_VSI_LIST) { + remove_rule = true; + } else if (list_elem->vsi_count > 1) { + remove_rule = false; + vsi_handle = rinfo->sw_act.vsi_handle; + status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem); + } else { + vsi_handle = rinfo->sw_act.vsi_handle; + status = ice_adv_rem_update_vsi_list(hw, vsi_handle, list_elem); + if (status) { + mutex_unlock(rule_lock); + return status; + } + if (list_elem->vsi_count == 0) + remove_rule = true; + } + mutex_unlock(rule_lock); + if (remove_rule) { + struct ice_aqc_sw_rules_elem *s_rule; + u16 rule_buf_sz; + + rule_buf_sz = ICE_SW_RULE_RX_TX_NO_HDR_SIZE; + s_rule = kzalloc(rule_buf_sz, GFP_KERNEL); + if (!s_rule) + return ICE_ERR_NO_MEMORY; + s_rule->pdata.lkup_tx_rx.act = 0; + s_rule->pdata.lkup_tx_rx.index = + cpu_to_le16(list_elem->rule_info.fltr_rule_id); + s_rule->pdata.lkup_tx_rx.hdr_len = 0; + status = ice_aq_sw_rules(hw, (struct ice_aqc_sw_rules *)s_rule, + rule_buf_sz, 1, + ice_aqc_opc_remove_sw_rules, NULL); + if (!status || status == ICE_ERR_DOES_NOT_EXIST) { + struct ice_switch_info *sw = hw->switch_info; + + mutex_lock(rule_lock); + list_del(&list_elem->list_entry); + devm_kfree(ice_hw_to_dev(hw), list_elem->lkups); + devm_kfree(ice_hw_to_dev(hw), list_elem); + mutex_unlock(rule_lock); + if (list_empty(&sw->recp_list[rid].filt_rules)) + sw->recp_list[rid].adv_rule = false; + } + kfree(s_rule); + } + return status; +} + +/** + * ice_rem_adv_rule_by_id - removes existing advanced switch rule by ID + * @hw: pointer to the hardware structure + * @remove_entry: data struct which holds rule_id, VSI handle and recipe ID + * + * This function is used to remove 1 rule at a time. The removal is based on + * the remove_entry parameter. 
This function will remove rule for a given + * vsi_handle with a given rule_id which is passed as parameter in remove_entry + */ +enum ice_status +ice_rem_adv_rule_by_id(struct ice_hw *hw, + struct ice_rule_query_data *remove_entry) +{ + struct ice_adv_fltr_mgmt_list_entry *list_itr; + struct list_head *list_head; + struct ice_adv_rule_info rinfo; + struct ice_switch_info *sw; + + sw = hw->switch_info; + if (!sw->recp_list[remove_entry->rid].recp_created) + return ICE_ERR_PARAM; + list_head = &sw->recp_list[remove_entry->rid].filt_rules; + list_for_each_entry(list_itr, list_head, list_entry) { + if (list_itr->rule_info.fltr_rule_id == + remove_entry->rule_id) { + rinfo = list_itr->rule_info; + rinfo.sw_act.vsi_handle = remove_entry->vsi_handle; + return ice_rem_adv_rule(hw, list_itr->lkups, + list_itr->lkups_cnt, &rinfo); + } + } + /* either list is empty or unable to find rule */ + return ICE_ERR_DOES_NOT_EXIST; +} + +/** * ice_replay_vsi_all_fltr - replay all filters stored in bookkeeping lists * @hw: pointer to the hardware structure * @vsi_handle: driver VSI handle @@ -2868,12 +5687,15 @@ void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw) if (!sw) return; - for (i = 0; i < ICE_SW_LKUP_LAST; i++) { + for (i = 0; i < ICE_MAX_NUM_RECIPES; i++) { if (!list_empty(&sw->recp_list[i].filt_replay_rules)) { struct list_head *l_head; l_head = &sw->recp_list[i].filt_replay_rules; - ice_rem_sw_rule_info(hw, l_head); + if (!sw->recp_list[i].adv_rule) + ice_rem_sw_rule_info(hw, l_head); + else + ice_rem_adv_rule_info(hw, l_head); } } } diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h index c5db8d56133f..d8a38906f16f 100644 --- a/drivers/net/ethernet/intel/ice/ice_switch.h +++ b/drivers/net/ethernet/intel/ice/ice_switch.h @@ -14,6 +14,9 @@ #define ICE_VSI_INVAL_ID 0xffff #define ICE_INVAL_Q_HANDLE 0xFFFF +#define ICE_SW_RULE_RX_TX_NO_HDR_SIZE \ + (offsetof(struct ice_aqc_sw_rules_elem, pdata.lkup_tx_rx.hdr)) + /* VSI context structure for add/get/update/free operations */ struct ice_vsi_ctx { u16 vsi_num; @@ -122,30 +125,124 @@ struct ice_fltr_info { u8 lan_en; /* Indicate if packet can be forwarded to the uplink */ }; +struct ice_adv_lkup_elem { + enum ice_protocol_type type; + union ice_prot_hdr h_u; /* Header values */ + union ice_prot_hdr m_u; /* Mask of header values to match */ +}; + +struct ice_sw_act_ctrl { + /* Source VSI for LOOKUP_TX or source port for LOOKUP_RX */ + u16 src; + u16 flag; + enum ice_sw_fwd_act_type fltr_act; + /* Depending on filter action */ + union { + /* This is a queue ID in case of ICE_FWD_TO_Q and starting + * queue ID in case of ICE_FWD_TO_QGRP. + */ + u16 q_id:11; + u16 vsi_id:10; + u16 hw_vsi_id:10; + u16 vsi_list_id:10; + } fwd_id; + /* software VSI handle */ + u16 vsi_handle; + u8 qgrp_size; +}; + +struct ice_rule_query_data { + /* Recipe ID for which the requested rule was added */ + u16 rid; + /* Rule ID that was added or is supposed to be removed */ + u16 rule_id; + /* vsi_handle for which Rule was added or is supposed to be removed */ + u16 vsi_handle; +}; + +/* This structure allows to pass info about lb_en and lan_en + * flags to ice_add_adv_rule. Values in act would be used + * only if act_valid was set to true, otherwise default + * values would be used. 
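+ *
+ * For example, the egress TC offload path in ice_tc_lib.c sets
+ *	rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE;
+ *	rule_info.flags_info.act_valid = true;
+ * so the LAN-enable bit is used instead of the default action flags.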
+ */ +struct ice_adv_rule_flags_info { + u32 act; + u8 act_valid; /* indicate if flags in act are valid */ +}; + +struct ice_adv_rule_info { + enum ice_sw_tunnel_type tun_type; + struct ice_sw_act_ctrl sw_act; + u32 priority; + u8 rx; /* true means LOOKUP_RX otherwise LOOKUP_TX */ + u16 fltr_rule_id; + struct ice_adv_rule_flags_info flags_info; +}; + +/* A collection of one or more four word recipe */ struct ice_sw_recipe { - struct list_head l_entry; + /* For a chained recipe the root recipe is what should be used for + * programming rules + */ + u8 is_root; + u8 root_rid; + u8 recp_created; - /* To protect modification of filt_rule list - * defined below + /* Number of extraction words */ + u8 n_ext_words; + /* Protocol ID and Offset pair (extraction word) to describe the + * recipe */ - struct mutex filt_rule_lock; + struct ice_fv_word ext_words[ICE_MAX_CHAIN_WORDS]; + u16 word_masks[ICE_MAX_CHAIN_WORDS]; - /* List of type ice_fltr_mgmt_list_entry */ + /* if this recipe is a collection of other recipe */ + u8 big_recp; + + /* if this recipe is part of another bigger recipe then chain index + * corresponding to this recipe + */ + u8 chain_idx; + + /* if this recipe is a collection of other recipe then count of other + * recipes and recipe IDs of those recipes + */ + u8 n_grp_count; + + /* Bit map specifying the IDs associated with this group of recipe */ + DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + + enum ice_sw_tunnel_type tun_type; + + /* List of type ice_fltr_mgmt_list_entry or adv_rule */ + u8 adv_rule; struct list_head filt_rules; struct list_head filt_replay_rules; - /* linked list of type recipe_list_entry */ - struct list_head rg_list; - /* linked list of type ice_sw_fv_list_entry*/ + struct mutex filt_rule_lock; /* protect filter rule structure */ + + /* Profiles this recipe should be associated with */ struct list_head fv_list; - struct ice_aqc_recipe_data_elem *r_buf; - u8 recp_count; - u8 root_rid; - u8 num_profs; - u8 *prof_ids; - /* recipe bitmap: what all recipes makes this recipe */ - DECLARE_BITMAP(r_bitmap, ICE_MAX_NUM_RECIPES); + /* Profiles this recipe is associated with */ + u8 num_profs, *prof_ids; + + /* Bit map for possible result indexes */ + DECLARE_BITMAP(res_idxs, ICE_MAX_FV_WORDS); + + /* This allows user to specify the recipe priority. + * For now, this becomes 'fwd_priority' when recipe + * is created, usually recipes can have 'fwd' and 'join' + * priority. 
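+ * The TC offload path currently passes 7, which is the highest
+ * priority a switch recipe accepts.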
+ */ + u8 priority; + + struct list_head rg_list; + + /* AQ buffer associated with this recipe */ + struct ice_aqc_recipe_data_elem *root_buf; + /* This struct saves the fv_words for a given lookup */ + struct ice_prot_lkup_ext lkup_exts; }; /* Bookkeeping structure to hold bitmap of VSIs corresponding to VSI list ID */ @@ -183,6 +280,16 @@ struct ice_fltr_mgmt_list_entry { u8 counter_index; }; +struct ice_adv_fltr_mgmt_list_entry { + struct list_head list_entry; + + struct ice_adv_lkup_elem *lkups; + struct ice_adv_rule_info rule_info; + u16 lkups_cnt; + struct ice_vsi_list_map_info *vsi_list_info; + u16 vsi_count; +}; + enum ice_promisc_flags { ICE_PROMISC_UCAST_RX = 0x1, ICE_PROMISC_UCAST_TX = 0x2, @@ -218,6 +325,10 @@ ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items, u16 counter_id); /* Switch/bridge related commands */ +enum ice_status +ice_add_adv_rule(struct ice_hw *hw, struct ice_adv_lkup_elem *lkups, + u16 lkups_cnt, struct ice_adv_rule_info *rinfo, + struct ice_rule_query_data *added_entry); enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw); enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst); enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_lst); @@ -227,6 +338,8 @@ enum ice_status ice_remove_eth_mac(struct ice_hw *hw, struct list_head *em_list); int ice_cfg_rdma_fltr(struct ice_hw *hw, u16 vsi_handle, bool enable); +bool ice_mac_fltr_exist(struct ice_hw *hw, u8 *mac, u16 vsi_handle); +bool ice_vlan_fltr_exist(struct ice_hw *hw, u16 vlan_id, u16 vsi_handle); void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *m_list); @@ -245,10 +358,19 @@ enum ice_status ice_set_vlan_vsi_promisc(struct ice_hw *hw, u16 vsi_handle, u8 promisc_mask, bool rm_vlan_promisc); +enum ice_status +ice_rem_adv_rule_for_vsi(struct ice_hw *hw, u16 vsi_handle); +enum ice_status +ice_rem_adv_rule_by_id(struct ice_hw *hw, + struct ice_rule_query_data *remove_entry); + enum ice_status ice_init_def_sw_recp(struct ice_hw *hw); u16 ice_get_hw_vsi_num(struct ice_hw *hw, u16 vsi_handle); enum ice_status ice_replay_vsi_all_fltr(struct ice_hw *hw, u16 vsi_handle); void ice_rm_all_sw_replay_rule_info(struct ice_hw *hw); +enum ice_status +ice_aq_sw_rules(struct ice_hw *hw, void *rule_list, u16 rule_list_sz, + u8 num_rules, enum ice_adminq_opc opc, struct ice_sq_cd *cd); #endif /* _ICE_SWITCH_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.c b/drivers/net/ethernet/intel/ice/ice_tc_lib.c new file mode 100644 index 000000000000..e5d23feb6701 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.c @@ -0,0 +1,1369 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2019-2021, Intel Corporation. */ + +#include "ice.h" +#include "ice_tc_lib.h" +#include "ice_fltr.h" +#include "ice_lib.h" +#include "ice_protocol_type.h" + +/** + * ice_tc_count_lkups - determine lookup count for switch filter + * @flags: TC-flower flags + * @headers: Pointer to TC flower filter header structure + * @fltr: Pointer to outer TC filter structure + * + * Determine lookup count based on TC flower input for switch filter. 
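+ *
+ * For example (illustrative), a non-tunnel filter matching dst_mac,
+ * dst_ipv4 and a TCP dst_port sets three of the flag groups checked
+ * below, so the function returns 3.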
+ */ +static int +ice_tc_count_lkups(u32 flags, struct ice_tc_flower_lyr_2_4_hdrs *headers, + struct ice_tc_flower_fltr *fltr) +{ + int lkups_cnt = 0; + + if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) + lkups_cnt++; + + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) + lkups_cnt++; + + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) + lkups_cnt++; + + /* currently inner etype filter isn't supported */ + if ((flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) && + fltr->tunnel_type == TNL_LAST) + lkups_cnt++; + + /* are MAC fields specified? */ + if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | ICE_TC_FLWR_FIELD_SRC_MAC)) + lkups_cnt++; + + /* is VLAN specified? */ + if (flags & ICE_TC_FLWR_FIELD_VLAN) + lkups_cnt++; + + /* are IPv[4|6] fields specified? */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | ICE_TC_FLWR_FIELD_SRC_IPV4 | + ICE_TC_FLWR_FIELD_DEST_IPV6 | ICE_TC_FLWR_FIELD_SRC_IPV6)) + lkups_cnt++; + + /* is L4 (TCP/UDP/any other L4 protocol fields) specified? */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT | + ICE_TC_FLWR_FIELD_SRC_L4_PORT)) + lkups_cnt++; + + return lkups_cnt; +} + +static enum ice_protocol_type ice_proto_type_from_mac(bool inner) +{ + return inner ? ICE_MAC_IL : ICE_MAC_OFOS; +} + +static enum ice_protocol_type ice_proto_type_from_ipv4(bool inner) +{ + return inner ? ICE_IPV4_IL : ICE_IPV4_OFOS; +} + +static enum ice_protocol_type ice_proto_type_from_ipv6(bool inner) +{ + return inner ? ICE_IPV6_IL : ICE_IPV6_OFOS; +} + +static enum ice_protocol_type +ice_proto_type_from_l4_port(bool inner, u16 ip_proto) +{ + if (inner) { + switch (ip_proto) { + case IPPROTO_UDP: + return ICE_UDP_ILOS; + } + } else { + switch (ip_proto) { + case IPPROTO_TCP: + return ICE_TCP_IL; + case IPPROTO_UDP: + return ICE_UDP_OF; + } + } + + return 0; +} + +static enum ice_protocol_type +ice_proto_type_from_tunnel(enum ice_tunnel_type type) +{ + switch (type) { + case TNL_VXLAN: + return ICE_VXLAN; + case TNL_GENEVE: + return ICE_GENEVE; + case TNL_GRETAP: + return ICE_NVGRE; + default: + return 0; + } +} + +static enum ice_sw_tunnel_type +ice_sw_type_from_tunnel(enum ice_tunnel_type type) +{ + switch (type) { + case TNL_VXLAN: + return ICE_SW_TUN_VXLAN; + case TNL_GENEVE: + return ICE_SW_TUN_GENEVE; + case TNL_GRETAP: + return ICE_SW_TUN_NVGRE; + default: + return ICE_NON_TUN; + } +} + +static int +ice_tc_fill_tunnel_outer(u32 flags, struct ice_tc_flower_fltr *fltr, + struct ice_adv_lkup_elem *list) +{ + struct ice_tc_flower_lyr_2_4_hdrs *hdr = &fltr->outer_headers; + int i = 0; + + if (flags & ICE_TC_FLWR_FIELD_TENANT_ID) { + u32 tenant_id; + + list[i].type = ice_proto_type_from_tunnel(fltr->tunnel_type); + switch (fltr->tunnel_type) { + case TNL_VXLAN: + case TNL_GENEVE: + tenant_id = be32_to_cpu(fltr->tenant_id) << 8; + list[i].h_u.tnl_hdr.vni = cpu_to_be32(tenant_id); + memcpy(&list[i].m_u.tnl_hdr.vni, "\xff\xff\xff\x00", 4); + i++; + break; + case TNL_GRETAP: + list[i].h_u.nvgre_hdr.tni_flow = fltr->tenant_id; + memcpy(&list[i].m_u.nvgre_hdr.tni_flow, "\xff\xff\xff\xff", 4); + i++; + break; + default: + break; + } + } + + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV4)) { + list[i].type = ice_proto_type_from_ipv4(false); + + if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV4) { + list[i].h_u.ipv4_hdr.src_addr = hdr->l3_key.src_ipv4; + list[i].m_u.ipv4_hdr.src_addr = hdr->l3_mask.src_ipv4; + } + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV4) { + list[i].h_u.ipv4_hdr.dst_addr = 
hdr->l3_key.dst_ipv4; + list[i].m_u.ipv4_hdr.dst_addr = hdr->l3_mask.dst_ipv4; + } + i++; + } + + if (flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6)) { + list[i].type = ice_proto_type_from_ipv6(false); + + if (flags & ICE_TC_FLWR_FIELD_ENC_SRC_IPV6) { + memcpy(&list[i].h_u.ipv6_hdr.src_addr, + &hdr->l3_key.src_ipv6_addr, + sizeof(hdr->l3_key.src_ipv6_addr)); + memcpy(&list[i].m_u.ipv6_hdr.src_addr, + &hdr->l3_mask.src_ipv6_addr, + sizeof(hdr->l3_mask.src_ipv6_addr)); + } + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_IPV6) { + memcpy(&list[i].h_u.ipv6_hdr.dst_addr, + &hdr->l3_key.dst_ipv6_addr, + sizeof(hdr->l3_key.dst_ipv6_addr)); + memcpy(&list[i].m_u.ipv6_hdr.dst_addr, + &hdr->l3_mask.dst_ipv6_addr, + sizeof(hdr->l3_mask.dst_ipv6_addr)); + } + i++; + } + + if (flags & ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT) { + list[i].type = ice_proto_type_from_l4_port(false, hdr->l3_key.ip_proto); + list[i].h_u.l4_hdr.dst_port = hdr->l4_key.dst_port; + list[i].m_u.l4_hdr.dst_port = hdr->l4_mask.dst_port; + i++; + } + + return i; +} + +/** + * ice_tc_fill_rules - fill filter rules based on TC fltr + * @hw: pointer to HW structure + * @flags: tc flower field flags + * @tc_fltr: pointer to TC flower filter + * @list: list of advance rule elements + * @rule_info: pointer to information about rule + * @l4_proto: pointer to information such as L4 proto type + * + * Fill ice_adv_lkup_elem list based on TC flower flags and + * TC flower headers. This list should be used to add + * advance filter in hardware. + */ +static int +ice_tc_fill_rules(struct ice_hw *hw, u32 flags, + struct ice_tc_flower_fltr *tc_fltr, + struct ice_adv_lkup_elem *list, + struct ice_adv_rule_info *rule_info, + u16 *l4_proto) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers; + bool inner = false; + int i = 0; + + rule_info->tun_type = ice_sw_type_from_tunnel(tc_fltr->tunnel_type); + if (tc_fltr->tunnel_type != TNL_LAST) { + i = ice_tc_fill_tunnel_outer(flags, tc_fltr, list); + + headers = &tc_fltr->inner_headers; + inner = true; + } else if (flags & ICE_TC_FLWR_FIELD_ETH_TYPE_ID) { + list[i].type = ICE_ETYPE_OL; + list[i].h_u.ethertype.ethtype_id = headers->l2_key.n_proto; + list[i].m_u.ethertype.ethtype_id = headers->l2_mask.n_proto; + i++; + } + + if (flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_SRC_MAC)) { + struct ice_tc_l2_hdr *l2_key, *l2_mask; + + l2_key = &headers->l2_key; + l2_mask = &headers->l2_mask; + + list[i].type = ice_proto_type_from_mac(inner); + if (flags & ICE_TC_FLWR_FIELD_DST_MAC) { + ether_addr_copy(list[i].h_u.eth_hdr.dst_addr, + l2_key->dst_mac); + ether_addr_copy(list[i].m_u.eth_hdr.dst_addr, + l2_mask->dst_mac); + } + if (flags & ICE_TC_FLWR_FIELD_SRC_MAC) { + ether_addr_copy(list[i].h_u.eth_hdr.src_addr, + l2_key->src_mac); + ether_addr_copy(list[i].m_u.eth_hdr.src_addr, + l2_mask->src_mac); + } + i++; + } + + /* copy VLAN info */ + if (flags & ICE_TC_FLWR_FIELD_VLAN) { + list[i].type = ICE_VLAN_OFOS; + list[i].h_u.vlan_hdr.vlan = headers->vlan_hdr.vlan_id; + list[i].m_u.vlan_hdr.vlan = cpu_to_be16(0xFFFF); + i++; + } + + /* copy L3 (IPv[4|6]: src, dest) address */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV4 | + ICE_TC_FLWR_FIELD_SRC_IPV4)) { + struct ice_tc_l3_hdr *l3_key, *l3_mask; + + list[i].type = ice_proto_type_from_ipv4(inner); + l3_key = &headers->l3_key; + l3_mask = &headers->l3_mask; + if (flags & ICE_TC_FLWR_FIELD_DEST_IPV4) { + list[i].h_u.ipv4_hdr.dst_addr = l3_key->dst_ipv4; + list[i].m_u.ipv4_hdr.dst_addr = l3_mask->dst_ipv4; + } + if 
(flags & ICE_TC_FLWR_FIELD_SRC_IPV4) { + list[i].h_u.ipv4_hdr.src_addr = l3_key->src_ipv4; + list[i].m_u.ipv4_hdr.src_addr = l3_mask->src_ipv4; + } + i++; + } else if (flags & (ICE_TC_FLWR_FIELD_DEST_IPV6 | + ICE_TC_FLWR_FIELD_SRC_IPV6)) { + struct ice_ipv6_hdr *ipv6_hdr, *ipv6_mask; + struct ice_tc_l3_hdr *l3_key, *l3_mask; + + list[i].type = ice_proto_type_from_ipv6(inner); + ipv6_hdr = &list[i].h_u.ipv6_hdr; + ipv6_mask = &list[i].m_u.ipv6_hdr; + l3_key = &headers->l3_key; + l3_mask = &headers->l3_mask; + + if (flags & ICE_TC_FLWR_FIELD_DEST_IPV6) { + memcpy(&ipv6_hdr->dst_addr, &l3_key->dst_ipv6_addr, + sizeof(l3_key->dst_ipv6_addr)); + memcpy(&ipv6_mask->dst_addr, &l3_mask->dst_ipv6_addr, + sizeof(l3_mask->dst_ipv6_addr)); + } + if (flags & ICE_TC_FLWR_FIELD_SRC_IPV6) { + memcpy(&ipv6_hdr->src_addr, &l3_key->src_ipv6_addr, + sizeof(l3_key->src_ipv6_addr)); + memcpy(&ipv6_mask->src_addr, &l3_mask->src_ipv6_addr, + sizeof(l3_mask->src_ipv6_addr)); + } + i++; + } + + /* copy L4 (src, dest) port */ + if (flags & (ICE_TC_FLWR_FIELD_DEST_L4_PORT | + ICE_TC_FLWR_FIELD_SRC_L4_PORT)) { + struct ice_tc_l4_hdr *l4_key, *l4_mask; + + list[i].type = ice_proto_type_from_l4_port(inner, headers->l3_key.ip_proto); + l4_key = &headers->l4_key; + l4_mask = &headers->l4_mask; + + if (flags & ICE_TC_FLWR_FIELD_DEST_L4_PORT) { + list[i].h_u.l4_hdr.dst_port = l4_key->dst_port; + list[i].m_u.l4_hdr.dst_port = l4_mask->dst_port; + } + if (flags & ICE_TC_FLWR_FIELD_SRC_L4_PORT) { + list[i].h_u.l4_hdr.src_port = l4_key->src_port; + list[i].m_u.l4_hdr.src_port = l4_mask->src_port; + } + i++; + } + + return i; +} + +/** + * ice_tc_tun_get_type - get the tunnel type + * @tunnel_dev: ptr to tunnel device + * + * This function detects appropriate tunnel_type if specified device is + * tunnel device such as VXLAN/Geneve + */ +static int ice_tc_tun_get_type(struct net_device *tunnel_dev) +{ + if (netif_is_vxlan(tunnel_dev)) + return TNL_VXLAN; + if (netif_is_geneve(tunnel_dev)) + return TNL_GENEVE; + if (netif_is_gretap(tunnel_dev) || + netif_is_ip6gretap(tunnel_dev)) + return TNL_GRETAP; + return TNL_LAST; +} + +bool ice_is_tunnel_supported(struct net_device *dev) +{ + return ice_tc_tun_get_type(dev) != TNL_LAST; +} + +static int +ice_eswitch_tc_parse_action(struct ice_tc_flower_fltr *fltr, + struct flow_action_entry *act) +{ + struct ice_repr *repr; + + switch (act->id) { + case FLOW_ACTION_DROP: + fltr->action.fltr_act = ICE_DROP_PACKET; + break; + + case FLOW_ACTION_REDIRECT: + fltr->action.fltr_act = ICE_FWD_TO_VSI; + + if (ice_is_port_repr_netdev(act->dev)) { + repr = ice_netdev_to_repr(act->dev); + + fltr->dest_vsi = repr->src_vsi; + fltr->direction = ICE_ESWITCH_FLTR_INGRESS; + } else if (netif_is_ice(act->dev) || + ice_is_tunnel_supported(act->dev)) { + fltr->direction = ICE_ESWITCH_FLTR_EGRESS; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported netdevice in switchdev mode"); + return -EINVAL; + } + + break; + + default: + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action in switchdev mode"); + return -EINVAL; + } + + return 0; +} + +static int +ice_eswitch_add_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; + struct ice_adv_rule_info rule_info = { 0 }; + struct ice_rule_query_data rule_added; + struct ice_hw *hw = &vsi->back->hw; + struct ice_adv_lkup_elem *list; + u32 flags = fltr->flags; + enum ice_status status; + int lkups_cnt; + int ret = 0; + int i; + + if (!flags || (flags & 
ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT)) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported encap field(s)"); + return -EOPNOTSUPP; + } + + lkups_cnt = ice_tc_count_lkups(flags, headers, fltr); + list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); + if (!list) + return -ENOMEM; + + i = ice_tc_fill_rules(hw, flags, fltr, list, &rule_info, NULL); + if (i != lkups_cnt) { + ret = -EINVAL; + goto exit; + } + + /* egress traffic is always redirect to uplink */ + if (fltr->direction == ICE_ESWITCH_FLTR_EGRESS) + fltr->dest_vsi = vsi->back->switchdev.uplink_vsi; + + rule_info.sw_act.fltr_act = fltr->action.fltr_act; + if (fltr->action.fltr_act != ICE_DROP_PACKET) + rule_info.sw_act.vsi_handle = fltr->dest_vsi->idx; + /* For now, making priority to be highest, and it also becomes + * the priority for recipe which will get created as a result of + * new extraction sequence based on input set. + * Priority '7' is max val for switch recipe, higher the number + * results into order of switch rule evaluation. + */ + rule_info.priority = 7; + + if (fltr->direction == ICE_ESWITCH_FLTR_INGRESS) { + rule_info.sw_act.flag |= ICE_FLTR_RX; + rule_info.sw_act.src = hw->pf_id; + rule_info.rx = true; + } else { + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + rule_info.rx = false; + rule_info.flags_info.act = ICE_SINGLE_ACT_LAN_ENABLE; + rule_info.flags_info.act_valid = true; + } + + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = fltr->cookie; + + status = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); + if (status == ICE_ERR_ALREADY_EXISTS) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because it already exist"); + ret = -EINVAL; + goto exit; + } else if (status) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter due to error"); + ret = -EIO; + goto exit; + } + + /* store the output params, which are needed later for removing + * advanced switch filter + */ + fltr->rid = rule_added.rid; + fltr->rule_id = rule_added.rule_id; + +exit: + kfree(list); + return ret; +} + +/** + * ice_add_tc_flower_adv_fltr - add appropriate filter rules + * @vsi: Pointer to VSI + * @tc_fltr: Pointer to TC flower filter structure + * + * based on filter parameters using Advance recipes supported + * by OS package. 
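+ *
+ * Rough flow (a sketch of the code below, not a separate API):
+ *	lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr);
+ *	ice_tc_fill_rules(hw, flags, tc_fltr, list, &rule_info, &l4_proto);
+ *	ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added);
+ * The returned rule_added.rid/rule_id are cached in tc_fltr so the
+ * rule can later be removed with ice_rem_adv_rule_by_id().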
+ */ +static int +ice_add_tc_flower_adv_fltr(struct ice_vsi *vsi, + struct ice_tc_flower_fltr *tc_fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &tc_fltr->outer_headers; + struct ice_adv_rule_info rule_info = {0}; + struct ice_rule_query_data rule_added; + struct ice_adv_lkup_elem *list; + struct ice_pf *pf = vsi->back; + struct ice_hw *hw = &pf->hw; + u32 flags = tc_fltr->flags; + struct ice_vsi *ch_vsi; + struct device *dev; + u16 lkups_cnt = 0; + u16 l4_proto = 0; + int ret = 0; + u16 i = 0; + + dev = ice_pf_to_dev(pf); + if (ice_is_safe_mode(pf)) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because driver is in safe mode"); + return -EOPNOTSUPP; + } + + if (!flags || (flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 | + ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT))) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unsupported encap field(s)"); + return -EOPNOTSUPP; + } + + /* get the channel (aka ADQ VSI) */ + if (tc_fltr->dest_vsi) + ch_vsi = tc_fltr->dest_vsi; + else + ch_vsi = vsi->tc_map_vsi[tc_fltr->action.tc_class]; + + lkups_cnt = ice_tc_count_lkups(flags, headers, tc_fltr); + list = kcalloc(lkups_cnt, sizeof(*list), GFP_ATOMIC); + if (!list) + return -ENOMEM; + + i = ice_tc_fill_rules(hw, flags, tc_fltr, list, &rule_info, &l4_proto); + if (i != lkups_cnt) { + ret = -EINVAL; + goto exit; + } + + rule_info.sw_act.fltr_act = tc_fltr->action.fltr_act; + if (tc_fltr->action.tc_class >= ICE_CHNL_START_TC) { + if (!ch_vsi) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, "Unable to add filter because specified destination doesn't exist"); + ret = -EINVAL; + goto exit; + } + + rule_info.sw_act.fltr_act = ICE_FWD_TO_VSI; + rule_info.sw_act.vsi_handle = ch_vsi->idx; + rule_info.priority = 7; + rule_info.sw_act.src = hw->pf_id; + rule_info.rx = true; + dev_dbg(dev, "add switch rule for TC:%u vsi_idx:%u, lkups_cnt:%u\n", + tc_fltr->action.tc_class, + rule_info.sw_act.vsi_handle, lkups_cnt); + } else { + rule_info.sw_act.flag |= ICE_FLTR_TX; + rule_info.sw_act.src = vsi->idx; + rule_info.rx = false; + } + + /* specify the cookie as filter_rule_id */ + rule_info.fltr_rule_id = tc_fltr->cookie; + + ret = ice_add_adv_rule(hw, list, lkups_cnt, &rule_info, &rule_added); + if (ret == -EEXIST) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter because it already exist"); + ret = -EINVAL; + goto exit; + } else if (ret) { + NL_SET_ERR_MSG_MOD(tc_fltr->extack, + "Unable to add filter due to error"); + ret = -EIO; + goto exit; + } + + /* store the output params, which are needed later for removing + * advanced switch filter + */ + tc_fltr->rid = rule_added.rid; + tc_fltr->rule_id = rule_added.rule_id; + if (tc_fltr->action.tc_class > 0 && ch_vsi) { + /* For PF ADQ, VSI type is set as ICE_VSI_CHNL, and + * for PF ADQ filter, it is not yet set in tc_fltr, + * hence store the dest_vsi ptr in tc_fltr + */ + if (ch_vsi->type == ICE_VSI_CHNL) + tc_fltr->dest_vsi = ch_vsi; + /* keep track of advanced switch filter for + * destination VSI (channel VSI) + */ + ch_vsi->num_chnl_fltr++; + /* in this case, dest_id is VSI handle (sw handle) */ + tc_fltr->dest_id = rule_added.vsi_handle; + + /* keeps track of channel filters for PF VSI */ + if (vsi->type == ICE_VSI_PF && + (flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC))) + pf->num_dmac_chnl_fltrs++; + } + dev_dbg(dev, "added switch rule (lkups_cnt %u, flags 0x%x) for TC %u, rid %u, rule_id %u, vsi_idx %u\n", + lkups_cnt, flags, + 
tc_fltr->action.tc_class, rule_added.rid, + rule_added.rule_id, rule_added.vsi_handle); +exit: + kfree(list); + return ret; +} + +/** + * ice_tc_set_ipv4 - Parse IPv4 addresses from TC flower filter + * @match: Pointer to flow match structure + * @fltr: Pointer to filter structure + * @headers: inner or outer header fields + * @is_encap: set true for tunnel IPv4 address + */ +static int +ice_tc_set_ipv4(struct flow_match_ipv4_addrs *match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap) +{ + if (match->key->dst) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV4; + else + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV4; + headers->l3_key.dst_ipv4 = match->key->dst; + headers->l3_mask.dst_ipv4 = match->mask->dst; + } + if (match->key->src) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV4; + else + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV4; + headers->l3_key.src_ipv4 = match->key->src; + headers->l3_mask.src_ipv4 = match->mask->src; + } + return 0; +} + +/** + * ice_tc_set_ipv6 - Parse IPv6 addresses from TC flower filter + * @match: Pointer to flow match structure + * @fltr: Pointer to filter structure + * @headers: inner or outer header fields + * @is_encap: set true for tunnel IPv6 address + */ +static int +ice_tc_set_ipv6(struct flow_match_ipv6_addrs *match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap) +{ + struct ice_tc_l3_hdr *l3_key, *l3_mask; + + /* src and dest IPV6 address should not be LOOPBACK + * (0:0:0:0:0:0:0:1), which can be represented as ::1 + */ + if (ipv6_addr_loopback(&match->key->dst) || + ipv6_addr_loopback(&match->key->src)) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Bad IPv6, addr is LOOPBACK"); + return -EINVAL; + } + /* if src/dest IPv6 address is *,* error */ + if (ipv6_addr_any(&match->mask->dst) && + ipv6_addr_any(&match->mask->src)) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Bad src/dest IPv6, addr is any"); + return -EINVAL; + } + if (!ipv6_addr_any(&match->mask->dst)) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_IPV6; + else + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_IPV6; + } + if (!ipv6_addr_any(&match->mask->src)) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_IPV6; + else + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_IPV6; + } + + l3_key = &headers->l3_key; + l3_mask = &headers->l3_mask; + + if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 | + ICE_TC_FLWR_FIELD_SRC_IPV6)) { + memcpy(&l3_key->src_ipv6_addr, &match->key->src.s6_addr, + sizeof(match->key->src.s6_addr)); + memcpy(&l3_mask->src_ipv6_addr, &match->mask->src.s6_addr, + sizeof(match->mask->src.s6_addr)); + } + if (fltr->flags & (ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 | + ICE_TC_FLWR_FIELD_DEST_IPV6)) { + memcpy(&l3_key->dst_ipv6_addr, &match->key->dst.s6_addr, + sizeof(match->key->dst.s6_addr)); + memcpy(&l3_mask->dst_ipv6_addr, &match->mask->dst.s6_addr, + sizeof(match->mask->dst.s6_addr)); + } + + return 0; +} + +/** + * ice_tc_set_port - Parse ports from TC flower filter + * @match: Flow match structure + * @fltr: Pointer to filter structure + * @headers: inner or outer header fields + * @is_encap: set true for tunnel port + */ +static int +ice_tc_set_port(struct flow_match_ports match, + struct ice_tc_flower_fltr *fltr, + struct ice_tc_flower_lyr_2_4_hdrs *headers, bool is_encap) +{ + if (match.key->dst) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT; + else + fltr->flags |= ICE_TC_FLWR_FIELD_DEST_L4_PORT; + fltr->flags |= 
ICE_TC_FLWR_FIELD_DEST_L4_PORT; + headers->l4_key.dst_port = match.key->dst; + headers->l4_mask.dst_port = match.mask->dst; + } + if (match.key->src) { + if (is_encap) + fltr->flags |= ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT; + else + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_L4_PORT; + headers->l4_key.src_port = match.key->src; + headers->l4_mask.src_port = match.mask->src; + } + return 0; +} + +static struct net_device * +ice_get_tunnel_device(struct net_device *dev, struct flow_rule *rule) +{ + struct flow_action_entry *act; + int i; + + if (ice_is_tunnel_supported(dev)) + return dev; + + flow_action_for_each(i, act, &rule->action) { + if (act->id == FLOW_ACTION_REDIRECT && + ice_is_tunnel_supported(act->dev)) + return act->dev; + } + + return NULL; +} + +static int +ice_parse_tunnel_attr(struct net_device *dev, struct flow_rule *rule, + struct ice_tc_flower_fltr *fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; + struct flow_match_control enc_control; + + fltr->tunnel_type = ice_tc_tun_get_type(dev); + headers->l3_key.ip_proto = IPPROTO_UDP; + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID)) { + struct flow_match_enc_keyid enc_keyid; + + flow_rule_match_enc_keyid(rule, &enc_keyid); + + if (!enc_keyid.mask->keyid || + enc_keyid.mask->keyid != cpu_to_be32(ICE_TC_FLOWER_MASK_32)) + return -EINVAL; + + fltr->flags |= ICE_TC_FLWR_FIELD_TENANT_ID; + fltr->tenant_id = enc_keyid.key->keyid; + } + + flow_rule_match_enc_control(rule, &enc_control); + + if (enc_control.key->addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_enc_ipv4_addrs(rule, &match); + if (ice_tc_set_ipv4(&match, fltr, headers, true)) + return -EINVAL; + } else if (enc_control.key->addr_type == + FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_enc_ipv6_addrs(rule, &match); + if (ice_tc_set_ipv6(&match, fltr, headers, true)) + return -EINVAL; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_IP)) { + struct flow_match_ip match; + + flow_rule_match_enc_ip(rule, &match); + headers->l3_key.tos = match.key->tos; + headers->l3_key.ttl = match.key->ttl; + headers->l3_mask.tos = match.mask->tos; + headers->l3_mask.ttl = match.mask->ttl; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_enc_ports(rule, &match); + if (ice_tc_set_port(match, fltr, headers, true)) + return -EINVAL; + } + + return 0; +} + +/** + * ice_parse_cls_flower - Parse TC flower filters provided by kernel + * @vsi: Pointer to the VSI + * @filter_dev: Pointer to device on which filter is being added + * @f: Pointer to struct flow_cls_offload + * @fltr: Pointer to filter structure + */ +static int +ice_parse_cls_flower(struct net_device *filter_dev, struct ice_vsi *vsi, + struct flow_cls_offload *f, + struct ice_tc_flower_fltr *fltr) +{ + struct ice_tc_flower_lyr_2_4_hdrs *headers = &fltr->outer_headers; + struct flow_rule *rule = flow_cls_offload_flow_rule(f); + u16 n_proto_mask = 0, n_proto_key = 0, addr_type = 0; + struct flow_dissector *dissector; + struct net_device *tunnel_dev; + + dissector = rule->match.dissector; + + if (dissector->used_keys & + ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) | + BIT(FLOW_DISSECTOR_KEY_BASIC) | + BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_VLAN) | + BIT(FLOW_DISSECTOR_KEY_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_CONTROL) | + 
BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IP) | + BIT(FLOW_DISSECTOR_KEY_PORTS))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported key used"); + return -EOPNOTSUPP; + } + + tunnel_dev = ice_get_tunnel_device(filter_dev, rule); + if (tunnel_dev) { + int err; + + filter_dev = tunnel_dev; + + err = ice_parse_tunnel_attr(filter_dev, rule, fltr); + if (err) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to parse TC flower tunnel attributes"); + return err; + } + + /* header pointers should point to the inner headers, outer + * header were already set by ice_parse_tunnel_attr + */ + headers = &fltr->inner_headers; + } else if (dissector->used_keys & + (BIT(FLOW_DISSECTOR_KEY_ENC_IPV4_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_IPV6_ADDRS) | + BIT(FLOW_DISSECTOR_KEY_ENC_KEYID) | + BIT(FLOW_DISSECTOR_KEY_ENC_PORTS))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Tunnel key used, but device isn't a tunnel"); + return -EOPNOTSUPP; + } else { + fltr->tunnel_type = TNL_LAST; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) { + struct flow_match_basic match; + + flow_rule_match_basic(rule, &match); + + n_proto_key = ntohs(match.key->n_proto); + n_proto_mask = ntohs(match.mask->n_proto); + + if (n_proto_key == ETH_P_ALL || n_proto_key == 0) { + n_proto_key = 0; + n_proto_mask = 0; + } else { + fltr->flags |= ICE_TC_FLWR_FIELD_ETH_TYPE_ID; + } + + headers->l2_key.n_proto = cpu_to_be16(n_proto_key); + headers->l2_mask.n_proto = cpu_to_be16(n_proto_mask); + headers->l3_key.ip_proto = match.key->ip_proto; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) { + struct flow_match_eth_addrs match; + + flow_rule_match_eth_addrs(rule, &match); + + if (!is_zero_ether_addr(match.key->dst)) { + ether_addr_copy(headers->l2_key.dst_mac, + match.key->dst); + ether_addr_copy(headers->l2_mask.dst_mac, + match.mask->dst); + fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC; + } + + if (!is_zero_ether_addr(match.key->src)) { + ether_addr_copy(headers->l2_key.src_mac, + match.key->src); + ether_addr_copy(headers->l2_mask.src_mac, + match.mask->src); + fltr->flags |= ICE_TC_FLWR_FIELD_SRC_MAC; + } + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN) || + is_vlan_dev(filter_dev)) { + struct flow_dissector_key_vlan mask; + struct flow_dissector_key_vlan key; + struct flow_match_vlan match; + + if (is_vlan_dev(filter_dev)) { + match.key = &key; + match.key->vlan_id = vlan_dev_vlan_id(filter_dev); + match.key->vlan_priority = 0; + match.mask = &mask; + memset(match.mask, 0xff, sizeof(*match.mask)); + match.mask->vlan_priority = 0; + } else { + flow_rule_match_vlan(rule, &match); + } + + if (match.mask->vlan_id) { + if (match.mask->vlan_id == VLAN_VID_MASK) { + fltr->flags |= ICE_TC_FLWR_FIELD_VLAN; + } else { + NL_SET_ERR_MSG_MOD(fltr->extack, "Bad VLAN mask"); + return -EINVAL; + } + } + + headers->vlan_hdr.vlan_id = + cpu_to_be16(match.key->vlan_id & VLAN_VID_MASK); + if (match.mask->vlan_priority) + headers->vlan_hdr.vlan_prio = match.key->vlan_priority; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_CONTROL)) { + struct flow_match_control match; + + flow_rule_match_control(rule, &match); + + addr_type = match.key->addr_type; + } + + if (addr_type == FLOW_DISSECTOR_KEY_IPV4_ADDRS) { + struct flow_match_ipv4_addrs match; + + flow_rule_match_ipv4_addrs(rule, &match); + if (ice_tc_set_ipv4(&match, fltr, headers, false)) + return -EINVAL; + } + + if 
(addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) { + struct flow_match_ipv6_addrs match; + + flow_rule_match_ipv6_addrs(rule, &match); + if (ice_tc_set_ipv6(&match, fltr, headers, false)) + return -EINVAL; + } + + if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_PORTS)) { + struct flow_match_ports match; + + flow_rule_match_ports(rule, &match); + if (ice_tc_set_port(match, fltr, headers, false)) + return -EINVAL; + switch (headers->l3_key.ip_proto) { + case IPPROTO_TCP: + case IPPROTO_UDP: + break; + default: + NL_SET_ERR_MSG_MOD(fltr->extack, "Only UDP and TCP transport are supported"); + return -EINVAL; + } + } + return 0; +} + +/** + * ice_add_switch_fltr - Add TC flower filters + * @vsi: Pointer to VSI + * @fltr: Pointer to struct ice_tc_flower_fltr + * + * Add filter in HW switch block + */ +static int +ice_add_switch_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) +{ + if (fltr->action.fltr_act == ICE_FWD_TO_QGRP) + return -EOPNOTSUPP; + + if (ice_is_eswitch_mode_switchdev(vsi->back)) + return ice_eswitch_add_tc_fltr(vsi, fltr); + + return ice_add_tc_flower_adv_fltr(vsi, fltr); +} + +/** + * ice_handle_tclass_action - Support directing to a traffic class + * @vsi: Pointer to VSI + * @cls_flower: Pointer to TC flower offload structure + * @fltr: Pointer to TC flower filter structure + * + * Support directing traffic to a traffic class + */ +static int +ice_handle_tclass_action(struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) +{ + int tc = tc_classid_to_hwtc(vsi->netdev, cls_flower->classid); + struct ice_vsi *main_vsi; + + if (tc < 0) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because specified destination is invalid"); + return -EINVAL; + } + if (!tc) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of invalid destination"); + return -EINVAL; + } + + if (!(vsi->all_enatc & BIT(tc))) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unable to add filter because of non-existence destination"); + return -EINVAL; + } + + /* Redirect to a TC class or Queue Group */ + main_vsi = ice_get_main_vsi(vsi->back); + if (!main_vsi || !main_vsi->netdev) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because of invalid netdevice"); + return -EINVAL; + } + + if ((fltr->flags & ICE_TC_FLWR_FIELD_TENANT_ID) && + (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_SRC_MAC))) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because filter using tunnel key and inner MAC is unsupported combination"); + return -EOPNOTSUPP; + } + + /* For ADQ, filter must include dest MAC address, otherwise unwanted + * packets with unrelated MAC address get delivered to ADQ VSIs as long + * as remaining filter criteria is satisfied such as dest IP address + * and dest/src L4 port. Following code is trying to handle: + * 1. For non-tunnel, if user specify MAC addresses, use them (means + * this code won't do anything + * 2. For non-tunnel, if user didn't specify MAC address, add implicit + * dest MAC to be lower netdev's active unicast MAC address + */ + if (!(fltr->flags & ICE_TC_FLWR_FIELD_DST_MAC)) { + ether_addr_copy(fltr->outer_headers.l2_key.dst_mac, + main_vsi->netdev->dev_addr); + eth_broadcast_addr(fltr->outer_headers.l2_mask.dst_mac); + fltr->flags |= ICE_TC_FLWR_FIELD_DST_MAC; + } + + /* validate specified dest MAC address, make sure either it belongs to + * lower netdev or any of MACVLAN. MACVLANs MAC address are added as + * unicast MAC filter destined to main VSI. 
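+ * For example (device name and addresses are placeholders), an ADQ
+ * filter added with
+ *	tc filter add dev ethX protocol ip ingress prio 1 flower \
+ *		dst_ip 192.168.1.10 ip_proto tcp dst_port 80 skip_sw hw_tc 1
+ * carries no dst_mac, so the implicit DMAC chosen above must already
+ * be programmed as a unicast MAC filter on the main VSI.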
+ */ + if (!ice_mac_fltr_exist(&main_vsi->back->hw, + fltr->outer_headers.l2_key.dst_mac, + main_vsi->idx)) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because legacy MAC filter for specified destination doesn't exist"); + return -EINVAL; + } + + /* Make sure VLAN is already added to main VSI, before allowing ADQ to + * add a VLAN based filter such as MAC + VLAN + L4 port. + */ + if (fltr->flags & ICE_TC_FLWR_FIELD_VLAN) { + u16 vlan_id = be16_to_cpu(fltr->outer_headers.vlan_hdr.vlan_id); + + if (!ice_vlan_fltr_exist(&main_vsi->back->hw, vlan_id, + main_vsi->idx)) { + NL_SET_ERR_MSG_MOD(fltr->extack, + "Unable to add filter because legacy VLAN filter for specified destination doesn't exist"); + return -EINVAL; + } + } + fltr->action.fltr_act = ICE_FWD_TO_VSI; + fltr->action.tc_class = tc; + + return 0; +} + +/** + * ice_parse_tc_flower_actions - Parse the actions for a TC filter + * @vsi: Pointer to VSI + * @cls_flower: Pointer to TC flower offload structure + * @fltr: Pointer to TC flower filter structure + * + * Parse the actions for a TC filter + */ +static int +ice_parse_tc_flower_actions(struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower, + struct ice_tc_flower_fltr *fltr) +{ + struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower); + struct flow_action *flow_action = &rule->action; + struct flow_action_entry *act; + int i; + + if (cls_flower->classid) + return ice_handle_tclass_action(vsi, cls_flower, fltr); + + if (!flow_action_has_entries(flow_action)) + return -EINVAL; + + flow_action_for_each(i, act, flow_action) { + if (ice_is_eswitch_mode_switchdev(vsi->back)) { + int err = ice_eswitch_tc_parse_action(fltr, act); + + if (err) + return err; + continue; + } + /* Allow only one rule per filter */ + + /* Drop action */ + if (act->id == FLOW_ACTION_DROP) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Unsupported action DROP"); + return -EINVAL; + } + fltr->action.fltr_act = ICE_FWD_TO_VSI; + } + return 0; +} + +/** + * ice_del_tc_fltr - deletes a filter from HW table + * @vsi: Pointer to VSI + * @fltr: Pointer to struct ice_tc_flower_fltr + * + * This function deletes a filter from HW table and manages book-keeping + */ +static int ice_del_tc_fltr(struct ice_vsi *vsi, struct ice_tc_flower_fltr *fltr) +{ + struct ice_rule_query_data rule_rem; + struct ice_pf *pf = vsi->back; + int err; + + rule_rem.rid = fltr->rid; + rule_rem.rule_id = fltr->rule_id; + rule_rem.vsi_handle = fltr->dest_id; + err = ice_rem_adv_rule_by_id(&pf->hw, &rule_rem); + if (err) { + if (err == ICE_ERR_DOES_NOT_EXIST) { + NL_SET_ERR_MSG_MOD(fltr->extack, "Filter does not exist"); + return -ENOENT; + } + NL_SET_ERR_MSG_MOD(fltr->extack, "Failed to delete TC flower filter"); + return -EIO; + } + + /* update advanced switch filter count for destination + * VSI if filter destination was VSI + */ + if (fltr->dest_vsi) { + if (fltr->dest_vsi->type == ICE_VSI_CHNL) { + fltr->dest_vsi->num_chnl_fltr--; + + /* keeps track of channel filters for PF VSI */ + if (vsi->type == ICE_VSI_PF && + (fltr->flags & (ICE_TC_FLWR_FIELD_DST_MAC | + ICE_TC_FLWR_FIELD_ENC_DST_MAC))) + pf->num_dmac_chnl_fltrs--; + } + } + return 0; +} + +/** + * ice_add_tc_fltr - adds a TC flower filter + * @netdev: Pointer to netdev + * @vsi: Pointer to VSI + * @f: Pointer to flower offload structure + * @__fltr: Pointer to struct ice_tc_flower_fltr + * + * This function parses TC-flower input fields, parses action, + * and adds a filter. 
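+ *
+ * On success the new filter is returned through __fltr and the caller
+ * (ice_add_cls_flower()) links it into pf->tc_flower_fltr_list; on any
+ * error the filter is freed here and *__fltr stays NULL.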
+ */ +static int +ice_add_tc_fltr(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *f, + struct ice_tc_flower_fltr **__fltr) +{ + struct ice_tc_flower_fltr *fltr; + int err; + + /* by default, set output to be INVALID */ + *__fltr = NULL; + + fltr = kzalloc(sizeof(*fltr), GFP_KERNEL); + if (!fltr) + return -ENOMEM; + + fltr->cookie = f->cookie; + fltr->extack = f->common.extack; + fltr->src_vsi = vsi; + INIT_HLIST_NODE(&fltr->tc_flower_node); + + err = ice_parse_cls_flower(netdev, vsi, f, fltr); + if (err < 0) + goto err; + + err = ice_parse_tc_flower_actions(vsi, f, fltr); + if (err < 0) + goto err; + + err = ice_add_switch_fltr(vsi, fltr); + if (err < 0) + goto err; + + /* return the newly created filter */ + *__fltr = fltr; + + return 0; +err: + kfree(fltr); + return err; +} + +/** + * ice_find_tc_flower_fltr - Find the TC flower filter in the list + * @pf: Pointer to PF + * @cookie: filter specific cookie + */ +static struct ice_tc_flower_fltr * +ice_find_tc_flower_fltr(struct ice_pf *pf, unsigned long cookie) +{ + struct ice_tc_flower_fltr *fltr; + + hlist_for_each_entry(fltr, &pf->tc_flower_fltr_list, tc_flower_node) + if (cookie == fltr->cookie) + return fltr; + + return NULL; +} + +/** + * ice_add_cls_flower - add TC flower filters + * @netdev: Pointer to filter device + * @vsi: Pointer to VSI + * @cls_flower: Pointer to flower offload structure + */ +int +ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower) +{ + struct netlink_ext_ack *extack = cls_flower->common.extack; + struct net_device *vsi_netdev = vsi->netdev; + struct ice_tc_flower_fltr *fltr; + struct ice_pf *pf = vsi->back; + int err; + + if (ice_is_reset_in_progress(pf->state)) + return -EBUSY; + if (test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags)) + return -EINVAL; + + if (ice_is_port_repr_netdev(netdev)) + vsi_netdev = netdev; + + if (!(vsi_netdev->features & NETIF_F_HW_TC) && + !test_bit(ICE_FLAG_CLS_FLOWER, pf->flags)) { + /* Based on TC indirect notifications from kernel, all ice + * devices get an instance of rule from higher level device. + * Avoid triggering explicit error in this case. 
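+ * Only the netdev that owns the VSI reports the missing hw-tc-offload
+ * feature via extack; indirectly notified devices just return the
+ * error without a message.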
+ */ + if (netdev == vsi_netdev) + NL_SET_ERR_MSG_MOD(extack, "can't apply TC flower filters, turn ON hw-tc-offload and try again"); + return -EINVAL; + } + + /* avoid duplicate entries, if exists - return error */ + fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie); + if (fltr) { + NL_SET_ERR_MSG_MOD(extack, "filter cookie already exists, ignoring"); + return -EEXIST; + } + + /* prep and add TC-flower filter in HW */ + err = ice_add_tc_fltr(netdev, vsi, cls_flower, &fltr); + if (err) + return err; + + /* add filter into an ordered list */ + hlist_add_head(&fltr->tc_flower_node, &pf->tc_flower_fltr_list); + return 0; +} + +/** + * ice_del_cls_flower - delete TC flower filters + * @vsi: Pointer to VSI + * @cls_flower: Pointer to struct flow_cls_offload + */ +int +ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower) +{ + struct ice_tc_flower_fltr *fltr; + struct ice_pf *pf = vsi->back; + int err; + + /* find filter */ + fltr = ice_find_tc_flower_fltr(pf, cls_flower->cookie); + if (!fltr) { + if (!test_bit(ICE_FLAG_TC_MQPRIO, pf->flags) && + hlist_empty(&pf->tc_flower_fltr_list)) + return 0; + + NL_SET_ERR_MSG_MOD(cls_flower->common.extack, "failed to delete TC flower filter because unable to find it"); + return -EINVAL; + } + + fltr->extack = cls_flower->common.extack; + /* delete filter from HW */ + err = ice_del_tc_fltr(vsi, fltr); + if (err) + return err; + + /* delete filter from an ordered list */ + hlist_del(&fltr->tc_flower_node); + + /* free the filter node */ + kfree(fltr); + + return 0; +} + +/** + * ice_replay_tc_fltrs - replay TC filters + * @pf: pointer to PF struct + */ +void ice_replay_tc_fltrs(struct ice_pf *pf) +{ + struct ice_tc_flower_fltr *fltr; + struct hlist_node *node; + + hlist_for_each_entry_safe(fltr, node, + &pf->tc_flower_fltr_list, + tc_flower_node) { + fltr->extack = NULL; + ice_add_switch_fltr(fltr->src_vsi, fltr); + } +} diff --git a/drivers/net/ethernet/intel/ice/ice_tc_lib.h b/drivers/net/ethernet/intel/ice/ice_tc_lib.h new file mode 100644 index 000000000000..319049477959 --- /dev/null +++ b/drivers/net/ethernet/intel/ice/ice_tc_lib.h @@ -0,0 +1,162 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +/* Copyright (C) 2019-2021, Intel Corporation. 
*/ + +#ifndef _ICE_TC_LIB_H_ +#define _ICE_TC_LIB_H_ + +#define ICE_TC_FLWR_FIELD_DST_MAC BIT(0) +#define ICE_TC_FLWR_FIELD_SRC_MAC BIT(1) +#define ICE_TC_FLWR_FIELD_VLAN BIT(2) +#define ICE_TC_FLWR_FIELD_DEST_IPV4 BIT(3) +#define ICE_TC_FLWR_FIELD_SRC_IPV4 BIT(4) +#define ICE_TC_FLWR_FIELD_DEST_IPV6 BIT(5) +#define ICE_TC_FLWR_FIELD_SRC_IPV6 BIT(6) +#define ICE_TC_FLWR_FIELD_DEST_L4_PORT BIT(7) +#define ICE_TC_FLWR_FIELD_SRC_L4_PORT BIT(8) +#define ICE_TC_FLWR_FIELD_TENANT_ID BIT(9) +#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV4 BIT(10) +#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV4 BIT(11) +#define ICE_TC_FLWR_FIELD_ENC_DEST_IPV6 BIT(12) +#define ICE_TC_FLWR_FIELD_ENC_SRC_IPV6 BIT(13) +#define ICE_TC_FLWR_FIELD_ENC_DEST_L4_PORT BIT(14) +#define ICE_TC_FLWR_FIELD_ENC_SRC_L4_PORT BIT(15) +#define ICE_TC_FLWR_FIELD_ENC_DST_MAC BIT(16) +#define ICE_TC_FLWR_FIELD_ETH_TYPE_ID BIT(17) + +#define ICE_TC_FLOWER_MASK_32 0xFFFFFFFF + +struct ice_indr_block_priv { + struct net_device *netdev; + struct ice_netdev_priv *np; + struct list_head list; +}; + +struct ice_tc_flower_action { + u32 tc_class; + enum ice_sw_fwd_act_type fltr_act; +}; + +struct ice_tc_vlan_hdr { + __be16 vlan_id; /* Only last 12 bits valid */ + u16 vlan_prio; /* Only last 3 bits valid (valid values: 0..7) */ +}; + +struct ice_tc_l2_hdr { + u8 dst_mac[ETH_ALEN]; + u8 src_mac[ETH_ALEN]; + __be16 n_proto; /* Ethernet Protocol */ +}; + +struct ice_tc_l3_hdr { + u8 ip_proto; /* IPPROTO value */ + union { + struct { + struct in_addr dst_ip; + struct in_addr src_ip; + } v4; + struct { + struct in6_addr dst_ip6; + struct in6_addr src_ip6; + } v6; + } ip; +#define dst_ipv6 ip.v6.dst_ip6.s6_addr32 +#define dst_ipv6_addr ip.v6.dst_ip6.s6_addr +#define src_ipv6 ip.v6.src_ip6.s6_addr32 +#define src_ipv6_addr ip.v6.src_ip6.s6_addr +#define dst_ipv4 ip.v4.dst_ip.s_addr +#define src_ipv4 ip.v4.src_ip.s_addr + + u8 tos; + u8 ttl; +}; + +struct ice_tc_l4_hdr { + __be16 dst_port; + __be16 src_port; +}; + +struct ice_tc_flower_lyr_2_4_hdrs { + /* L2 layer fields with their mask */ + struct ice_tc_l2_hdr l2_key; + struct ice_tc_l2_hdr l2_mask; + struct ice_tc_vlan_hdr vlan_hdr; + /* L3 (IPv4[6]) layer fields with their mask */ + struct ice_tc_l3_hdr l3_key; + struct ice_tc_l3_hdr l3_mask; + + /* L4 layer fields with their mask */ + struct ice_tc_l4_hdr l4_key; + struct ice_tc_l4_hdr l4_mask; +}; + +enum ice_eswitch_fltr_direction { + ICE_ESWITCH_FLTR_INGRESS, + ICE_ESWITCH_FLTR_EGRESS, +}; + +struct ice_tc_flower_fltr { + struct hlist_node tc_flower_node; + + /* cookie becomes filter_rule_id if rule is added successfully */ + unsigned long cookie; + + /* add_adv_rule returns information like recipe ID, rule_id. 
Store + * those values since they are needed to remove advanced rule + */ + u16 rid; + u16 rule_id; + /* this could be queue/vsi_idx (sw handle)/queue_group, depending upon + * destination type + */ + u16 dest_id; + /* if dest_id is vsi_idx, then need to store destination VSI ptr */ + struct ice_vsi *dest_vsi; + /* direction of fltr for eswitch use case */ + enum ice_eswitch_fltr_direction direction; + + /* Parsed TC flower configuration params */ + struct ice_tc_flower_lyr_2_4_hdrs outer_headers; + struct ice_tc_flower_lyr_2_4_hdrs inner_headers; + struct ice_vsi *src_vsi; + __be32 tenant_id; + u32 flags; + u8 tunnel_type; + struct ice_tc_flower_action action; + + /* cache ptr which is used wherever needed to communicate netlink + * messages + */ + struct netlink_ext_ack *extack; +}; + +/** + * ice_is_chnl_fltr - is this a valid channel filter + * @f: Pointer to tc-flower filter + * + * Criteria to determine of given filter is valid channel filter + * or not is based on its "destination". If destination is hw_tc (aka tc_class) + * and it is non-zero, then it is valid channel (aka ADQ) filter + */ +static inline bool ice_is_chnl_fltr(struct ice_tc_flower_fltr *f) +{ + return !!f->action.tc_class; +} + +/** + * ice_chnl_dmac_fltr_cnt - DMAC based CHNL filter count + * @pf: Pointer to PF + */ +static inline int ice_chnl_dmac_fltr_cnt(struct ice_pf *pf) +{ + return pf->num_dmac_chnl_fltrs; +} + +int +ice_add_cls_flower(struct net_device *netdev, struct ice_vsi *vsi, + struct flow_cls_offload *cls_flower); +int +ice_del_cls_flower(struct ice_vsi *vsi, struct flow_cls_offload *cls_flower); +void ice_replay_tc_fltrs(struct ice_pf *pf); +bool ice_is_tunnel_supported(struct net_device *dev); + +#endif /* _ICE_TC_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_trace.h b/drivers/net/ethernet/intel/ice/ice_trace.h index 9bc0b8fdfc77..cf685247c07a 100644 --- a/drivers/net/ethernet/intel/ice/ice_trace.h +++ b/drivers/net/ethernet/intel/ice/ice_trace.h @@ -64,15 +64,15 @@ DECLARE_EVENT_CLASS(ice_rx_dim_template, TP_ARGS(q_vector, dim), TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector) __field(struct dim *, dim) - __string(devname, q_vector->rx.ring->netdev->name)), + __string(devname, q_vector->rx.rx_ring->netdev->name)), TP_fast_assign(__entry->q_vector = q_vector; __entry->dim = dim; - __assign_str(devname, q_vector->rx.ring->netdev->name);), + __assign_str(devname, q_vector->rx.rx_ring->netdev->name);), TP_printk("netdev: %s Rx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d", __get_str(devname), - __entry->q_vector->rx.ring->q_index, + __entry->q_vector->rx.rx_ring->q_index, __entry->dim->state, __entry->dim->profile_ix, __entry->dim->tune_state, @@ -91,15 +91,15 @@ DECLARE_EVENT_CLASS(ice_tx_dim_template, TP_ARGS(q_vector, dim), TP_STRUCT__entry(__field(struct ice_q_vector *, q_vector) __field(struct dim *, dim) - __string(devname, q_vector->tx.ring->netdev->name)), + __string(devname, q_vector->tx.tx_ring->netdev->name)), TP_fast_assign(__entry->q_vector = q_vector; __entry->dim = dim; - __assign_str(devname, q_vector->tx.ring->netdev->name);), + __assign_str(devname, q_vector->tx.tx_ring->netdev->name);), TP_printk("netdev: %s Tx-Q: %d dim-state: %d dim-profile: %d dim-tune: %d dim-st-right: %d dim-st-left: %d dim-tired: %d", __get_str(devname), - __entry->q_vector->tx.ring->q_index, + __entry->q_vector->tx.tx_ring->q_index, __entry->dim->state, __entry->dim->profile_ix, __entry->dim->tune_state, @@ -115,7 +115,7 @@ 
DEFINE_EVENT(ice_tx_dim_template, ice_tx_dim_work, /* Events related to a vsi & ring */ DECLARE_EVENT_CLASS(ice_tx_template, - TP_PROTO(struct ice_ring *ring, struct ice_tx_desc *desc, + TP_PROTO(struct ice_tx_ring *ring, struct ice_tx_desc *desc, struct ice_tx_buf *buf), TP_ARGS(ring, desc, buf), @@ -135,7 +135,7 @@ DECLARE_EVENT_CLASS(ice_tx_template, #define DEFINE_TX_TEMPLATE_OP_EVENT(name) \ DEFINE_EVENT(ice_tx_template, name, \ - TP_PROTO(struct ice_ring *ring, \ + TP_PROTO(struct ice_tx_ring *ring, \ struct ice_tx_desc *desc, \ struct ice_tx_buf *buf), \ TP_ARGS(ring, desc, buf)) @@ -145,7 +145,7 @@ DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap); DEFINE_TX_TEMPLATE_OP_EVENT(ice_clean_tx_irq_unmap_eop); DECLARE_EVENT_CLASS(ice_rx_template, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc), + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc), TP_ARGS(ring, desc), @@ -161,12 +161,12 @@ DECLARE_EVENT_CLASS(ice_rx_template, __entry->ring, __entry->desc) ); DEFINE_EVENT(ice_rx_template, ice_clean_rx_irq, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc), + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc), TP_ARGS(ring, desc) ); DECLARE_EVENT_CLASS(ice_rx_indicate_template, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc, + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb), @@ -186,13 +186,13 @@ DECLARE_EVENT_CLASS(ice_rx_indicate_template, ); DEFINE_EVENT(ice_rx_indicate_template, ice_clean_rx_irq_indicate, - TP_PROTO(struct ice_ring *ring, union ice_32b_rx_flex_desc *desc, + TP_PROTO(struct ice_rx_ring *ring, union ice_32b_rx_flex_desc *desc, struct sk_buff *skb), TP_ARGS(ring, desc, skb) ); DECLARE_EVENT_CLASS(ice_xmit_template, - TP_PROTO(struct ice_ring *ring, struct sk_buff *skb), + TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb), TP_ARGS(ring, skb), @@ -210,7 +210,7 @@ DECLARE_EVENT_CLASS(ice_xmit_template, #define DEFINE_XMIT_TEMPLATE_OP_EVENT(name) \ DEFINE_EVENT(ice_xmit_template, name, \ - TP_PROTO(struct ice_ring *ring, struct sk_buff *skb), \ + TP_PROTO(struct ice_tx_ring *ring, struct sk_buff *skb), \ TP_ARGS(ring, skb)) DEFINE_XMIT_TEMPLATE_OP_EVENT(ice_xmit_frame_ring); diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index 6ee8e0032d52..bc3ba19dc88f 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -6,6 +6,7 @@ #include <linux/prefetch.h> #include <linux/mm.h> #include <linux/bpf_trace.h> +#include <net/dsfield.h> #include <net/xdp.h> #include "ice_txrx_lib.h" #include "ice_lib.h" @@ -13,6 +14,7 @@ #include "ice_trace.h" #include "ice_dcb_lib.h" #include "ice_xsk.h" +#include "ice_eswitch.h" #define ICE_RX_HDR_SIZE 256 @@ -32,7 +34,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, struct ice_tx_buf *tx_buf, *first; struct ice_fltr_desc *f_desc; struct ice_tx_desc *tx_desc; - struct ice_ring *tx_ring; + struct ice_tx_ring *tx_ring; struct device *dev; dma_addr_t dma; u32 td_cmd; @@ -106,7 +108,7 @@ ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, * @tx_buf: the buffer to free */ static void -ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) +ice_unmap_and_free_tx_buf(struct ice_tx_ring *ring, struct ice_tx_buf *tx_buf) { if (tx_buf->skb) { if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT) @@ -133,7 +135,7 @@ 
ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf) /* tx_buf must be completely set up in the transmit path */ } -static struct netdev_queue *txring_txq(const struct ice_ring *ring) +static struct netdev_queue *txring_txq(const struct ice_tx_ring *ring) { return netdev_get_tx_queue(ring->netdev, ring->q_index); } @@ -142,8 +144,9 @@ static struct netdev_queue *txring_txq(const struct ice_ring *ring) * ice_clean_tx_ring - Free any empty Tx buffers * @tx_ring: ring to be cleaned */ -void ice_clean_tx_ring(struct ice_ring *tx_ring) +void ice_clean_tx_ring(struct ice_tx_ring *tx_ring) { + u32 size; u16 i; if (ice_ring_is_xdp(tx_ring) && tx_ring->xsk_pool) { @@ -162,8 +165,10 @@ void ice_clean_tx_ring(struct ice_ring *tx_ring) tx_skip_free: memset(tx_ring->tx_buf, 0, sizeof(*tx_ring->tx_buf) * tx_ring->count); + size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), + PAGE_SIZE); /* Zero out the descriptor ring */ - memset(tx_ring->desc, 0, tx_ring->size); + memset(tx_ring->desc, 0, size); tx_ring->next_to_use = 0; tx_ring->next_to_clean = 0; @@ -181,14 +186,18 @@ tx_skip_free: * * Free all transmit software resources */ -void ice_free_tx_ring(struct ice_ring *tx_ring) +void ice_free_tx_ring(struct ice_tx_ring *tx_ring) { + u32 size; + ice_clean_tx_ring(tx_ring); devm_kfree(tx_ring->dev, tx_ring->tx_buf); tx_ring->tx_buf = NULL; if (tx_ring->desc) { - dmam_free_coherent(tx_ring->dev, tx_ring->size, + size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), + PAGE_SIZE); + dmam_free_coherent(tx_ring->dev, size, tx_ring->desc, tx_ring->dma); tx_ring->desc = NULL; } @@ -201,7 +210,7 @@ void ice_free_tx_ring(struct ice_ring *tx_ring) * * Returns true if there's any budget left (e.g. the clean is finished) */ -static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) +static bool ice_clean_tx_irq(struct ice_tx_ring *tx_ring, int napi_budget) { unsigned int total_bytes = 0, total_pkts = 0; unsigned int budget = ICE_DFLT_IRQ_WORK; @@ -238,11 +247,8 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) total_bytes += tx_buf->bytecount; total_pkts += tx_buf->gso_segs; - if (ice_ring_is_xdp(tx_ring)) - page_frag_free(tx_buf->raw_buf); - else - /* free the skb */ - napi_consume_skb(tx_buf->skb, napi_budget); + /* free the skb */ + napi_consume_skb(tx_buf->skb, napi_budget); /* unmap skb header data */ dma_unmap_single(tx_ring->dev, @@ -298,9 +304,6 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) ice_update_tx_ring_stats(tx_ring, total_pkts, total_bytes); - if (ice_ring_is_xdp(tx_ring)) - return !!budget; - netdev_tx_completed_queue(txring_txq(tx_ring), total_pkts, total_bytes); @@ -329,9 +332,10 @@ static bool ice_clean_tx_irq(struct ice_ring *tx_ring, int napi_budget) * * Return 0 on success, negative on error */ -int ice_setup_tx_ring(struct ice_ring *tx_ring) +int ice_setup_tx_ring(struct ice_tx_ring *tx_ring) { struct device *dev = tx_ring->dev; + u32 size; if (!dev) return -ENOMEM; @@ -339,19 +343,19 @@ int ice_setup_tx_ring(struct ice_ring *tx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(tx_ring->tx_buf); tx_ring->tx_buf = - devm_kzalloc(dev, sizeof(*tx_ring->tx_buf) * tx_ring->count, + devm_kcalloc(dev, sizeof(*tx_ring->tx_buf), tx_ring->count, GFP_KERNEL); if (!tx_ring->tx_buf) return -ENOMEM; /* round up to nearest page */ - tx_ring->size = ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), - PAGE_SIZE); - tx_ring->desc = dmam_alloc_coherent(dev, tx_ring->size, &tx_ring->dma, + size = 
ALIGN(tx_ring->count * sizeof(struct ice_tx_desc), + PAGE_SIZE); + tx_ring->desc = dmam_alloc_coherent(dev, size, &tx_ring->dma, GFP_KERNEL); if (!tx_ring->desc) { dev_err(dev, "Unable to allocate memory for the Tx descriptor ring, size=%d\n", - tx_ring->size); + size); goto err; } @@ -370,9 +374,10 @@ err: * ice_clean_rx_ring - Free Rx buffers * @rx_ring: ring to be cleaned */ -void ice_clean_rx_ring(struct ice_ring *rx_ring) +void ice_clean_rx_ring(struct ice_rx_ring *rx_ring) { struct device *dev = rx_ring->dev; + u32 size; u16 i; /* ring already cleared, nothing to do */ @@ -417,7 +422,9 @@ rx_skip_free: memset(rx_ring->rx_buf, 0, sizeof(*rx_ring->rx_buf) * rx_ring->count); /* Zero out the descriptor ring */ - memset(rx_ring->desc, 0, rx_ring->size); + size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), + PAGE_SIZE); + memset(rx_ring->desc, 0, size); rx_ring->next_to_alloc = 0; rx_ring->next_to_clean = 0; @@ -430,8 +437,10 @@ rx_skip_free: * * Free all receive software resources */ -void ice_free_rx_ring(struct ice_ring *rx_ring) +void ice_free_rx_ring(struct ice_rx_ring *rx_ring) { + u32 size; + ice_clean_rx_ring(rx_ring); if (rx_ring->vsi->type == ICE_VSI_PF) if (xdp_rxq_info_is_reg(&rx_ring->xdp_rxq)) @@ -441,7 +450,9 @@ void ice_free_rx_ring(struct ice_ring *rx_ring) rx_ring->rx_buf = NULL; if (rx_ring->desc) { - dmam_free_coherent(rx_ring->dev, rx_ring->size, + size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), + PAGE_SIZE); + dmam_free_coherent(rx_ring->dev, size, rx_ring->desc, rx_ring->dma); rx_ring->desc = NULL; } @@ -453,9 +464,10 @@ void ice_free_rx_ring(struct ice_ring *rx_ring) * * Return 0 on success, negative on error */ -int ice_setup_rx_ring(struct ice_ring *rx_ring) +int ice_setup_rx_ring(struct ice_rx_ring *rx_ring) { struct device *dev = rx_ring->dev; + u32 size; if (!dev) return -ENOMEM; @@ -463,19 +475,19 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring) /* warn if we are about to overwrite the pointer */ WARN_ON(rx_ring->rx_buf); rx_ring->rx_buf = - devm_kzalloc(dev, sizeof(*rx_ring->rx_buf) * rx_ring->count, + devm_kcalloc(dev, sizeof(*rx_ring->rx_buf), rx_ring->count, GFP_KERNEL); if (!rx_ring->rx_buf) return -ENOMEM; /* round up to nearest page */ - rx_ring->size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), - PAGE_SIZE); - rx_ring->desc = dmam_alloc_coherent(dev, rx_ring->size, &rx_ring->dma, + size = ALIGN(rx_ring->count * sizeof(union ice_32byte_rx_desc), + PAGE_SIZE); + rx_ring->desc = dmam_alloc_coherent(dev, size, &rx_ring->dma, GFP_KERNEL); if (!rx_ring->desc) { dev_err(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n", - rx_ring->size); + size); goto err; } @@ -499,7 +511,7 @@ err: } static unsigned int -ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size) +ice_rx_frame_truesize(struct ice_rx_ring *rx_ring, unsigned int __maybe_unused size) { unsigned int truesize; @@ -519,15 +531,15 @@ ice_rx_frame_truesize(struct ice_ring *rx_ring, unsigned int __maybe_unused size * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program * @xdp_prog: XDP program to run + * @xdp_ring: ring to be used for XDP_TX action * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int -ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, - struct bpf_prog *xdp_prog) +ice_run_xdp(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) { - struct ice_ring *xdp_ring; - int err, result; + int err; u32 act; act = 
bpf_prog_run_xdp(xdp_prog, xdp); @@ -535,11 +547,14 @@ ice_run_xdp(struct ice_ring *rx_ring, struct xdp_buff *xdp, case XDP_PASS: return ICE_XDP_PASS; case XDP_TX: - xdp_ring = rx_ring->vsi->xdp_rings[smp_processor_id()]; - result = ice_xmit_xdp_buff(xdp, xdp_ring); - if (result == ICE_XDP_CONSUMED) + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); + err = ice_xmit_xdp_ring(xdp->data, xdp->data_end - xdp->data, xdp_ring); + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + if (err == ICE_XDP_CONSUMED) goto out_failure; - return result; + return err; case XDP_REDIRECT: err = xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog); if (err) @@ -576,7 +591,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, struct ice_netdev_priv *np = netdev_priv(dev); unsigned int queue_index = smp_processor_id(); struct ice_vsi *vsi = np->vsi; - struct ice_ring *xdp_ring; + struct ice_tx_ring *xdp_ring; int nxmit = 0, i; if (test_bit(ICE_VSI_DOWN, vsi->state)) @@ -588,7 +603,14 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK)) return -EINVAL; - xdp_ring = vsi->xdp_rings[queue_index]; + if (static_branch_unlikely(&ice_xdp_locking_key)) { + queue_index %= vsi->num_xdp_txq; + xdp_ring = vsi->xdp_rings[queue_index]; + spin_lock(&xdp_ring->tx_lock); + } else { + xdp_ring = vsi->xdp_rings[queue_index]; + } + for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; int err; @@ -602,6 +624,9 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, if (unlikely(flags & XDP_XMIT_FLUSH)) ice_xdp_ring_update_tail(xdp_ring); + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); + return nxmit; } @@ -614,7 +639,7 @@ ice_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, * reused. */ static bool -ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) +ice_alloc_mapped_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *bi) { struct page *page = bi->page; dma_addr_t dma; @@ -665,7 +690,7 @@ ice_alloc_mapped_page(struct ice_ring *rx_ring, struct ice_rx_buf *bi) * buffers. Then bump tail at most one time. Grouping like this lets us avoid * multiple tail writes per call. */ -bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count) +bool ice_alloc_rx_bufs(struct ice_rx_ring *rx_ring, u16 cleaned_count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; @@ -794,7 +819,7 @@ ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) * The function will then update the page offset. */ static void -ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_add_rx_frag(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct sk_buff *skb, unsigned int size) { #if (PAGE_SIZE >= 8192) @@ -820,7 +845,7 @@ ice_add_rx_frag(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * Synchronizes page for reuse by the adapter */ static void -ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) +ice_reuse_rx_page(struct ice_rx_ring *rx_ring, struct ice_rx_buf *old_buf) { u16 nta = rx_ring->next_to_alloc; struct ice_rx_buf *new_buf; @@ -851,7 +876,7 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) * for use by the CPU. 
*/ static struct ice_rx_buf * -ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size, +ice_get_rx_buf(struct ice_rx_ring *rx_ring, const unsigned int size, int *rx_buf_pgcnt) { struct ice_rx_buf *rx_buf; @@ -888,7 +913,7 @@ ice_get_rx_buf(struct ice_ring *rx_ring, const unsigned int size, * to set up the skb correctly and avoid any memcpy overhead. */ static struct sk_buff * -ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_build_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { u8 metasize = xdp->data - xdp->data_meta; @@ -940,7 +965,7 @@ ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * skb correctly. */ static struct sk_buff * -ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_construct_skb(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, struct xdp_buff *xdp) { unsigned int size = xdp->data_end - xdp->data; @@ -1000,7 +1025,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * the associated resources. */ static void -ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, +ice_put_rx_buf(struct ice_rx_ring *rx_ring, struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) { u16 ntc = rx_ring->next_to_clean + 1; @@ -1036,7 +1061,7 @@ ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * otherwise return true indicating that this is in fact a non-EOP buffer. */ static bool -ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) +ice_is_non_eop(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) { /* if we are the last buffer then there is nothing else to do */ #define ICE_RXD_EOF BIT(ICE_RX_FLEX_DESC_STATUS0_EOF_S) @@ -1060,11 +1085,12 @@ ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc) * * Returns amount of work completed */ -int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) +int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_pkts = 0, frame_sz = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); unsigned int offset = rx_ring->rx_offset; + struct ice_tx_ring *xdp_ring = NULL; unsigned int xdp_res, xdp_xmit = 0; struct sk_buff *skb = rx_ring->skb; struct bpf_prog *xdp_prog = NULL; @@ -1077,6 +1103,10 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) #endif xdp_init_buff(&xdp, frame_sz, &rx_ring->xdp_rxq); + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + if (xdp_prog) + xdp_ring = rx_ring->xdp_ring; + /* start the loop to process Rx packets bounded by 'budget' */ while (likely(total_rx_pkts < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; @@ -1140,11 +1170,10 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size); #endif - xdp_prog = READ_ONCE(rx_ring->xdp_prog); if (!xdp_prog) goto construct_skb; - xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog); + xdp_res = ice_run_xdp(rx_ring, &xdp, xdp_prog, xdp_ring); if (!xdp_res) goto construct_skb; if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) { @@ -1221,7 +1250,7 @@ construct_skb: failure = ice_alloc_rx_bufs(rx_ring, cleaned_count); if (xdp_prog) - ice_finalize_xdp_rx(rx_ring, xdp_xmit); + ice_finalize_xdp_rx(xdp_ring, xdp_xmit); rx_ring->skb = skb; ice_update_rx_ring_stats(rx_ring, total_rx_pkts, total_rx_bytes); @@ -1230,6 +1259,41 @@ construct_skb: return failure ? 
budget : (int)total_rx_pkts; } +static void __ice_update_sample(struct ice_q_vector *q_vector, + struct ice_ring_container *rc, + struct dim_sample *sample, + bool is_tx) +{ + u64 packets = 0, bytes = 0; + + if (is_tx) { + struct ice_tx_ring *tx_ring; + + ice_for_each_tx_ring(tx_ring, *rc) { + packets += tx_ring->stats.pkts; + bytes += tx_ring->stats.bytes; + } + } else { + struct ice_rx_ring *rx_ring; + + ice_for_each_rx_ring(rx_ring, *rc) { + packets += rx_ring->stats.pkts; + bytes += rx_ring->stats.bytes; + } + } + + dim_update_sample(q_vector->total_events, packets, bytes, sample); + sample->comp_ctr = 0; + + /* if dim settings get stale, like when not updated for 1 + * second or longer, force it to start again. This addresses the + * frequent case of an idle queue being switched to by the + * scheduler. The 1,000 here means 1,000 milliseconds. + */ + if (ktime_ms_delta(sample->time, rc->dim.start_sample.time) >= 1000) + rc->dim.state = DIM_START_MEASURE; +} + /** * ice_net_dim - Update net DIM algorithm * @q_vector: the vector associated with the interrupt @@ -1245,34 +1309,16 @@ static void ice_net_dim(struct ice_q_vector *q_vector) struct ice_ring_container *rx = &q_vector->rx; if (ITR_IS_DYNAMIC(tx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_ring *ring; - - ice_for_each_ring(ring, q_vector->tx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, tx, &dim_sample, true); net_dim(&tx->dim, dim_sample); } if (ITR_IS_DYNAMIC(rx)) { - struct dim_sample dim_sample = {}; - u64 packets = 0, bytes = 0; - struct ice_ring *ring; - - ice_for_each_ring(ring, q_vector->rx) { - packets += ring->stats.pkts; - bytes += ring->stats.bytes; - } - - dim_update_sample(q_vector->total_events, packets, bytes, - &dim_sample); + struct dim_sample dim_sample; + __ice_update_sample(q_vector, rx, &dim_sample, false); net_dim(&rx->dim, dim_sample); } } @@ -1299,15 +1345,14 @@ static u32 ice_buildreg_itr(u16 itr_idx, u16 itr) } /** - * ice_update_ena_itr - Update ITR moderation and re-enable MSI-X interrupt + * ice_enable_interrupt - re-enable MSI-X interrupt * @q_vector: the vector associated with the interrupt to enable * - * Update the net_dim() algorithm and re-enable the interrupt associated with - * this vector. - * - * If the VSI is down, the interrupt will not be re-enabled. + * If the VSI is down, the interrupt will not be re-enabled. Also, + * when enabling the interrupt always reset the wb_on_itr to false + * and trigger a software interrupt to clean out internal state. */ -static void ice_update_ena_itr(struct ice_q_vector *q_vector) +static void ice_enable_interrupt(struct ice_q_vector *q_vector) { struct ice_vsi *vsi = q_vector->vsi; bool wb_en = q_vector->wb_on_itr; @@ -1316,25 +1361,25 @@ static void ice_update_ena_itr(struct ice_q_vector *q_vector) if (test_bit(ICE_DOWN, vsi->state)) return; - /* When exiting WB_ON_ITR, let ITR resume its normal - * interrupts-enabled path. + /* trigger an ITR delayed software interrupt when exiting busy poll, to + * make sure to catch any pending cleanups that might have been missed + * due to interrupt state transition. If busy poll or poll isn't + * enabled, then don't update ITR, and just enable the interrupt. 
*/ - if (wb_en) + if (!wb_en) { + itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); + } else { q_vector->wb_on_itr = false; - /* This will do nothing if dynamic updates are not enabled. */ - ice_net_dim(q_vector); - - /* net_dim() updates ITR out-of-band using a work item */ - itr_val = ice_buildreg_itr(ICE_ITR_NONE, 0); - /* trigger an immediate software interrupt when exiting - * busy poll, to make sure to catch any pending cleanups - * that might have been missed due to interrupt state - * transition. - */ - if (wb_en) { + /* do two things here with a single write. Set up the third ITR + * index to be used for software interrupt moderation, and then + * trigger a software interrupt with a rate limit of 20K on + * software interrupts, this will help avoid high interrupt + * loads due to frequently polling and exiting polling. + */ + itr_val = ice_buildreg_itr(ICE_IDX_ITR2, ICE_ITR_20K); itr_val |= GLINT_DYN_CTL_SWINT_TRIG_M | - GLINT_DYN_CTL_SW_ITR_INDX_M | + ICE_IDX_ITR2 << GLINT_DYN_CTL_SW_ITR_INDX_S | GLINT_DYN_CTL_SW_ITR_INDX_ENA_M; } wr32(&vsi->back->hw, GLINT_DYN_CTL(q_vector->reg_idx), itr_val); @@ -1387,18 +1432,24 @@ int ice_napi_poll(struct napi_struct *napi, int budget) { struct ice_q_vector *q_vector = container_of(napi, struct ice_q_vector, napi); + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; bool clean_complete = true; - struct ice_ring *ring; int budget_per_ring; int work_done = 0; /* Since the actual Tx work is minimal, we can give the Tx a larger * budget and be more aggressive about cleaning up the Tx descriptors. */ - ice_for_each_ring(ring, q_vector->tx) { - bool wd = ring->xsk_pool ? - ice_clean_tx_irq_zc(ring, budget) : - ice_clean_tx_irq(ring, budget); + ice_for_each_tx_ring(tx_ring, q_vector->tx) { + bool wd; + + if (tx_ring->xsk_pool) + wd = ice_clean_tx_irq_zc(tx_ring, budget); + else if (ice_ring_is_xdp(tx_ring)) + wd = true; + else + wd = ice_clean_tx_irq(tx_ring, budget); if (!wd) clean_complete = false; @@ -1419,16 +1470,16 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Max of 1 Rx ring in this q_vector so give it the budget */ budget_per_ring = budget; - ice_for_each_ring(ring, q_vector->rx) { + ice_for_each_rx_ring(rx_ring, q_vector->rx) { int cleaned; /* A dedicated path for zero-copy allows making a single * comparison in the irq context instead of many inside the * ice_clean_rx_irq function and makes the codebase cleaner. */ - cleaned = ring->xsk_pool ? - ice_clean_rx_irq_zc(ring, budget_per_ring) : - ice_clean_rx_irq(ring, budget_per_ring); + cleaned = rx_ring->xsk_pool ? 
+ ice_clean_rx_irq_zc(rx_ring, budget_per_ring) : + ice_clean_rx_irq(rx_ring, budget_per_ring); work_done += cleaned; /* if we clean as many as budgeted, we must not be done */ if (cleaned >= budget_per_ring) @@ -1447,10 +1498,12 @@ int ice_napi_poll(struct napi_struct *napi, int budget) /* Exit the polling mode, but don't re-enable interrupts if stack might * poll us due to busy-polling */ - if (likely(napi_complete_done(napi, work_done))) - ice_update_ena_itr(q_vector); - else + if (likely(napi_complete_done(napi, work_done))) { + ice_net_dim(q_vector); + ice_enable_interrupt(q_vector); + } else { ice_set_wb_on_itr(q_vector); + } return min_t(int, work_done, budget - 1); } @@ -1462,7 +1515,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget) * * Returns -EBUSY if a stop is needed, else 0 */ -static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +static int __ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) { netif_stop_subqueue(tx_ring->netdev, tx_ring->q_index); /* Memory barrier before checking head and tail */ @@ -1485,7 +1538,7 @@ static int __ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) * * Returns 0 if stop is not needed */ -static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) +static int ice_maybe_stop_tx(struct ice_tx_ring *tx_ring, unsigned int size) { if (likely(ICE_DESC_UNUSED(tx_ring) >= size)) return 0; @@ -1504,7 +1557,7 @@ static int ice_maybe_stop_tx(struct ice_ring *tx_ring, unsigned int size) * it and the length into the transmit descriptor. */ static void -ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first, +ice_tx_map(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first, struct ice_tx_offload_params *off) { u64 td_offset, td_tag, td_cmd; @@ -1840,7 +1893,7 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off) * related to VLAN tagging for the HW, such as VLAN, DCB, etc. 
*/ static void -ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first) +ice_tx_prepare_vlan_flags(struct ice_tx_ring *tx_ring, struct ice_tx_buf *first) { struct sk_buff *skb = first->skb; @@ -2146,7 +2199,7 @@ static bool ice_chk_linearize(struct sk_buff *skb, unsigned int count) * @off: Tx offload parameters */ static void -ice_tstamp(struct ice_ring *tx_ring, struct sk_buff *skb, +ice_tstamp(struct ice_tx_ring *tx_ring, struct sk_buff *skb, struct ice_tx_buf *first, struct ice_tx_offload_params *off) { s8 idx; @@ -2181,7 +2234,7 @@ ice_tstamp(struct ice_ring *tx_ring, struct sk_buff *skb, * Returns NETDEV_TX_OK if sent, else an error code */ static netdev_tx_t -ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) +ice_xmit_frame_ring(struct sk_buff *skb, struct ice_tx_ring *tx_ring) { struct ice_tx_offload_params offload = { 0 }; struct ice_vsi *vsi = tx_ring->vsi; @@ -2245,6 +2298,8 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring) ICE_TXD_CTX_QW1_CMD_S); ice_tstamp(tx_ring, skb, first, &offload); + if (ice_is_switchdev_running(vsi->back)) + ice_eswitch_set_target_vsi(skb, &offload); if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) { struct ice_tx_ctx_desc *cdesc; @@ -2282,7 +2337,7 @@ netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev) { struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_vsi *vsi = np->vsi; - struct ice_ring *tx_ring; + struct ice_tx_ring *tx_ring; tx_ring = vsi->tx_rings[skb->queue_mapping]; @@ -2296,10 +2351,43 @@ netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev) } /** + * ice_get_dscp_up - return the UP/TC value for a SKB + * @dcbcfg: DCB config that contains DSCP to UP/TC mapping + * @skb: SKB to query for info to determine UP/TC + * + * This function is to only be called when the PF is in L3 DSCP PFC mode + */ +static u8 ice_get_dscp_up(struct ice_dcbx_cfg *dcbcfg, struct sk_buff *skb) +{ + u8 dscp = 0; + + if (skb->protocol == htons(ETH_P_IP)) + dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2; + else if (skb->protocol == htons(ETH_P_IPV6)) + dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2; + + return dcbcfg->dscp_map[dscp]; +} + +u16 +ice_select_queue(struct net_device *netdev, struct sk_buff *skb, + struct net_device *sb_dev) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_dcbx_cfg *dcbcfg; + + dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg; + if (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) + skb->priority = ice_get_dscp_up(dcbcfg, skb); + + return netdev_pick_tx(netdev, skb, sb_dev); +} + +/** * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue * @tx_ring: tx_ring to clean */ -void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring) +void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring) { struct ice_vsi *vsi = tx_ring->vsi; s16 i = tx_ring->next_to_clean; diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h index 1e46e80f3d6f..c56dd1749903 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx.h @@ -13,6 +13,7 @@ #define ICE_MAX_CHAINED_RX_BUFS 5 #define ICE_MAX_BUF_TXD 8 #define ICE_MIN_TX_LEN 17 +#define ICE_TX_THRESH 32 /* The size limit for a transmit buffer in a descriptor is (16K - 1). 
* In order to align with the read requests we will align the value to @@ -154,7 +155,7 @@ struct ice_tx_buf { struct ice_tx_offload_params { u64 cd_qw1; - struct ice_ring *tx_ring; + struct ice_tx_ring *tx_ring; u32 td_cmd; u32 td_offset; u32 td_l2tag1; @@ -164,17 +165,10 @@ struct ice_tx_offload_params { }; struct ice_rx_buf { - union { - struct { - dma_addr_t dma; - struct page *page; - unsigned int page_offset; - u16 pagecnt_bias; - }; - struct { - struct xdp_buff *xdp; - }; - }; + dma_addr_t dma; + struct page *page; + unsigned int page_offset; + u16 pagecnt_bias; }; struct ice_q_stats { @@ -258,9 +252,9 @@ enum ice_dynamic_itr { #define ICE_TX_LEGACY 1 /* descriptor ring, associated with a VSI */ -struct ice_ring { +struct ice_rx_ring { /* CL1 - 1st cacheline starts here */ - struct ice_ring *next; /* pointer to next ring in q_vector */ + struct ice_rx_ring *next; /* pointer to next ring in q_vector */ void *desc; /* Descriptor ring memory */ struct device *dev; /* Used for DMA mapping */ struct net_device *netdev; /* netdev ring maps to */ @@ -268,14 +262,13 @@ struct ice_ring { struct ice_q_vector *q_vector; /* Backreference to associated vector */ u8 __iomem *tail; union { - struct ice_tx_buf *tx_buf; struct ice_rx_buf *rx_buf; + struct xdp_buff **xdp_buf; }; /* CL2 - 2nd cacheline starts here */ + struct xdp_rxq_info xdp_rxq; + /* CL3 - 3rd cacheline starts here */ u16 q_index; /* Queue number of ring */ - u16 q_handle; /* Queue handle per TC */ - - u8 ring_active:1; /* is ring online or not */ u16 count; /* Number of descriptors */ u16 reg_idx; /* HW register index of the ring */ @@ -284,63 +277,104 @@ struct ice_ring { u16 next_to_use; u16 next_to_clean; u16 next_to_alloc; + u16 rx_offset; + u16 rx_buf_len; /* stats structs */ + struct ice_rxq_stats rx_stats; struct ice_q_stats stats; struct u64_stats_sync syncp; - union { - struct ice_txq_stats tx_stats; - struct ice_rxq_stats rx_stats; - }; struct rcu_head rcu; /* to avoid race on free */ - DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ + /* CL4 - 3rd cacheline starts here */ + struct ice_channel *ch; struct bpf_prog *xdp_prog; + struct ice_tx_ring *xdp_ring; struct xsk_buff_pool *xsk_pool; - u16 rx_offset; - /* CL3 - 3rd cacheline starts here */ - struct xdp_rxq_info xdp_rxq; struct sk_buff *skb; - /* CLX - the below items are only accessed infrequently and should be - * in their own cache line if possible - */ -#define ICE_TX_FLAGS_RING_XDP BIT(0) + dma_addr_t dma; /* physical address of ring */ #define ICE_RX_FLAGS_RING_BUILD_SKB BIT(1) + u64 cached_phctime; + u8 dcb_tc; /* Traffic class of ring */ + u8 ptp_rx; u8 flags; +} ____cacheline_internodealigned_in_smp; + +struct ice_tx_ring { + /* CL1 - 1st cacheline starts here */ + struct ice_tx_ring *next; /* pointer to next ring in q_vector */ + void *desc; /* Descriptor ring memory */ + struct device *dev; /* Used for DMA mapping */ + u8 __iomem *tail; + struct ice_tx_buf *tx_buf; + struct ice_q_vector *q_vector; /* Backreference to associated vector */ + struct net_device *netdev; /* netdev ring maps to */ + struct ice_vsi *vsi; /* Backreference to associated VSI */ + /* CL2 - 2nd cacheline starts here */ dma_addr_t dma; /* physical address of ring */ - unsigned int size; /* length of descriptor ring in bytes */ + struct xsk_buff_pool *xsk_pool; + u16 next_to_use; + u16 next_to_clean; + u16 next_rs; + u16 next_dd; + u16 q_handle; /* Queue handle per TC */ + u16 reg_idx; /* HW register index of the ring */ + u16 count; /* Number of descriptors */ + u16 
q_index; /* Queue number of ring */ + /* stats structs */ + struct ice_q_stats stats; + struct u64_stats_sync syncp; + struct ice_txq_stats tx_stats; + + /* CL3 - 3rd cacheline starts here */ + struct rcu_head rcu; /* to avoid race on free */ + DECLARE_BITMAP(xps_state, ICE_TX_NBITS); /* XPS Config State */ + struct ice_channel *ch; + struct ice_ptp_tx *tx_tstamps; + spinlock_t tx_lock; u32 txq_teid; /* Added Tx queue TEID */ - u16 rx_buf_len; +#define ICE_TX_FLAGS_RING_XDP BIT(0) + u8 flags; u8 dcb_tc; /* Traffic class of ring */ - struct ice_ptp_tx *tx_tstamps; - u64 cached_phctime; - u8 ptp_rx:1; - u8 ptp_tx:1; + u8 ptp_tx; } ____cacheline_internodealigned_in_smp; -static inline bool ice_ring_uses_build_skb(struct ice_ring *ring) +static inline bool ice_ring_uses_build_skb(struct ice_rx_ring *ring) { return !!(ring->flags & ICE_RX_FLAGS_RING_BUILD_SKB); } -static inline void ice_set_ring_build_skb_ena(struct ice_ring *ring) +static inline void ice_set_ring_build_skb_ena(struct ice_rx_ring *ring) { ring->flags |= ICE_RX_FLAGS_RING_BUILD_SKB; } -static inline void ice_clear_ring_build_skb_ena(struct ice_ring *ring) +static inline void ice_clear_ring_build_skb_ena(struct ice_rx_ring *ring) { ring->flags &= ~ICE_RX_FLAGS_RING_BUILD_SKB; } -static inline bool ice_ring_is_xdp(struct ice_ring *ring) +static inline bool ice_ring_ch_enabled(struct ice_tx_ring *ring) +{ + return !!ring->ch; +} + +static inline bool ice_ring_is_xdp(struct ice_tx_ring *ring) { return !!(ring->flags & ICE_TX_FLAGS_RING_XDP); } +enum ice_container_type { + ICE_RX_CONTAINER, + ICE_TX_CONTAINER, +}; + struct ice_ring_container { /* head of linked-list of rings */ - struct ice_ring *ring; + union { + struct ice_rx_ring *rx_ring; + struct ice_tx_ring *tx_ring; + }; struct dim dim; /* data for net_dim algorithm */ u16 itr_idx; /* index in the interrupt vector */ /* this matches the maximum number of ITR bits, but in usec @@ -349,6 +383,7 @@ struct ice_ring_container { u16 itr_setting:13; u16 itr_reserved:2; u16 itr_mode:1; + enum ice_container_type type; }; struct ice_coalesce_stored { @@ -360,10 +395,13 @@ struct ice_coalesce_stored { }; /* iterator for handling rings in ring container */ -#define ice_for_each_ring(pos, head) \ - for (pos = (head).ring; pos; pos = pos->next) +#define ice_for_each_rx_ring(pos, head) \ + for (pos = (head).rx_ring; pos; pos = pos->next) + +#define ice_for_each_tx_ring(pos, head) \ + for (pos = (head).tx_ring; pos; pos = pos->next) -static inline unsigned int ice_rx_pg_order(struct ice_ring *ring) +static inline unsigned int ice_rx_pg_order(struct ice_rx_ring *ring) { #if (PAGE_SIZE < 8192) if (ring->rx_buf_len > (PAGE_SIZE / 2)) @@ -376,18 +414,21 @@ static inline unsigned int ice_rx_pg_order(struct ice_ring *ring) union ice_32b_rx_flex_desc; -bool ice_alloc_rx_bufs(struct ice_ring *rxr, u16 cleaned_count); +bool ice_alloc_rx_bufs(struct ice_rx_ring *rxr, u16 cleaned_count); netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev); -void ice_clean_tx_ring(struct ice_ring *tx_ring); -void ice_clean_rx_ring(struct ice_ring *rx_ring); -int ice_setup_tx_ring(struct ice_ring *tx_ring); -int ice_setup_rx_ring(struct ice_ring *rx_ring); -void ice_free_tx_ring(struct ice_ring *tx_ring); -void ice_free_rx_ring(struct ice_ring *rx_ring); +u16 +ice_select_queue(struct net_device *dev, struct sk_buff *skb, + struct net_device *sb_dev); +void ice_clean_tx_ring(struct ice_tx_ring *tx_ring); +void ice_clean_rx_ring(struct ice_rx_ring *rx_ring); +int ice_setup_tx_ring(struct ice_tx_ring 
*tx_ring); +int ice_setup_rx_ring(struct ice_rx_ring *rx_ring); +void ice_free_tx_ring(struct ice_tx_ring *tx_ring); +void ice_free_rx_ring(struct ice_rx_ring *rx_ring); int ice_napi_poll(struct napi_struct *napi, int budget); int ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc, u8 *raw_packet); -int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget); -void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring); +int ice_clean_rx_irq(struct ice_rx_ring *rx_ring, int budget); +void ice_clean_ctrl_tx_irq(struct ice_tx_ring *tx_ring); #endif /* _ICE_TXRX_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c index 171397dcf00a..1dd7e84f41f8 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c @@ -2,13 +2,15 @@ /* Copyright (c) 2019, Intel Corporation. */ #include "ice_txrx_lib.h" +#include "ice_eswitch.h" +#include "ice_lib.h" /** * ice_release_rx_desc - Store the new tail and head values * @rx_ring: ring to bump * @val: new head index */ -void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val) +void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val) { u16 prev_ntu = rx_ring->next_to_use & ~0x7; @@ -66,7 +68,7 @@ static enum pkt_hash_types ice_ptype_to_htype(u16 ptype) * @rx_ptype: the ptype value from the descriptor */ static void -ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, +ice_rx_hash(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 rx_ptype) { struct ice_32b_rx_flex_desc_nic *nic_mdid; @@ -93,7 +95,7 @@ ice_rx_hash(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, * skb->protocol must be set before this function is called */ static void -ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb, +ice_rx_csum(struct ice_rx_ring *ring, struct sk_buff *skb, union ice_32b_rx_flex_desc *rx_desc, u16 ptype) { struct ice_rx_ptype_decoded decoded; @@ -178,14 +180,15 @@ checksum_fail: * other fields within the skb. 
*/ void -ice_process_skb_fields(struct ice_ring *rx_ring, +ice_process_skb_fields(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 ptype) { ice_rx_hash(rx_ring, rx_desc, skb, ptype); /* modifies the skb - consumes the enet header */ - skb->protocol = eth_type_trans(skb, rx_ring->netdev); + skb->protocol = eth_type_trans(skb, ice_eswitch_get_target_netdev + (rx_ring, rx_desc)); ice_rx_csum(rx_ring, skb, rx_desc, ptype); @@ -203,7 +206,7 @@ ice_process_skb_fields(struct ice_ring *rx_ring, * gro receive functions (with/without VLAN tag) */ void -ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) +ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) { if ((rx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_RX) && (vlan_tag & VLAN_VID_MASK)) @@ -212,18 +215,67 @@ ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag) } /** + * ice_clean_xdp_irq - Reclaim resources after transmit completes on XDP ring + * @xdp_ring: XDP ring to clean + */ +static void ice_clean_xdp_irq(struct ice_tx_ring *xdp_ring) +{ + unsigned int total_bytes = 0, total_pkts = 0; + u16 ntc = xdp_ring->next_to_clean; + struct ice_tx_desc *next_dd_desc; + u16 next_dd = xdp_ring->next_dd; + struct ice_tx_buf *tx_buf; + int i; + + next_dd_desc = ICE_TX_DESC(xdp_ring, next_dd); + if (!(next_dd_desc->cmd_type_offset_bsz & + cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE))) + return; + + for (i = 0; i < ICE_TX_THRESH; i++) { + tx_buf = &xdp_ring->tx_buf[ntc]; + + total_bytes += tx_buf->bytecount; + /* normally tx_buf->gso_segs was taken but at this point + * it's always 1 for us + */ + total_pkts++; + + page_frag_free(tx_buf->raw_buf); + dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), + dma_unmap_len(tx_buf, len), DMA_TO_DEVICE); + dma_unmap_len_set(tx_buf, len, 0); + tx_buf->raw_buf = NULL; + + ntc++; + if (ntc >= xdp_ring->count) + ntc = 0; + } + + next_dd_desc->cmd_type_offset_bsz = 0; + xdp_ring->next_dd = xdp_ring->next_dd + ICE_TX_THRESH; + if (xdp_ring->next_dd > xdp_ring->count) + xdp_ring->next_dd = ICE_TX_THRESH - 1; + xdp_ring->next_to_clean = ntc; + ice_update_tx_ring_stats(xdp_ring, total_pkts, total_bytes); +} + +/** * ice_xmit_xdp_ring - submit single packet to XDP ring for transmission * @data: packet data pointer * @size: packet data size * @xdp_ring: XDP ring for transmission */ -int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring) { u16 i = xdp_ring->next_to_use; struct ice_tx_desc *tx_desc; struct ice_tx_buf *tx_buf; dma_addr_t dma; + if (ICE_DESC_UNUSED(xdp_ring) < ICE_TX_THRESH) + ice_clean_xdp_irq(xdp_ring); + if (!unlikely(ICE_DESC_UNUSED(xdp_ring))) { xdp_ring->tx_stats.tx_busy++; return ICE_XDP_CONSUMED; @@ -244,21 +296,26 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) tx_desc = ICE_TX_DESC(xdp_ring, i); tx_desc->buf_addr = cpu_to_le64(dma); - tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, + tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TX_DESC_CMD_EOP, 0, size, 0); - /* Make certain all of the status bits have been updated - * before next_to_watch is written. 
- */ - smp_wmb(); - i++; - if (i == xdp_ring->count) + if (i == xdp_ring->count) { i = 0; - - tx_buf->next_to_watch = tx_desc; + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs = ICE_TX_THRESH - 1; + } xdp_ring->next_to_use = i; + if (i > xdp_ring->next_rs) { + tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_rs); + tx_desc->cmd_type_offset_bsz |= + cpu_to_le64(ICE_TX_DESC_CMD_RS << ICE_TXD_QW1_CMD_S); + xdp_ring->next_rs += ICE_TX_THRESH; + } + return ICE_XDP_TX; } @@ -269,7 +326,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring) * * Returns negative on failure, 0 on success. */ -int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring) +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring) { struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp); @@ -281,22 +338,23 @@ int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring) /** * ice_finalize_xdp_rx - Bump XDP Tx tail and/or flush redirect map - * @rx_ring: Rx ring + * @xdp_ring: XDP ring * @xdp_res: Result of the receive batch * * This function bumps XDP Tx tail and/or flush redirect map, and * should be called when a batch of packets has been processed in the * napi loop. */ -void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res) +void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res) { if (xdp_res & ICE_XDP_REDIR) xdp_do_flush_map(); if (xdp_res & ICE_XDP_TX) { - struct ice_ring *xdp_ring = - rx_ring->vsi->xdp_rings[rx_ring->q_index]; - + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_lock(&xdp_ring->tx_lock); ice_xdp_ring_update_tail(xdp_ring); + if (static_branch_unlikely(&ice_xdp_locking_key)) + spin_unlock(&xdp_ring->tx_lock); } } diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h index 05ac30752902..11b6c1601986 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h +++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h @@ -37,7 +37,7 @@ ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag) * * This function updates the XDP Tx ring tail register. */ -static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) +static inline void ice_xdp_ring_update_tail(struct ice_tx_ring *xdp_ring) { /* Force memory writes to complete before letting h/w * know there are new descriptors to fetch. 
@@ -46,14 +46,14 @@ static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring) writel_relaxed(xdp_ring->next_to_use, xdp_ring->tail); } -void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res); -int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring); -int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring); -void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val); +void ice_finalize_xdp_rx(struct ice_tx_ring *xdp_ring, unsigned int xdp_res); +int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_tx_ring *xdp_ring); +int ice_xmit_xdp_ring(void *data, u16 size, struct ice_tx_ring *xdp_ring); +void ice_release_rx_desc(struct ice_rx_ring *rx_ring, u16 val); void -ice_process_skb_fields(struct ice_ring *rx_ring, +ice_process_skb_fields(struct ice_rx_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc, struct sk_buff *skb, u16 ptype); void -ice_receive_skb(struct ice_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); +ice_receive_skb(struct ice_rx_ring *rx_ring, struct sk_buff *skb, u16 vlan_tag); #endif /* !_ICE_TXRX_LIB_H_ */ diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h index d33d1906103c..9e0c2923c62e 100644 --- a/drivers/net/ethernet/intel/ice/ice_type.h +++ b/drivers/net/ethernet/intel/ice/ice_type.h @@ -138,7 +138,9 @@ enum ice_vsi_type { ICE_VSI_PF = 0, ICE_VSI_VF = 1, ICE_VSI_CTRL = 3, /* equates to ICE_VSI_PF with 1 queue pair */ + ICE_VSI_CHNL = 4, ICE_VSI_LB = 6, + ICE_VSI_SWITCHDEV_CTRL = 7, }; struct ice_link_status { @@ -569,6 +571,8 @@ struct ice_sched_vsi_info { struct list_head list_entry; u16 max_lanq[ICE_MAX_TRAFFIC_CLASS]; u16 max_rdmaq[ICE_MAX_TRAFFIC_CLASS]; + /* bw_t_info saves VSI BW information */ + struct ice_bw_type_info bw_t_info[ICE_MAX_TRAFFIC_CLASS]; }; /* driver defines the policy */ @@ -604,7 +608,8 @@ struct ice_dcb_app_priority_table { }; #define ICE_MAX_USER_PRIORITY 8 -#define ICE_DCBX_MAX_APPS 32 +#define ICE_DCBX_MAX_APPS 64 +#define ICE_DSCP_NUM_VAL 64 #define ICE_LLDPDU_SIZE 1500 #define ICE_TLV_STATUS_OPER 0x1 #define ICE_TLV_STATUS_SYNC 0x2 @@ -622,7 +627,14 @@ struct ice_dcbx_cfg { struct ice_dcb_ets_cfg etscfg; struct ice_dcb_ets_cfg etsrec; struct ice_dcb_pfc_cfg pfc; +#define ICE_QOS_MODE_VLAN 0x0 +#define ICE_QOS_MODE_DSCP 0x1 + u8 pfc_mode; struct ice_dcb_app_priority_table app[ICE_DCBX_MAX_APPS]; + /* when DSCP mapping defined by user set its bit to 1 */ + DECLARE_BITMAP(dscp_mapped, ICE_DSCP_NUM_VAL); + /* array holding DSCP -> UP/TC values for DSCP L3 QoS mode */ + u8 dscp_map[ICE_DSCP_NUM_VAL]; u8 dcbx_mode; #define ICE_DCBX_MODE_CEE 0x1 #define ICE_DCBX_MODE_IEEE 0x2 @@ -668,6 +680,10 @@ struct ice_port_info { struct ice_switch_info { struct list_head vsi_list_map_head; struct ice_sw_recipe *recp_list; + u16 prof_res_bm_init; + u16 max_used_prof_index; + + DECLARE_BITMAP(prof_res_bm[ICE_MAX_NUM_PROFILES], ICE_MAX_FV_WORDS); }; /* FW logging configuration */ @@ -903,6 +919,7 @@ struct ice_hw { struct mutex rss_locks; /* protect RSS configuration */ struct list_head rss_list_head; struct ice_mbx_snapshot mbx_snapshot; + u16 io_expander_handle; }; /* Statistics collected by each port, VSI, VEB, and S-channel */ diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c index e93430ab37f1..2ac21484b876 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c @@ -5,7 +5,9 @@ #include "ice_base.h" #include "ice_lib.h" 
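[Editorial sketch, not part of the patch: the dscp_map[] table and ICE_QOS_MODE_DSCP flag added to struct ice_dcbx_cfg above are what the new ice_select_queue()/ice_get_dscp_up() path in ice_txrx.c consumes. Below is a condensed, illustrative restatement of that flow with a hypothetical function name: in DSCP (L3) PFC mode the 6-bit DSCP value from the IP header indexes dscp_map[] to set the skb priority before the stack's normal queue pick.]

	static u16 example_select_queue(struct net_device *netdev,
					struct sk_buff *skb,
					struct net_device *sb_dev)
	{
		struct ice_pf *pf = ice_netdev_to_pf(netdev);
		struct ice_dcbx_cfg *dcbcfg;

		dcbcfg = &pf->hw.port_info->qos_cfg.local_dcbx_cfg;
		if (dcbcfg->pfc_mode == ICE_QOS_MODE_DSCP) {
			u8 dscp = 0;

			/* DS field carries DSCP in its upper six bits, hence >> 2 */
			if (skb->protocol == htons(ETH_P_IP))
				dscp = ipv4_get_dsfield(ip_hdr(skb)) >> 2;
			else if (skb->protocol == htons(ETH_P_IPV6))
				dscp = ipv6_get_dsfield(ipv6_hdr(skb)) >> 2;

			skb->priority = dcbcfg->dscp_map[dscp];
		}

		return netdev_pick_tx(netdev, skb, sb_dev);
	}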
#include "ice_fltr.h" +#include "ice_dcb_lib.h" #include "ice_flow.h" +#include "ice_eswitch.h" #include "ice_virtchnl_allowlist.h" #define FIELD_SELECTOR(proto_hdr_field) \ @@ -251,7 +253,7 @@ ice_vc_hash_field_match_type ice_vc_hash_field_list_comms[] = { * ice_get_vf_vsi - get VF's VSI based on the stored index * @vf: VF used to get VSI */ -static struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) +struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) { return vf->pf->vsi[vf->lan_vsi_idx]; } @@ -412,7 +414,7 @@ static bool ice_is_vf_link_up(struct ice_vf *vf) * * send a link status message to a single VF */ -static void ice_vc_notify_vf_link_state(struct ice_vf *vf) +void ice_vc_notify_vf_link_state(struct ice_vf *vf) { struct virtchnl_pf_event pfe = { 0 }; struct ice_hw *hw = &vf->pf->hw; @@ -620,6 +622,8 @@ void ice_free_vfs(struct ice_pf *pf) if (!pf->vf) return; + ice_eswitch_release(pf); + while (test_and_set_bit(ICE_VF_DIS, pf->state)) usleep_range(1000, 2000); @@ -828,7 +832,7 @@ static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n"); @@ -855,7 +859,7 @@ struct ice_vsi *ice_vf_ctrl_vsi_setup(struct ice_vf *vf) struct ice_pf *pf = vf->pf; struct ice_vsi *vsi; - vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id); + vsi = ice_vsi_setup(pf, pi, ICE_VSI_CTRL, vf->vf_id, NULL); if (!vsi) { dev_err(ice_pf_to_dev(pf), "Failed to create VF control VSI\n"); ice_vf_ctrl_invalidate_vsi(vf); @@ -882,6 +886,40 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf) } /** + * ice_vf_rebuild_host_tx_rate_cfg - re-apply the Tx rate limiting configuration + * @vf: VF to re-apply the configuration for + * + * Called after a VF VSI has been re-added/rebuild during reset. The PF driver + * needs to re-apply the host configured Tx rate limiting configuration. 
+ */ +static int ice_vf_rebuild_host_tx_rate_cfg(struct ice_vf *vf) +{ + struct device *dev = ice_pf_to_dev(vf->pf); + struct ice_vsi *vsi = ice_get_vf_vsi(vf); + int err; + + if (vf->min_tx_rate) { + err = ice_set_min_bw_limit(vsi, (u64)vf->min_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set min Tx rate to %d Mbps for VF %u, error %d\n", + vf->min_tx_rate, vf->vf_id, err); + return err; + } + } + + if (vf->max_tx_rate) { + err = ice_set_max_bw_limit(vsi, (u64)vf->max_tx_rate * 1000); + if (err) { + dev_err(dev, "failed to set max Tx rate to %d Mbps for VF %u, error %d\n", + vf->max_tx_rate, vf->vf_id, err); + return err; + } + } + + return 0; +} + +/** * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN * @vf: VF to add MAC filters for * @@ -932,6 +970,9 @@ static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf) enum ice_status status; u8 broadcast[ETH_ALEN]; + if (ice_is_eswitch_mode_switchdev(vf->pf)) + return 0; + eth_broadcast_addr(broadcast); status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI); if (status) { @@ -1414,6 +1455,11 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf) if (ice_vf_rebuild_host_vlan_cfg(vf)) dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n", vf->vf_id); + + if (ice_vf_rebuild_host_tx_rate_cfg(vf)) + dev_err(dev, "failed to rebuild Tx rate limiting configuration for VF %u\n", + vf->vf_id); + /* rebuild aggregator node config for main VF VSI */ ice_vf_rebuild_aggregator_node_cfg(vsi); } @@ -1581,6 +1627,10 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) ice_vf_post_vsi_rebuild(vf); } + if (ice_is_eswitch_mode_switchdev(pf)) + if (ice_eswitch_rebuild(pf)) + dev_warn(dev, "eswitch rebuild failed\n"); + ice_flush(hw); clear_bit(ICE_VF_DIS, pf->state); @@ -1593,7 +1643,7 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr) * * Returns true if the PF or VF is disabled, false otherwise. */ -static bool ice_is_vf_disabled(struct ice_vf *vf) +bool ice_is_vf_disabled(struct ice_vf *vf) { struct ice_pf *pf = vf->pf; @@ -1711,6 +1761,8 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr) } ice_vf_post_vsi_rebuild(vf); + vsi = ice_get_vf_vsi(vf); + ice_eswitch_update_repr(vsi); /* if the VF has been reset allow it to come up again */ if (ice_mbx_clear_malvf(&hw->mbx_snapshot, pf->malvfs, ICE_MAX_VF_COUNT, vf->vf_id)) @@ -1894,6 +1946,8 @@ static void ice_set_dflt_settings_vfs(struct ice_pf *pf) */ ice_vf_ctrl_invalidate_vsi(vf); ice_vf_fdir_init(vf); + + ice_vc_set_dflt_vf_ops(&vf->vc_ops); } } @@ -1960,6 +2014,11 @@ static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs) } clear_bit(ICE_VF_DIS, pf->state); + + ret = ice_eswitch_configure(pf); + if (ret) + goto err_unroll_sriov; + return 0; err_unroll_sriov: @@ -2823,7 +2882,7 @@ static void ice_wait_on_vf_reset(struct ice_vf *vf) * disabled, and initialized so it can be configured and/or queried by a host * administrator. 
*/ -static int ice_check_vf_ready_for_cfg(struct ice_vf *vf) +int ice_check_vf_ready_for_cfg(struct ice_vf *vf) { struct ice_pf *pf; @@ -3013,7 +3072,10 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg) v_ret = VIRTCHNL_STATUS_ERR_PARAM; } - ret = ice_cfg_vlan_pruning(vsi, true, !rm_promisc); + if (rm_promisc) + ret = ice_cfg_vlan_pruning(vsi, true); + else + ret = ice_cfg_vlan_pruning(vsi, false); if (ret) { dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n"); v_ret = VIRTCHNL_STATUS_ERR_PARAM; @@ -3329,7 +3391,7 @@ static int ice_vc_dis_qs_msg(struct ice_vf *vf, u8 *msg) q_map = vqs->tx_queues; for_each_set_bit(vf_q_id, &q_map, ICE_MAX_RSS_QS_PER_VF) { - struct ice_ring *ring = vsi->tx_rings[vf_q_id]; + struct ice_tx_ring *ring = vsi->tx_rings[vf_q_id]; struct ice_txq_meta txq_meta = { 0 }; if (!ice_vc_isvalid_q_id(vf, vqs->vsi_id, vf_q_id)) { @@ -3802,6 +3864,26 @@ static bool ice_is_legacy_umac_expired(struct ice_time_mac *last_added_umac) } /** + * ice_update_legacy_cached_mac - update cached hardware MAC for legacy VF + * @vf: VF to update + * @vc_ether_addr: structure from VIRTCHNL with MAC to check + * + * only update cached hardware MAC for legacy VF drivers on delete + * because we cannot guarantee order/type of MAC from the VF driver + */ +static void +ice_update_legacy_cached_mac(struct ice_vf *vf, + struct virtchnl_ether_addr *vc_ether_addr) +{ + if (!ice_is_vc_addr_legacy(vc_ether_addr) || + ice_is_legacy_umac_expired(&vf->legacy_last_added_umac)) + return; + + ether_addr_copy(vf->dev_lan_addr.addr, vf->legacy_last_added_umac.addr); + ether_addr_copy(vf->hw_lan_addr.addr, vf->legacy_last_added_umac.addr); +} + +/** * ice_vfhw_mac_del - update the VF's cached hardware MAC if allowed * @vf: VF to update * @vc_ether_addr: structure from VIRTCHNL with MAC to delete @@ -3822,16 +3904,7 @@ ice_vfhw_mac_del(struct ice_vf *vf, struct virtchnl_ether_addr *vc_ether_addr) */ eth_zero_addr(vf->dev_lan_addr.addr); - /* only update cached hardware MAC for legacy VF drivers on delete - * because we cannot guarantee order/type of MAC from the VF driver - */ - if (ice_is_vc_addr_legacy(vc_ether_addr) && - !ice_is_legacy_umac_expired(&vf->legacy_last_added_umac)) { - ether_addr_copy(vf->dev_lan_addr.addr, - vf->legacy_last_added_umac.addr); - ether_addr_copy(vf->hw_lan_addr.addr, - vf->legacy_last_added_umac.addr); - } + ice_update_legacy_cached_mac(vf, vc_ether_addr); } /** @@ -4207,7 +4280,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) /* Enable VLAN pruning when non-zero VLAN is added */ if (!vlan_promisc && vid && !ice_vsi_is_vlan_pruning_ena(vsi)) { - status = ice_cfg_vlan_pruning(vsi, true, false); + status = ice_cfg_vlan_pruning(vsi, true); if (status) { v_ret = VIRTCHNL_STATUS_ERR_PARAM; dev_err(dev, "Enable VLAN pruning on VLAN ID: %d failed error-%d\n", @@ -4261,7 +4334,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v) /* Disable VLAN pruning when only VLAN 0 is left */ if (vsi->num_vlan == 1 && ice_vsi_is_vlan_pruning_ena(vsi)) - ice_cfg_vlan_pruning(vsi, false, false); + ice_cfg_vlan_pruning(vsi, false); /* Disable Unicast/Multicast VLAN promiscuous mode */ if (vlan_promisc) { @@ -4400,6 +4473,168 @@ static int ice_vf_init_vlan_stripping(struct ice_vf *vf) return ice_vsi_manage_vlan_stripping(vsi, false); } +static struct ice_vc_vf_ops ice_vc_vf_dflt_ops = { + .get_ver_msg = ice_vc_get_ver_msg, + .get_vf_res_msg = ice_vc_get_vf_res_msg, + .reset_vf = ice_vc_reset_vf_msg, + 
.add_mac_addr_msg = ice_vc_add_mac_addr_msg, + .del_mac_addr_msg = ice_vc_del_mac_addr_msg, + .cfg_qs_msg = ice_vc_cfg_qs_msg, + .ena_qs_msg = ice_vc_ena_qs_msg, + .dis_qs_msg = ice_vc_dis_qs_msg, + .request_qs_msg = ice_vc_request_qs_msg, + .cfg_irq_map_msg = ice_vc_cfg_irq_map_msg, + .config_rss_key = ice_vc_config_rss_key, + .config_rss_lut = ice_vc_config_rss_lut, + .get_stats_msg = ice_vc_get_stats_msg, + .cfg_promiscuous_mode_msg = ice_vc_cfg_promiscuous_mode_msg, + .add_vlan_msg = ice_vc_add_vlan_msg, + .remove_vlan_msg = ice_vc_remove_vlan_msg, + .ena_vlan_stripping = ice_vc_ena_vlan_stripping, + .dis_vlan_stripping = ice_vc_dis_vlan_stripping, + .handle_rss_cfg_msg = ice_vc_handle_rss_cfg, + .add_fdir_fltr_msg = ice_vc_add_fdir_fltr, + .del_fdir_fltr_msg = ice_vc_del_fdir_fltr, +}; + +void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops) +{ + *ops = ice_vc_vf_dflt_ops; +} + +/** + * ice_vc_repr_add_mac + * @vf: pointer to VF + * @msg: virtchannel message + * + * When port representors are created, we do not add MAC rule + * to firmware, we store it so that PF could report same + * MAC as VF. + */ +static int ice_vc_repr_add_mac(struct ice_vf *vf, u8 *msg) +{ + enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS; + struct virtchnl_ether_addr_list *al = + (struct virtchnl_ether_addr_list *)msg; + struct ice_vsi *vsi; + struct ice_pf *pf; + int i; + + if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states) || + !ice_vc_isvalid_vsi_id(vf, al->vsi_id)) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto handle_mac_exit; + } + + pf = vf->pf; + + vsi = ice_get_vf_vsi(vf); + if (!vsi) { + v_ret = VIRTCHNL_STATUS_ERR_PARAM; + goto handle_mac_exit; + } + + for (i = 0; i < al->num_elements; i++) { + u8 *mac_addr = al->list[i].addr; + + if (!is_unicast_ether_addr(mac_addr) || + ether_addr_equal(mac_addr, vf->hw_lan_addr.addr)) + continue; + + if (vf->pf_set_mac) { + dev_err(ice_pf_to_dev(pf), "VF attempting to override administratively set MAC address\n"); + v_ret = VIRTCHNL_STATUS_ERR_NOT_SUPPORTED; + goto handle_mac_exit; + } + + ice_vfhw_mac_add(vf, &al->list[i]); + vf->num_mac++; + break; + } + +handle_mac_exit: + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_ETH_ADDR, + v_ret, NULL, 0); +} + +/** + * ice_vc_repr_del_mac - response with success for deleting MAC + * @vf: pointer to VF + * @msg: virtchannel message + * + * Respond with success to not break normal VF flow. + * For legacy VF driver try to update cached MAC address. 
+ */ +static int +ice_vc_repr_del_mac(struct ice_vf __always_unused *vf, u8 __always_unused *msg) +{ + struct virtchnl_ether_addr_list *al = + (struct virtchnl_ether_addr_list *)msg; + + ice_update_legacy_cached_mac(vf, &al->list[0]); + + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_ETH_ADDR, + VIRTCHNL_STATUS_SUCCESS, NULL, 0); +} + +static int ice_vc_repr_add_vlan(struct ice_vf *vf, u8 __always_unused *msg) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't add VLAN in switchdev mode for VF %d\n", vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ADD_VLAN, + VIRTCHNL_STATUS_SUCCESS, NULL, 0); +} + +static int ice_vc_repr_del_vlan(struct ice_vf *vf, u8 __always_unused *msg) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't delete VLAN in switchdev mode for VF %d\n", vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DEL_VLAN, + VIRTCHNL_STATUS_SUCCESS, NULL, 0); +} + +static int ice_vc_repr_ena_vlan_stripping(struct ice_vf *vf) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't enable VLAN stripping in switchdev mode for VF %d\n", + vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, + NULL, 0); +} + +static int ice_vc_repr_dis_vlan_stripping(struct ice_vf *vf) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't disable VLAN stripping in switchdev mode for VF %d\n", + vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, + NULL, 0); +} + +static int +ice_vc_repr_cfg_promiscuous_mode(struct ice_vf *vf, u8 __always_unused *msg) +{ + dev_dbg(ice_pf_to_dev(vf->pf), + "Can't config promiscuous mode in switchdev mode for VF %d\n", + vf->vf_id); + return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE, + VIRTCHNL_STATUS_ERR_NOT_SUPPORTED, + NULL, 0); +} + +void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops) +{ + ops->add_mac_addr_msg = ice_vc_repr_add_mac; + ops->del_mac_addr_msg = ice_vc_repr_del_mac; + ops->add_vlan_msg = ice_vc_repr_add_vlan; + ops->remove_vlan_msg = ice_vc_repr_del_vlan; + ops->ena_vlan_stripping = ice_vc_repr_ena_vlan_stripping; + ops->dis_vlan_stripping = ice_vc_repr_dis_vlan_stripping; + ops->cfg_promiscuous_mode_msg = ice_vc_repr_cfg_promiscuous_mode; +} + /** * ice_vc_process_vf_msg - Process request from VF * @pf: pointer to the PF structure @@ -4413,6 +4648,7 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) u32 v_opcode = le32_to_cpu(event->desc.cookie_high); s16 vf_id = le16_to_cpu(event->desc.retval); u16 msglen = event->msg_len; + struct ice_vc_vf_ops *ops; u8 *msg = event->msg_buf; struct ice_vf *vf = NULL; struct device *dev; @@ -4436,6 +4672,8 @@ void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) goto error_handler; } + ops = &vf->vc_ops; + /* Perform basic checks on the msg */ err = virtchnl_vc_validate_vf_msg(&vf->vf_ver, v_opcode, msg, msglen); if (err) { @@ -4463,75 +4701,75 @@ error_handler: switch (v_opcode) { case VIRTCHNL_OP_VERSION: - err = ice_vc_get_ver_msg(vf, msg); + err = ops->get_ver_msg(vf, msg); break; case VIRTCHNL_OP_GET_VF_RESOURCES: - err = ice_vc_get_vf_res_msg(vf, msg); + err = ops->get_vf_res_msg(vf, msg); if (ice_vf_init_vlan_stripping(vf)) dev_err(dev, "Failed to initialize VLAN stripping for VF %d\n", vf->vf_id); ice_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_RESET_VF: - ice_vc_reset_vf_msg(vf); + ops->reset_vf(vf); break; case VIRTCHNL_OP_ADD_ETH_ADDR: - err = ice_vc_add_mac_addr_msg(vf, msg); + err = 
ops->add_mac_addr_msg(vf, msg); break; case VIRTCHNL_OP_DEL_ETH_ADDR: - err = ice_vc_del_mac_addr_msg(vf, msg); + err = ops->del_mac_addr_msg(vf, msg); break; case VIRTCHNL_OP_CONFIG_VSI_QUEUES: - err = ice_vc_cfg_qs_msg(vf, msg); + err = ops->cfg_qs_msg(vf, msg); break; case VIRTCHNL_OP_ENABLE_QUEUES: - err = ice_vc_ena_qs_msg(vf, msg); + err = ops->ena_qs_msg(vf, msg); ice_vc_notify_vf_link_state(vf); break; case VIRTCHNL_OP_DISABLE_QUEUES: - err = ice_vc_dis_qs_msg(vf, msg); + err = ops->dis_qs_msg(vf, msg); break; case VIRTCHNL_OP_REQUEST_QUEUES: - err = ice_vc_request_qs_msg(vf, msg); + err = ops->request_qs_msg(vf, msg); break; case VIRTCHNL_OP_CONFIG_IRQ_MAP: - err = ice_vc_cfg_irq_map_msg(vf, msg); + err = ops->cfg_irq_map_msg(vf, msg); break; case VIRTCHNL_OP_CONFIG_RSS_KEY: - err = ice_vc_config_rss_key(vf, msg); + err = ops->config_rss_key(vf, msg); break; case VIRTCHNL_OP_CONFIG_RSS_LUT: - err = ice_vc_config_rss_lut(vf, msg); + err = ops->config_rss_lut(vf, msg); break; case VIRTCHNL_OP_GET_STATS: - err = ice_vc_get_stats_msg(vf, msg); + err = ops->get_stats_msg(vf, msg); break; case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE: - err = ice_vc_cfg_promiscuous_mode_msg(vf, msg); + err = ops->cfg_promiscuous_mode_msg(vf, msg); break; case VIRTCHNL_OP_ADD_VLAN: - err = ice_vc_add_vlan_msg(vf, msg); + err = ops->add_vlan_msg(vf, msg); break; case VIRTCHNL_OP_DEL_VLAN: - err = ice_vc_remove_vlan_msg(vf, msg); + err = ops->remove_vlan_msg(vf, msg); break; case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: - err = ice_vc_ena_vlan_stripping(vf); + err = ops->ena_vlan_stripping(vf); break; case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: - err = ice_vc_dis_vlan_stripping(vf); + err = ops->dis_vlan_stripping(vf); break; case VIRTCHNL_OP_ADD_FDIR_FILTER: - err = ice_vc_add_fdir_fltr(vf, msg); + err = ops->add_fdir_fltr_msg(vf, msg); break; case VIRTCHNL_OP_DEL_FDIR_FILTER: - err = ice_vc_del_fdir_fltr(vf, msg); + err = ops->del_fdir_fltr_msg(vf, msg); break; case VIRTCHNL_OP_ADD_RSS_CFG: - err = ice_vc_handle_rss_cfg(vf, msg, true); + err = ops->handle_rss_cfg_msg(vf, msg, true); break; case VIRTCHNL_OP_DEL_RSS_CFG: - err = ice_vc_handle_rss_cfg(vf, msg, false); + err = ops->handle_rss_cfg_msg(vf, msg, false); break; case VIRTCHNL_OP_UNKNOWN: default: @@ -4588,8 +4826,8 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi) ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE; else ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE; - ivi->max_tx_rate = vf->tx_rate; - ivi->min_tx_rate = 0; + ivi->max_tx_rate = vf->max_tx_rate; + ivi->min_tx_rate = vf->min_tx_rate; return 0; } @@ -4699,6 +4937,11 @@ int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted) struct ice_vf *vf; int ret; + if (ice_is_eswitch_mode_switchdev(pf)) { + dev_info(ice_pf_to_dev(pf), "Trusted VF is forbidden in switchdev mode\n"); + return -EOPNOTSUPP; + } + if (ice_validate_vf_id(pf, vf_id)) return -EINVAL; @@ -4763,6 +5006,122 @@ int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state) } /** + * ice_calc_all_vfs_min_tx_rate - calculate cumulative min Tx rate on all VFs + * @pf: PF associated with VFs + */ +static int ice_calc_all_vfs_min_tx_rate(struct ice_pf *pf) +{ + int rate = 0, i; + + ice_for_each_vf(pf, i) + rate += pf->vf[i].min_tx_rate; + + return rate; +} + +/** + * ice_min_tx_rate_oversubscribed - check if min Tx rate causes oversubscription + * @vf: VF trying to configure min_tx_rate + * @min_tx_rate: min Tx rate in Mbps + * + * Check if the min_tx_rate being passed in will cause 
oversubscription of total + * min_tx_rate based on the current link speed and all other VFs configured + * min_tx_rate + * + * Return true if the passed min_tx_rate would cause oversubscription, else + * return false + */ +static bool +ice_min_tx_rate_oversubscribed(struct ice_vf *vf, int min_tx_rate) +{ + int link_speed_mbps = ice_get_link_speed_mbps(ice_get_vf_vsi(vf)); + int all_vfs_min_tx_rate = ice_calc_all_vfs_min_tx_rate(vf->pf); + + /* this VF's previous rate is being overwritten */ + all_vfs_min_tx_rate -= vf->min_tx_rate; + + if (all_vfs_min_tx_rate + min_tx_rate > link_speed_mbps) { + dev_err(ice_pf_to_dev(vf->pf), "min_tx_rate of %d Mbps on VF %u would cause oversubscription of %d Mbps based on the current link speed %d Mbps\n", + min_tx_rate, vf->vf_id, + all_vfs_min_tx_rate + min_tx_rate - link_speed_mbps, + link_speed_mbps); + return true; + } + + return false; +} + +/** + * ice_set_vf_bw - set min/max VF bandwidth + * @netdev: network interface device structure + * @vf_id: VF identifier + * @min_tx_rate: Minimum Tx rate in Mbps + * @max_tx_rate: Maximum Tx rate in Mbps + */ +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate) +{ + struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct ice_vsi *vsi; + struct device *dev; + struct ice_vf *vf; + int ret; + + dev = ice_pf_to_dev(pf); + if (ice_validate_vf_id(pf, vf_id)) + return -EINVAL; + + vf = &pf->vf[vf_id]; + ret = ice_check_vf_ready_for_cfg(vf); + if (ret) + return ret; + + vsi = ice_get_vf_vsi(vf); + + /* when max_tx_rate is zero that means no max Tx rate limiting, so only + * check if max_tx_rate is non-zero + */ + if (max_tx_rate && min_tx_rate > max_tx_rate) { + dev_err(dev, "Cannot set min Tx rate %d Mbps greater than max Tx rate %d Mbps\n", + min_tx_rate, max_tx_rate); + return -EINVAL; + } + + if (min_tx_rate && ice_is_dcb_active(pf)) { + dev_err(dev, "DCB on PF is currently enabled. 
VF min Tx rate limiting not allowed on this PF.\n"); + return -EOPNOTSUPP; + } + + if (ice_min_tx_rate_oversubscribed(vf, min_tx_rate)) + return -EINVAL; + + if (vf->min_tx_rate != (unsigned int)min_tx_rate) { + ret = ice_set_min_bw_limit(vsi, (u64)min_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set min-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->min_tx_rate = min_tx_rate; + } + + if (vf->max_tx_rate != (unsigned int)max_tx_rate) { + ret = ice_set_max_bw_limit(vsi, (u64)max_tx_rate * 1000); + if (ret) { + dev_err(dev, "Unable to set max-tx-rate for VF %d\n", + vf->vf_id); + return ret; + } + + vf->max_tx_rate = max_tx_rate; + } + + return 0; +} + +/** * ice_get_vf_stats - populate some stats for the VF * @netdev: the netdev of the PF * @vf_id: the host OS identifier (0-255) diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h index 842cb077df86..5ff93a08f54c 100644 --- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h +++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h @@ -70,6 +70,32 @@ struct ice_mdd_vf_events { u16 last_printed; }; +struct ice_vf; + +struct ice_vc_vf_ops { + int (*get_ver_msg)(struct ice_vf *vf, u8 *msg); + int (*get_vf_res_msg)(struct ice_vf *vf, u8 *msg); + void (*reset_vf)(struct ice_vf *vf); + int (*add_mac_addr_msg)(struct ice_vf *vf, u8 *msg); + int (*del_mac_addr_msg)(struct ice_vf *vf, u8 *msg); + int (*cfg_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*dis_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*request_qs_msg)(struct ice_vf *vf, u8 *msg); + int (*cfg_irq_map_msg)(struct ice_vf *vf, u8 *msg); + int (*config_rss_key)(struct ice_vf *vf, u8 *msg); + int (*config_rss_lut)(struct ice_vf *vf, u8 *msg); + int (*get_stats_msg)(struct ice_vf *vf, u8 *msg); + int (*cfg_promiscuous_mode_msg)(struct ice_vf *vf, u8 *msg); + int (*add_vlan_msg)(struct ice_vf *vf, u8 *msg); + int (*remove_vlan_msg)(struct ice_vf *vf, u8 *msg); + int (*ena_vlan_stripping)(struct ice_vf *vf); + int (*dis_vlan_stripping)(struct ice_vf *vf); + int (*handle_rss_cfg_msg)(struct ice_vf *vf, u8 *msg, bool add); + int (*add_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); + int (*del_fdir_fltr_msg)(struct ice_vf *vf, u8 *msg); +}; + /* VF information structure */ struct ice_vf { struct ice_pf *pf; @@ -99,7 +125,8 @@ struct ice_vf { * the main LAN VSI for the PF. 
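
The min_tx_rate handling added above is plain accounting: the sum of every VF's guaranteed rate, with this VF's previous value swapped for the requested one, must stay at or below the current link speed. A self-contained illustration of that check, using plain integers instead of the driver's types:

/* Illustrative only (no driver types): returns true when granting one VF
 * the requested guaranteed rate would exceed the link speed.
 */
static bool min_rate_oversubscribed(const int *vf_min_mbps, int num_vfs,
				    int vf_idx, int new_min_mbps,
				    int link_speed_mbps)
{
	int total = 0, i;

	for (i = 0; i < num_vfs; i++)
		total += vf_min_mbps[i];

	/* the requested value replaces this VF's previous guarantee */
	total -= vf_min_mbps[vf_idx];

	return total + new_min_mbps > link_speed_mbps;
}
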
*/ u16 lan_vsi_num; /* ID as used by firmware */ - unsigned int tx_rate; /* Tx bandwidth limit in Mbps */ + unsigned int min_tx_rate; /* Minimum Tx bandwidth limit in Mbps */ + unsigned int max_tx_rate; /* Maximum Tx bandwidth limit in Mbps */ DECLARE_BITMAP(vf_states, ICE_VF_STATES_NBITS); /* VF runtime states */ u64 num_inval_msgs; /* number of continuous invalid msgs */ @@ -111,9 +138,17 @@ struct ice_vf { struct ice_mdd_vf_events mdd_rx_events; struct ice_mdd_vf_events mdd_tx_events; DECLARE_BITMAP(opcodes_allowlist, VIRTCHNL_OP_MAX); + + struct ice_repr *repr; + + struct ice_vc_vf_ops vc_ops; + + /* devlink port data */ + struct devlink_port devlink_port; }; #ifdef CONFIG_PCI_IOV +struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf); void ice_process_vflr_event(struct ice_pf *pf); int ice_sriov_configure(struct pci_dev *pdev, int num_vfs); int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); @@ -124,6 +159,9 @@ void ice_free_vfs(struct ice_pf *pf); void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event); void ice_vc_notify_link_state(struct ice_pf *pf); void ice_vc_notify_reset(struct ice_pf *pf); +void ice_vc_notify_vf_link_state(struct ice_vf *vf); +void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops); +void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops); bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr); bool ice_reset_vf(struct ice_vf *vf, bool is_vflr); void ice_restore_all_vfs_msi_state(struct pci_dev *pdev); @@ -135,10 +173,18 @@ int ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos, __be16 vlan_proto); +int +ice_set_vf_bw(struct net_device *netdev, int vf_id, int min_tx_rate, + int max_tx_rate); + int ice_set_vf_trust(struct net_device *netdev, int vf_id, bool trusted); int ice_set_vf_link_state(struct net_device *netdev, int vf_id, int link_state); +int ice_check_vf_ready_for_cfg(struct ice_vf *vf); + +bool ice_is_vf_disabled(struct ice_vf *vf); + int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena); int ice_calc_vf_reg_idx(struct ice_vf *vf, struct ice_q_vector *q_vector); @@ -164,6 +210,9 @@ static inline void ice_vc_process_vf_msg(struct ice_pf *pf, struct ice_rq_event_info *event) { } static inline void ice_vc_notify_link_state(struct ice_pf *pf) { } static inline void ice_vc_notify_reset(struct ice_pf *pf) { } +static inline void ice_vc_notify_vf_link_state(struct ice_vf *vf) { } +static inline void ice_vc_change_ops_to_repr(struct ice_vc_vf_ops *ops) { } +static inline void ice_vc_set_dflt_vf_ops(struct ice_vc_vf_ops *ops) { } static inline void ice_set_vf_state_qs_dis(struct ice_vf *vf) { } static inline void ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event) { } @@ -171,6 +220,21 @@ static inline void ice_print_vfs_mdd_events(struct ice_pf *pf) { } static inline void ice_print_vf_rx_mdd_event(struct ice_vf *vf) { } static inline void ice_restore_all_vfs_msi_state(struct pci_dev *pdev) { } +static inline int ice_check_vf_ready_for_cfg(struct ice_vf *vf) +{ + return -EOPNOTSUPP; +} + +static inline bool ice_is_vf_disabled(struct ice_vf *vf) +{ + return true; +} + +static inline struct ice_vsi *ice_get_vf_vsi(struct ice_vf *vf) +{ + return NULL; +} + static inline bool ice_is_malicious_vf(struct ice_pf __always_unused *pf, struct ice_rq_event_info __always_unused *event, @@ -245,6 +309,14 @@ ice_set_vf_link_state(struct net_device __always_unused *netdev, } static inline int +ice_set_vf_bw(struct net_device __always_unused *netdev, + int 
__always_unused vf_id, int __always_unused min_tx_rate, + int __always_unused max_tx_rate) +{ + return -EOPNOTSUPP; +} + +static inline int ice_calc_vf_reg_idx(struct ice_vf __always_unused *vf, struct ice_q_vector __always_unused *q_vector) { diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c index 5a9f61deeb38..ff55cb415b11 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.c +++ b/drivers/net/ethernet/intel/ice/ice_xsk.c @@ -67,7 +67,7 @@ ice_qvec_toggle_napi(struct ice_vsi *vsi, struct ice_q_vector *q_vector, * @q_vector: queue vector */ static void -ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_ring *rx_ring, +ice_qvec_dis_irq(struct ice_vsi *vsi, struct ice_rx_ring *rx_ring, struct ice_q_vector *q_vector) { struct ice_pf *pf = vsi->back; @@ -104,16 +104,17 @@ ice_qvec_cfg_msix(struct ice_vsi *vsi, struct ice_q_vector *q_vector) u16 reg_idx = q_vector->reg_idx; struct ice_pf *pf = vsi->back; struct ice_hw *hw = &pf->hw; - struct ice_ring *ring; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; ice_cfg_itr(hw, q_vector); - ice_for_each_ring(ring, q_vector->tx) - ice_cfg_txq_interrupt(vsi, ring->reg_idx, reg_idx, + ice_for_each_tx_ring(tx_ring, q_vector->tx) + ice_cfg_txq_interrupt(vsi, tx_ring->reg_idx, reg_idx, q_vector->tx.itr_idx); - ice_for_each_ring(ring, q_vector->rx) - ice_cfg_rxq_interrupt(vsi, ring->reg_idx, reg_idx, + ice_for_each_rx_ring(rx_ring, q_vector->rx) + ice_cfg_rxq_interrupt(vsi, rx_ring->reg_idx, reg_idx, q_vector->rx.itr_idx); ice_flush(hw); @@ -144,8 +145,9 @@ static void ice_qvec_ena_irq(struct ice_vsi *vsi, struct ice_q_vector *q_vector) static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) { struct ice_txq_meta txq_meta = { }; - struct ice_ring *tx_ring, *rx_ring; struct ice_q_vector *q_vector; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; int timeout = 50; int err; @@ -171,7 +173,7 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) if (err) return err; if (ice_is_xdp_ena_vsi(vsi)) { - struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; memset(&txq_meta, 0, sizeof(txq_meta)); ice_fill_txq_meta(vsi, xdp_ring, &txq_meta); @@ -201,8 +203,9 @@ static int ice_qp_dis(struct ice_vsi *vsi, u16 q_idx) static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) { struct ice_aqc_add_tx_qgrp *qg_buf; - struct ice_ring *tx_ring, *rx_ring; struct ice_q_vector *q_vector; + struct ice_tx_ring *tx_ring; + struct ice_rx_ring *rx_ring; u16 size; int err; @@ -225,7 +228,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) goto free_buf; if (ice_is_xdp_ena_vsi(vsi)) { - struct ice_ring *xdp_ring = vsi->xdp_rings[q_idx]; + struct ice_tx_ring *xdp_ring = vsi->xdp_rings[q_idx]; memset(qg_buf, 0, size); qg_buf->num_txqs = 1; @@ -233,7 +236,7 @@ static int ice_qp_ena(struct ice_vsi *vsi, u16 q_idx) if (err) goto free_buf; ice_set_ring_xdp(xdp_ring); - xdp_ring->xsk_pool = ice_xsk_pool(xdp_ring); + xdp_ring->xsk_pool = ice_tx_xsk_pool(xdp_ring); } err = ice_vsi_cfg_rxq(rx_ring); @@ -360,56 +363,50 @@ xsk_pool_if_up: * * Returns true if all allocations were successful, false if any fail. 
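
The rewrite of ice_alloc_rx_bufs_zc() that follows replaces the per-buffer xsk_buff_alloc() loop with a single xsk_buff_alloc_batch() call clamped to the end of the ring, so one batch never wraps and the descriptors can be written in a tight loop. A condensed sketch of that fill pattern with generic names (illustrative, not the driver's own helper):

#include <net/xdp_sock_drv.h>	/* xsk_buff_alloc_batch(), xsk_buff_xdp_get_dma() */

/* Generic illustration of the batched fill: grab up to the ring end in one
 * call, write the DMA addresses into the descriptors, and report whether
 * the request was fully satisfied so the caller can retry later.
 */
static bool fill_rx_batch(struct xsk_buff_pool *pool, struct xdp_buff **xdp_buf,
			  dma_addr_t *desc_addr, u32 ring_size, u32 ntu,
			  u32 count)
{
	u32 nb, i;

	nb = min_t(u32, count, ring_size - ntu);	/* a batch never wraps the ring */
	nb = xsk_buff_alloc_batch(pool, &xdp_buf[ntu], nb);

	for (i = 0; i < nb; i++)
		desc_addr[ntu + i] = xsk_buff_xdp_get_dma(xdp_buf[ntu + i]);

	return nb == count;
}
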
*/ -bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count) +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count) { union ice_32b_rx_flex_desc *rx_desc; u16 ntu = rx_ring->next_to_use; - struct ice_rx_buf *rx_buf; - bool ok = true; + struct xdp_buff **xdp; + u32 nb_buffs, i; dma_addr_t dma; - if (!count) - return true; - rx_desc = ICE_RX_DESC(rx_ring, ntu); - rx_buf = &rx_ring->rx_buf[ntu]; + xdp = &rx_ring->xdp_buf[ntu]; - do { - rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_pool); - if (!rx_buf->xdp) { - ok = false; - break; - } + nb_buffs = min_t(u16, count, rx_ring->count - ntu); + nb_buffs = xsk_buff_alloc_batch(rx_ring->xsk_pool, xdp, nb_buffs); + if (!nb_buffs) + return false; - dma = xsk_buff_xdp_get_dma(rx_buf->xdp); + i = nb_buffs; + while (i--) { + dma = xsk_buff_xdp_get_dma(*xdp); rx_desc->read.pkt_addr = cpu_to_le64(dma); - rx_desc->wb.status_error0 = 0; rx_desc++; - rx_buf++; - ntu++; - - if (unlikely(ntu == rx_ring->count)) { - rx_desc = ICE_RX_DESC(rx_ring, 0); - rx_buf = rx_ring->rx_buf; - ntu = 0; - } - } while (--count); + xdp++; + } - if (rx_ring->next_to_use != ntu) { - /* clear the status bits for the next_to_use descriptor */ - rx_desc->wb.status_error0 = 0; - ice_release_rx_desc(rx_ring, ntu); + ntu += nb_buffs; + if (ntu == rx_ring->count) { + rx_desc = ICE_RX_DESC(rx_ring, 0); + xdp = rx_ring->xdp_buf; + ntu = 0; } - return ok; + /* clear the status bits for the next_to_use descriptor */ + rx_desc->wb.status_error0 = 0; + ice_release_rx_desc(rx_ring, ntu); + + return count == nb_buffs; } /** * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring * @rx_ring: Rx ring */ -static void ice_bump_ntc(struct ice_ring *rx_ring) +static void ice_bump_ntc(struct ice_rx_ring *rx_ring) { int ntc = rx_ring->next_to_clean + 1; @@ -421,19 +418,19 @@ static void ice_bump_ntc(struct ice_ring *rx_ring) /** * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer * @rx_ring: Rx ring - * @rx_buf: zero-copy Rx buffer + * @xdp_arr: Pointer to the SW ring of xdp_buff pointers * * This function allocates a new skb from a zero-copy Rx buffer. * * Returns the skb on success, NULL on failure. 
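
ice_construct_skb_zc(), rewritten just below to take the xdp_buff pointer straight from the software ring, follows the usual XDP_PASS copy-out pattern in zero-copy mode: the payload and metadata are copied into a freshly allocated skb and the XSK buffer is returned to the pool right away. A condensed restatement with generic names, for readability next to the +/- noise of the hunk:

#include <linux/skbuff.h>
#include <net/xdp_sock_drv.h>

/* Condensed restatement of the copy-out path: reserve headroom, copy the
 * payload (and metadata) into a new skb, then free the XSK buffer so it
 * can be recycled immediately.
 */
static struct sk_buff *copy_zc_buf_to_skb(struct napi_struct *napi,
					  struct xdp_buff *xdp)
{
	unsigned int metasize = xdp->data - xdp->data_meta;
	unsigned int datasize = xdp->data_end - xdp->data;
	struct sk_buff *skb;

	skb = __napi_alloc_skb(napi, xdp->data_end - xdp->data_hard_start,
			       GFP_ATOMIC | __GFP_NOWARN);
	if (unlikely(!skb))
		return NULL;

	skb_reserve(skb, xdp->data - xdp->data_hard_start);
	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
	if (metasize)
		skb_metadata_set(skb, metasize);

	xsk_buff_free(xdp);
	return skb;
}
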
*/ static struct sk_buff * -ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +ice_construct_skb_zc(struct ice_rx_ring *rx_ring, struct xdp_buff **xdp_arr) { - unsigned int metasize = rx_buf->xdp->data - rx_buf->xdp->data_meta; - unsigned int datasize = rx_buf->xdp->data_end - rx_buf->xdp->data; - unsigned int datasize_hard = rx_buf->xdp->data_end - - rx_buf->xdp->data_hard_start; + struct xdp_buff *xdp = *xdp_arr; + unsigned int metasize = xdp->data - xdp->data_meta; + unsigned int datasize = xdp->data_end - xdp->data; + unsigned int datasize_hard = xdp->data_end - xdp->data_hard_start; struct sk_buff *skb; skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard, @@ -441,13 +438,13 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) if (unlikely(!skb)) return NULL; - skb_reserve(skb, rx_buf->xdp->data - rx_buf->xdp->data_hard_start); - memcpy(__skb_put(skb, datasize), rx_buf->xdp->data, datasize); + skb_reserve(skb, xdp->data - xdp->data_hard_start); + memcpy(__skb_put(skb, datasize), xdp->data, datasize); if (metasize) skb_metadata_set(skb, metasize); - xsk_buff_free(rx_buf->xdp); - rx_buf->xdp = NULL; + xsk_buff_free(xdp); + *xdp_arr = NULL; return skb; } @@ -455,22 +452,18 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) * ice_run_xdp_zc - Executes an XDP program in zero-copy path * @rx_ring: Rx ring * @xdp: xdp_buff used as input to the XDP program + * @xdp_prog: XDP program to run + * @xdp_ring: ring to be used for XDP_TX action * * Returns any of ICE_XDP_{PASS, CONSUMED, TX, REDIR} */ static int -ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) +ice_run_xdp_zc(struct ice_rx_ring *rx_ring, struct xdp_buff *xdp, + struct bpf_prog *xdp_prog, struct ice_tx_ring *xdp_ring) { int err, result = ICE_XDP_PASS; - struct bpf_prog *xdp_prog; - struct ice_ring *xdp_ring; u32 act; - /* ZC patch is enabled only when XDP program is set, - * so here it can not be NULL - */ - xdp_prog = READ_ONCE(rx_ring->xdp_prog); - act = bpf_prog_run_xdp(xdp_prog, xdp); if (likely(act == XDP_REDIRECT)) { @@ -484,7 +477,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp) case XDP_PASS: break; case XDP_TX: - xdp_ring = rx_ring->vsi->xdp_rings[rx_ring->q_index]; result = ice_xmit_xdp_buff(xdp, xdp_ring); if (result == ICE_XDP_CONSUMED) goto out_failure; @@ -511,17 +503,25 @@ out_failure: * * Returns number of processed packets on success, remaining budget on failure. 
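
The reworked ice_run_xdp_zc() above now receives both the XDP program and the paired XDP Tx ring from its caller instead of looking them up per packet. Stripped of the warning and trace hooks, its verdict handling reduces to the skeleton below (hypothetical function name, failure paths simplified):

/* Hypothetical name; headers assumed: <linux/bpf.h>, <net/xdp.h>, plus the
 * driver's own definitions for the ICE_XDP_* verdicts.
 */
static int run_xdp_verdict(struct bpf_prog *xdp_prog, struct xdp_buff *xdp,
			   struct ice_rx_ring *rx_ring,
			   struct ice_tx_ring *xdp_ring)
{
	u32 act = bpf_prog_run_xdp(xdp_prog, xdp);

	if (likely(act == XDP_REDIRECT))
		return xdp_do_redirect(rx_ring->netdev, xdp, xdp_prog) ?
		       ICE_XDP_CONSUMED : ICE_XDP_REDIR;

	switch (act) {
	case XDP_PASS:
		return ICE_XDP_PASS;
	case XDP_TX:
		/* transmit on the ring chosen once per poll by the caller */
		return ice_xmit_xdp_buff(xdp, xdp_ring);
	default:
		return ICE_XDP_CONSUMED;	/* drop, aborted, unknown action */
	}
}
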
*/ -int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget) { unsigned int total_rx_bytes = 0, total_rx_packets = 0; u16 cleaned_count = ICE_DESC_UNUSED(rx_ring); + struct ice_tx_ring *xdp_ring; unsigned int xdp_xmit = 0; + struct bpf_prog *xdp_prog; bool failure = false; + /* ZC patch is enabled only when XDP program is set, + * so here it can not be NULL + */ + xdp_prog = READ_ONCE(rx_ring->xdp_prog); + xdp_ring = rx_ring->xdp_ring; + while (likely(total_rx_packets < (unsigned int)budget)) { union ice_32b_rx_flex_desc *rx_desc; unsigned int size, xdp_res = 0; - struct ice_rx_buf *rx_buf; + struct xdp_buff **xdp; struct sk_buff *skb; u16 stat_err_bits; u16 vlan_tag = 0; @@ -544,18 +544,18 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) if (!size) break; - rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; - rx_buf->xdp->data_end = rx_buf->xdp->data + size; - xsk_buff_dma_sync_for_cpu(rx_buf->xdp, rx_ring->xsk_pool); + xdp = &rx_ring->xdp_buf[rx_ring->next_to_clean]; + xsk_buff_set_size(*xdp, size); + xsk_buff_dma_sync_for_cpu(*xdp, rx_ring->xsk_pool); - xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp); + xdp_res = ice_run_xdp_zc(rx_ring, *xdp, xdp_prog, xdp_ring); if (xdp_res) { if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) xdp_xmit |= xdp_res; else - xsk_buff_free(rx_buf->xdp); + xsk_buff_free(*xdp); - rx_buf->xdp = NULL; + *xdp = NULL; total_rx_bytes += size; total_rx_packets++; cleaned_count++; @@ -565,7 +565,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) } /* XDP_PASS path */ - skb = ice_construct_skb_zc(rx_ring, rx_buf); + skb = ice_construct_skb_zc(rx_ring, xdp); if (!skb) { rx_ring->rx_stats.alloc_buf_failed++; break; @@ -596,7 +596,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) if (cleaned_count >= ICE_RX_BUF_WRITE) failure = !ice_alloc_rx_bufs_zc(rx_ring, cleaned_count); - ice_finalize_xdp_rx(rx_ring, xdp_xmit); + ice_finalize_xdp_rx(xdp_ring, xdp_xmit); ice_update_rx_ring_stats(rx_ring, total_rx_packets, total_rx_bytes); if (xsk_uses_need_wakeup(rx_ring->xsk_pool)) { @@ -618,7 +618,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget) * * Returns true if cleanup/transmission is done. */ -static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) +static bool ice_xmit_zc(struct ice_tx_ring *xdp_ring, int budget) { struct ice_tx_desc *tx_desc = NULL; bool work_done = true; @@ -669,7 +669,7 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget) * @tx_buf: Tx buffer to clean */ static void -ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf) +ice_clean_xdp_tx_buf(struct ice_tx_ring *xdp_ring, struct ice_tx_buf *tx_buf) { xdp_return_frame((struct xdp_frame *)tx_buf->raw_buf); dma_unmap_single(xdp_ring->dev, dma_unmap_addr(tx_buf, dma), @@ -684,7 +684,7 @@ ice_clean_xdp_tx_buf(struct ice_ring *xdp_ring, struct ice_tx_buf *tx_buf) * * Returns true if cleanup/tranmission is done. 
*/ -bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget) +bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget) { int total_packets = 0, total_bytes = 0; s16 ntc = xdp_ring->next_to_clean; @@ -757,7 +757,7 @@ ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, struct ice_netdev_priv *np = netdev_priv(netdev); struct ice_q_vector *q_vector; struct ice_vsi *vsi = np->vsi; - struct ice_ring *ring; + struct ice_tx_ring *ring; if (test_bit(ICE_DOWN, vsi->state)) return -ENETDOWN; @@ -808,17 +808,17 @@ bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi) * ice_xsk_clean_rx_ring - clean buffer pool queues connected to a given Rx ring * @rx_ring: ring to be cleaned */ -void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) +void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { u16 i; for (i = 0; i < rx_ring->count; i++) { - struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i]; + struct xdp_buff **xdp = &rx_ring->xdp_buf[i]; - if (!rx_buf->xdp) + if (!xdp) continue; - rx_buf->xdp = NULL; + *xdp = NULL; } } @@ -826,7 +826,7 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) * ice_xsk_clean_xdp_ring - Clean the XDP Tx ring and its buffer pool queues * @xdp_ring: XDP_Tx ring */ -void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) +void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { u16 ntc = xdp_ring->next_to_clean, ntu = xdp_ring->next_to_use; u32 xsk_frames = 0; diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h index ea208808623a..4c7bd8e9dfc4 100644 --- a/drivers/net/ethernet/intel/ice/ice_xsk.h +++ b/drivers/net/ethernet/intel/ice/ice_xsk.h @@ -11,13 +11,13 @@ struct ice_vsi; #ifdef CONFIG_XDP_SOCKETS int ice_xsk_pool_setup(struct ice_vsi *vsi, struct xsk_buff_pool *pool, u16 qid); -int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget); -bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget); +int ice_clean_rx_irq_zc(struct ice_rx_ring *rx_ring, int budget); +bool ice_clean_tx_irq_zc(struct ice_tx_ring *xdp_ring, int budget); int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags); -bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count); +bool ice_alloc_rx_bufs_zc(struct ice_rx_ring *rx_ring, u16 count); bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi); -void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring); -void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring); +void ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring); +void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring); #else static inline int ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, @@ -28,21 +28,21 @@ ice_xsk_pool_setup(struct ice_vsi __always_unused *vsi, } static inline int -ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring, +ice_clean_rx_irq_zc(struct ice_rx_ring __always_unused *rx_ring, int __always_unused budget) { return 0; } static inline bool -ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring, +ice_clean_tx_irq_zc(struct ice_tx_ring __always_unused *xdp_ring, int __always_unused budget) { return false; } static inline bool -ice_alloc_rx_bufs_zc(struct ice_ring __always_unused *rx_ring, +ice_alloc_rx_bufs_zc(struct ice_rx_ring __always_unused *rx_ring, u16 __always_unused count) { return false; @@ -60,7 +60,7 @@ ice_xsk_wakeup(struct net_device __always_unused *netdev, return -EOPNOTSUPP; } -static inline void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring) { } -static inline void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring) { } +static inline void 
ice_xsk_clean_rx_ring(struct ice_rx_ring *rx_ring) { } +static inline void ice_xsk_clean_xdp_ring(struct ice_tx_ring *xdp_ring) { } #endif /* CONFIG_XDP_SOCKETS */ #endif /* !_ICE_XSK_H_ */ diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 751de06019a0..836be0d3b291 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -577,16 +577,15 @@ static void igb_set_i2c_data(void *data, int state) struct e1000_hw *hw = &adapter->hw; s32 i2cctl = rd32(E1000_I2CPARAMS); - if (state) - i2cctl |= E1000_I2C_DATA_OUT; - else + if (state) { + i2cctl |= E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N; + } else { + i2cctl &= ~E1000_I2C_DATA_OE_N; i2cctl &= ~E1000_I2C_DATA_OUT; + } - i2cctl &= ~E1000_I2C_DATA_OE_N; - i2cctl |= E1000_I2C_CLK_OE_N; wr32(E1000_I2CPARAMS, i2cctl); wrfl(); - } /** @@ -603,8 +602,7 @@ static void igb_set_i2c_clk(void *data, int state) s32 i2cctl = rd32(E1000_I2CPARAMS); if (state) { - i2cctl |= E1000_I2C_CLK_OUT; - i2cctl &= ~E1000_I2C_CLK_OE_N; + i2cctl |= E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N; } else { i2cctl &= ~E1000_I2C_CLK_OUT; i2cctl &= ~E1000_I2C_CLK_OE_N; @@ -3116,12 +3114,21 @@ static void igb_init_mas(struct igb_adapter *adapter) **/ static s32 igb_init_i2c(struct igb_adapter *adapter) { + struct e1000_hw *hw = &adapter->hw; s32 status = 0; + s32 i2cctl; /* I2C interface supported on i350 devices */ if (adapter->hw.mac.type != e1000_i350) return 0; + i2cctl = rd32(E1000_I2CPARAMS); + i2cctl |= E1000_I2CBB_EN + | E1000_I2C_CLK_OUT | E1000_I2C_CLK_OE_N + | E1000_I2C_DATA_OUT | E1000_I2C_DATA_OE_N; + wr32(E1000_I2CPARAMS, i2cctl); + wrfl(); + /* Initialize the i2c bus which is controlled by the registers. * This bus will use the i2c_algo_bit structure that implements * the protocol through toggling of the 4 bits in the register. 
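
The igb, igbvf, igc, ixgb, ixgbe and ixgbevf hunks that follow all repeat the same mechanical conversion: MAC addresses are no longer written into netdev->dev_addr with memcpy() or ether_addr_copy() but handed to eth_hw_addr_set(), the helper drivers must use now that dev_addr is managed by the networking core. The shape of the conversion on a generic driver (illustrative only):

#include <linux/etherdevice.h>

/* Illustrative only: adopt a MAC address read from NVM/EEPROM.
 * Previously a driver would write the address directly with
 *	memcpy(netdev->dev_addr, mac, ETH_ALEN);
 * eth_hw_addr_set() performs the same assignment through the core.
 */
static int example_adopt_mac(struct net_device *netdev, const u8 *mac)
{
	if (!is_valid_ether_addr(mac))
		return -EADDRNOTAVAIL;

	eth_hw_addr_set(netdev, mac);
	return 0;
}
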
@@ -3356,7 +3363,7 @@ static int igb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) dev_err(&pdev->dev, "NVM Read Error\n"); } - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + eth_hw_addr_set(netdev, hw->mac.addr); if (!is_valid_ether_addr(netdev->dev_addr)) { dev_err(&pdev->dev, "Invalid MAC Address\n"); @@ -4988,7 +4995,7 @@ static int igb_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); /* set the correct pool for the new PF MAC address in entry 0 */ diff --git a/drivers/net/ethernet/intel/igbvf/netdev.c b/drivers/net/ethernet/intel/igbvf/netdev.c index d32e72d953c8..74ccd622251a 100644 --- a/drivers/net/ethernet/intel/igbvf/netdev.c +++ b/drivers/net/ethernet/intel/igbvf/netdev.c @@ -1527,8 +1527,7 @@ static void igbvf_reset(struct igbvf_adapter *adapter) spin_unlock_bh(&hw->mbx_lock); if (is_valid_ether_addr(adapter->hw.mac.addr)) { - memcpy(netdev->dev_addr, adapter->hw.mac.addr, - netdev->addr_len); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); memcpy(netdev->perm_addr, adapter->hw.mac.addr, netdev->addr_len); } @@ -1813,7 +1812,7 @@ static int igbvf_set_mac(struct net_device *netdev, void *p) if (!ether_addr_equal(addr->sa_data, hw->mac.addr)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); return 0; } @@ -2816,8 +2815,7 @@ static int igbvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) else if (is_zero_ether_addr(adapter->hw.mac.addr)) dev_info(&pdev->dev, "MAC address not assigned by administrator.\n"); - memcpy(netdev->dev_addr, adapter->hw.mac.addr, - netdev->addr_len); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); } spin_unlock_bh(&hw->mbx_lock); diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c index 84f142f5e472..f068b66b8025 100644 --- a/drivers/net/ethernet/intel/igc/igc_base.c +++ b/drivers/net/ethernet/intel/igc/igc_base.c @@ -40,7 +40,7 @@ static s32 igc_reset_hw_base(struct igc_hw *hw) ctrl = rd32(IGC_CTRL); hw_dbg("Issuing a global reset to MAC\n"); - wr32(IGC_CTRL, ctrl | IGC_CTRL_DEV_RST); + wr32(IGC_CTRL, ctrl | IGC_CTRL_RST); ret_val = igc_get_auto_rd_done(hw); if (ret_val) { @@ -158,11 +158,6 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw) struct igc_phy_info *phy = &hw->phy; s32 ret_val = 0; - if (hw->phy.media_type != igc_media_type_copper) { - phy->type = igc_phy_none; - goto out; - } - phy->autoneg_mask = AUTONEG_ADVERTISE_SPEED_DEFAULT_2500; phy->reset_delay_us = 100; @@ -207,6 +202,7 @@ static s32 igc_get_invariants_base(struct igc_hw *hw) case IGC_DEV_ID_I225_K2: case IGC_DEV_ID_I226_K: case IGC_DEV_ID_I225_LMVP: + case IGC_DEV_ID_I226_LMVP: case IGC_DEV_ID_I225_IT: case IGC_DEV_ID_I226_LM: case IGC_DEV_ID_I226_V: diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h index a4bbee748798..c7fe61509d5b 100644 --- a/drivers/net/ethernet/intel/igc/igc_defines.h +++ b/drivers/net/ethernet/intel/igc/igc_defines.h @@ -130,7 +130,7 @@ #define IGC_ERR_SWFW_SYNC 13 /* Device Control */ -#define IGC_CTRL_DEV_RST 0x20000000 /* Device reset */ +#define IGC_CTRL_RST 0x04000000 /* Global reset */ #define IGC_CTRL_PHY_RST 0x80000000 /* PHY Reset */ #define IGC_CTRL_SLU 0x00000040 /* Set link up (Force Link) */ diff --git 
a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h index 4e0203336c6b..587db7483f25 100644 --- a/drivers/net/ethernet/intel/igc/igc_hw.h +++ b/drivers/net/ethernet/intel/igc/igc_hw.h @@ -24,6 +24,7 @@ #define IGC_DEV_ID_I225_K2 0x3101 #define IGC_DEV_ID_I226_K 0x3102 #define IGC_DEV_ID_I225_LMVP 0x5502 +#define IGC_DEV_ID_I226_LMVP 0x5503 #define IGC_DEV_ID_I225_IT 0x0D9F #define IGC_DEV_ID_I226_LM 0x125B #define IGC_DEV_ID_I226_V 0x125C diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c index 0e19b4d02e62..8e448288ee26 100644 --- a/drivers/net/ethernet/intel/igc/igc_main.c +++ b/drivers/net/ethernet/intel/igc/igc_main.c @@ -56,6 +56,7 @@ static const struct pci_device_id igc_pci_tbl[] = { { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_K), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base }, + { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LMVP), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_LM), board_base }, { PCI_VDEVICE(INTEL, IGC_DEV_ID_I226_V), board_base }, @@ -949,7 +950,7 @@ static int igc_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); /* set the correct pool for the new PF MAC address in entry 0 */ @@ -6377,7 +6378,7 @@ static int igc_probe(struct pci_dev *pdev, dev_err(&pdev->dev, "NVM Read Error\n"); } - memcpy(netdev->dev_addr, hw->mac.addr, netdev->addr_len); + eth_hw_addr_set(netdev, hw->mac.addr); if (!is_valid_ether_addr(netdev->dev_addr)) { dev_err(&pdev->dev, "Invalid MAC Address\n"); diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c index 0f021909b430..30568e3544cd 100644 --- a/drivers/net/ethernet/intel/igc/igc_ptp.c +++ b/drivers/net/ethernet/intel/igc/igc_ptp.c @@ -773,7 +773,7 @@ static bool igc_is_crosststamp_supported(struct igc_adapter *adapter) static struct system_counterval_t igc_device_tstamp_to_system(u64 tstamp) { -#if IS_ENABLED(CONFIG_X86_TSC) +#if IS_ENABLED(CONFIG_X86_TSC) && !defined(CONFIG_UML) return convert_art_ns_to_tsc(tstamp); #else return (struct system_counterval_t) { }; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c index a430871d1c27..c8d1e815ec6b 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.c @@ -549,7 +549,7 @@ ixgb_mta_set(struct ixgb_hw *hw, *****************************************************************************/ void ixgb_rar_set(struct ixgb_hw *hw, - u8 *addr, + const u8 *addr, u32 index) { u32 rar_low, rar_high; diff --git a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h index 6064583095da..70bcff5fb3db 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_hw.h +++ b/drivers/net/ethernet/intel/ixgb/ixgb_hw.h @@ -740,7 +740,7 @@ bool ixgb_adapter_start(struct ixgb_hw *hw); void ixgb_check_for_link(struct ixgb_hw *hw); bool ixgb_check_for_bad_link(struct ixgb_hw *hw); -void ixgb_rar_set(struct ixgb_hw *hw, u8 *addr, u32 index); +void ixgb_rar_set(struct ixgb_hw *hw, const u8 *addr, u32 index); /* Filters (multicast, vlan, receive) */ void ixgb_mc_addr_list_update(struct ixgb_hw *hw, u8 *mc_addr_list, diff --git 
a/drivers/net/ethernet/intel/ixgb/ixgb_main.c b/drivers/net/ethernet/intel/ixgb/ixgb_main.c index 1588376d4c67..99d481904ce6 100644 --- a/drivers/net/ethernet/intel/ixgb/ixgb_main.c +++ b/drivers/net/ethernet/intel/ixgb/ixgb_main.c @@ -362,6 +362,7 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) struct ixgb_adapter *adapter; static int cards_found = 0; int pci_using_dac; + u8 addr[ETH_ALEN]; int i; int err; @@ -461,7 +462,8 @@ ixgb_probe(struct pci_dev *pdev, const struct pci_device_id *ent) goto err_eeprom; } - ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr); + ixgb_get_ee_mac_addr(&adapter->hw, addr); + eth_hw_addr_set(netdev, addr); if (!is_valid_ether_addr(netdev->dev_addr)) { netif_err(adapter, probe, adapter->netdev, "Invalid MAC Address\n"); @@ -1030,7 +1032,7 @@ ixgb_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); ixgb_rar_set(&adapter->hw, addr->sa_data, 0); @@ -2227,6 +2229,7 @@ static pci_ers_result_t ixgb_io_slot_reset(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); struct ixgb_adapter *adapter = netdev_priv(netdev); + u8 addr[ETH_ALEN]; if (pci_enable_device(pdev)) { netif_err(adapter, probe, adapter->netdev, @@ -2250,7 +2253,8 @@ static pci_ers_result_t ixgb_io_slot_reset(struct pci_dev *pdev) "After reset, the EEPROM checksum is not valid\n"); return PCI_ERS_RESULT_DISCONNECT; } - ixgb_get_ee_mac_addr(&adapter->hw, netdev->dev_addr); + ixgb_get_ee_mac_addr(&adapter->hw, addr); + eth_hw_addr_set(netdev, addr); memcpy(netdev->perm_addr, netdev->dev_addr, netdev->addr_len); if (!is_valid_ether_addr(netdev->perm_addr)) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h index a604552fa634..4a69823e6abd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h @@ -351,6 +351,7 @@ struct ixgbe_ring { }; u16 rx_offset; struct xdp_rxq_info xdp_rxq; + spinlock_t tx_lock; /* used in XDP mode */ struct xsk_buff_pool *xsk_pool; u16 ring_idx; /* {rx,tx,xdp}_ring back reference idx */ u16 rx_buf_len; @@ -375,11 +376,13 @@ enum ixgbe_ring_f_enum { #define IXGBE_MAX_FCOE_INDICES 8 #define MAX_RX_QUEUES (IXGBE_MAX_FDIR_INDICES + 1) #define MAX_TX_QUEUES (IXGBE_MAX_FDIR_INDICES + 1) -#define MAX_XDP_QUEUES (IXGBE_MAX_FDIR_INDICES + 1) +#define IXGBE_MAX_XDP_QS (IXGBE_MAX_FDIR_INDICES + 1) #define IXGBE_MAX_L2A_QUEUES 4 #define IXGBE_BAD_L2A_QUEUE 3 #define IXGBE_MAX_MACVLANS 63 +DECLARE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); + struct ixgbe_ring_feature { u16 limit; /* upper limit on feature indices */ u16 indices; /* current value of indices */ @@ -629,7 +632,7 @@ struct ixgbe_adapter { /* XDP */ int num_xdp_queues; - struct ixgbe_ring *xdp_ring[MAX_XDP_QUEUES]; + struct ixgbe_ring *xdp_ring[IXGBE_MAX_XDP_QS]; unsigned long *af_xdp_zc_qps; /* tracks AF_XDP ZC enabled rings */ /* TX */ @@ -772,6 +775,22 @@ struct ixgbe_adapter { #endif /* CONFIG_IXGBE_IPSEC */ }; +static inline int ixgbe_determine_xdp_q_idx(int cpu) +{ + if (static_key_enabled(&ixgbe_xdp_locking_key)) + return cpu % IXGBE_MAX_XDP_QS; + else + return cpu; +} + +static inline +struct ixgbe_ring *ixgbe_determine_xdp_ring(struct ixgbe_adapter *adapter) +{ + int index = ixgbe_determine_xdp_q_idx(smp_processor_id()); + + return adapter->xdp_ring[index]; +} + static inline u8 ixgbe_max_rss_indices(struct ixgbe_adapter *adapter) { 
switch (adapter->hw.mac.type) { diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c index beda8e0ef7d4..8362822316a9 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_ethtool.c @@ -467,9 +467,8 @@ static int ixgbe_set_link_ksettings(struct net_device *netdev, * this function does not support duplex forcing, but can * limit the advertising of the adapter to the specified speed */ - if (!bitmap_subset(cmd->link_modes.advertising, - cmd->link_modes.supported, - __ETHTOOL_LINK_MODE_MASK_NBITS)) + if (!linkmode_subset(cmd->link_modes.advertising, + cmd->link_modes.supported)) return -EINVAL; /* only allow one speed at a time if no autoneg */ diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c index 0218f6c9b925..86b11164655e 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_lib.c @@ -299,7 +299,10 @@ static void ixgbe_cache_ring_register(struct ixgbe_adapter *adapter) static int ixgbe_xdp_queues(struct ixgbe_adapter *adapter) { - return adapter->xdp_prog ? nr_cpu_ids : 0; + int queues; + + queues = min_t(int, IXGBE_MAX_XDP_QS, nr_cpu_ids); + return adapter->xdp_prog ? queues : 0; } #define IXGBE_RSS_64Q_MASK 0x3F @@ -947,6 +950,7 @@ static int ixgbe_alloc_q_vector(struct ixgbe_adapter *adapter, ring->count = adapter->tx_ring_count; ring->queue_index = xdp_idx; set_ring_xdp(ring); + spin_lock_init(&ring->tx_lock); /* assign ring to adapter */ WRITE_ONCE(adapter->xdp_ring[xdp_idx], ring); @@ -1032,6 +1036,9 @@ static void ixgbe_free_q_vector(struct ixgbe_adapter *adapter, int v_idx) adapter->q_vector[v_idx] = NULL; __netif_napi_del(&q_vector->napi); + if (static_key_enabled(&ixgbe_xdp_locking_key)) + static_branch_dec(&ixgbe_xdp_locking_key); + /* * after a call to __netif_napi_del() napi may still be used and * ixgbe_get_stats64() might access the rings on this vector, diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 13c4782b920a..0f9f022260d7 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -165,6 +165,9 @@ MODULE_AUTHOR("Intel Corporation, <linux.nics@intel.com>"); MODULE_DESCRIPTION("Intel(R) 10 Gigabit PCI Express Network Driver"); MODULE_LICENSE("GPL v2"); +DEFINE_STATIC_KEY_FALSE(ixgbe_xdp_locking_key); +EXPORT_SYMBOL(ixgbe_xdp_locking_key); + static struct workqueue_struct *ixgbe_wq; static bool ixgbe_check_cfg_remove(struct ixgbe_hw *hw, struct pci_dev *pdev); @@ -2197,6 +2200,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, { int err, result = IXGBE_XDP_PASS; struct bpf_prog *xdp_prog; + struct ixgbe_ring *ring; struct xdp_frame *xdpf; u32 act; @@ -2215,7 +2219,12 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter, xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) goto out_failure; - result = ixgbe_xmit_xdp_ring(adapter, xdpf); + ring = ixgbe_determine_xdp_ring(adapter); + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_lock(&ring->tx_lock); + result = ixgbe_xmit_xdp_ring(ring, xdpf); + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_unlock(&ring->tx_lock); if (result == IXGBE_XDP_CONSUMED) goto out_failure; break; @@ -2422,13 +2431,9 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, xdp_do_flush_map(); if (xdp_xmit & IXGBE_XDP_TX) { - struct 
ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; + struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter); - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. - */ - wmb(); - writel(ring->next_to_use, ring->tail); + ixgbe_xdp_ring_update_tail_locked(ring); } u64_stats_update_begin(&rx_ring->syncp); @@ -6320,7 +6325,7 @@ static int ixgbe_sw_init(struct ixgbe_adapter *adapter, if (ixgbe_init_rss_key(adapter)) return -ENOMEM; - adapter->af_xdp_zc_qps = bitmap_zalloc(MAX_XDP_QUEUES, GFP_KERNEL); + adapter->af_xdp_zc_qps = bitmap_zalloc(IXGBE_MAX_XDP_QS, GFP_KERNEL); if (!adapter->af_xdp_zc_qps) return -ENOMEM; @@ -8536,10 +8541,9 @@ static u16 ixgbe_select_queue(struct net_device *dev, struct sk_buff *skb, } #endif -int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, +int ixgbe_xmit_xdp_ring(struct ixgbe_ring *ring, struct xdp_frame *xdpf) { - struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; struct ixgbe_tx_buffer *tx_buffer; union ixgbe_adv_tx_desc *tx_desc; u32 len, cmd_type; @@ -8788,7 +8792,7 @@ static int ixgbe_set_mac(struct net_device *netdev, void *p) if (!is_valid_ether_addr(addr->sa_data)) return -EADDRNOTAVAIL; - memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len); + eth_hw_addr_set(netdev, addr->sa_data); memcpy(hw->mac.addr, addr->sa_data, netdev->addr_len); ixgbe_mac_set_default_filter(adapter); @@ -10131,8 +10135,13 @@ static int ixgbe_xdp_setup(struct net_device *dev, struct bpf_prog *prog) return -EINVAL; } - if (nr_cpu_ids > MAX_XDP_QUEUES) + /* if the number of cpus is much larger than the maximum of queues, + * we should stop it and then return with ENOMEM like before. + */ + if (nr_cpu_ids > IXGBE_MAX_XDP_QS * 2) return -ENOMEM; + else if (nr_cpu_ids > IXGBE_MAX_XDP_QS) + static_branch_inc(&ixgbe_xdp_locking_key); old_prog = xchg(&adapter->xdp_prog, prog); need_reset = (!!prog != !!old_prog); @@ -10199,6 +10208,15 @@ void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring) writel(ring->next_to_use, ring->tail); } +void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring) +{ + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_lock(&ring->tx_lock); + ixgbe_xdp_ring_update_tail(ring); + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_unlock(&ring->tx_lock); +} + static int ixgbe_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, u32 flags) { @@ -10216,18 +10234,21 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, /* During program transitions its possible adapter->xdp_prog is assigned * but ring has not been configured yet. In this case simply abort xmit. */ - ring = adapter->xdp_prog ? adapter->xdp_ring[smp_processor_id()] : NULL; + ring = adapter->xdp_prog ? 
ixgbe_determine_xdp_ring(adapter) : NULL; if (unlikely(!ring)) return -ENXIO; if (unlikely(test_bit(__IXGBE_TX_DISABLED, &ring->state))) return -ENXIO; + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_lock(&ring->tx_lock); + for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; int err; - err = ixgbe_xmit_xdp_ring(adapter, xdpf); + err = ixgbe_xmit_xdp_ring(ring, xdpf); if (err != IXGBE_XDP_TX) break; nxmit++; @@ -10236,6 +10257,9 @@ static int ixgbe_xdp_xmit(struct net_device *dev, int n, if (unlikely(flags & XDP_XMIT_FLUSH)) ixgbe_xdp_ring_update_tail(ring); + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_unlock(&ring->tx_lock); + return nxmit; } @@ -10903,7 +10927,7 @@ skip_sriov: eth_platform_get_mac_address(&adapter->pdev->dev, adapter->hw.mac.perm_addr); - memcpy(netdev->dev_addr, hw->mac.perm_addr, netdev->addr_len); + eth_hw_addr_set(netdev, hw->mac.perm_addr); if (!is_valid_ether_addr(netdev->dev_addr)) { e_dev_err("invalid MAC address\n"); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h index 2aeec78029bc..a82533f21d36 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h @@ -12,7 +12,7 @@ #define IXGBE_TXD_CMD (IXGBE_TXD_CMD_EOP | \ IXGBE_TXD_CMD_RS) -int ixgbe_xmit_xdp_ring(struct ixgbe_adapter *adapter, +int ixgbe_xmit_xdp_ring(struct ixgbe_ring *ring, struct xdp_frame *xdpf); bool ixgbe_cleanup_headers(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, @@ -23,6 +23,7 @@ void ixgbe_process_skb_fields(struct ixgbe_ring *rx_ring, void ixgbe_rx_skb(struct ixgbe_q_vector *q_vector, struct sk_buff *skb); void ixgbe_xdp_ring_update_tail(struct ixgbe_ring *ring); +void ixgbe_xdp_ring_update_tail_locked(struct ixgbe_ring *ring); void ixgbe_irq_rearm_queues(struct ixgbe_adapter *adapter, u64 qmask); void ixgbe_txrx_ring_disable(struct ixgbe_adapter *adapter, int ring); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c index b1d22e4d5ec9..db2bc58dfcfd 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c @@ -100,6 +100,7 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, { int err, result = IXGBE_XDP_PASS; struct bpf_prog *xdp_prog; + struct ixgbe_ring *ring; struct xdp_frame *xdpf; u32 act; @@ -120,7 +121,12 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter, xdpf = xdp_convert_buff_to_frame(xdp); if (unlikely(!xdpf)) goto out_failure; - result = ixgbe_xmit_xdp_ring(adapter, xdpf); + ring = ixgbe_determine_xdp_ring(adapter); + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_lock(&ring->tx_lock); + result = ixgbe_xmit_xdp_ring(ring, xdpf); + if (static_branch_unlikely(&ixgbe_xdp_locking_key)) + spin_unlock(&ring->tx_lock); if (result == IXGBE_XDP_CONSUMED) goto out_failure; break; @@ -334,13 +340,9 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector, xdp_do_flush_map(); if (xdp_xmit & IXGBE_XDP_TX) { - struct ixgbe_ring *ring = adapter->xdp_ring[smp_processor_id()]; + struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter); - /* Force memory writes to complete before letting h/w - * know there are new descriptors to fetch. 
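
In ixgbe the XDP Tx ring array is now capped at IXGBE_MAX_XDP_QS; when there are more CPUs than rings, ixgbe_determine_xdp_q_idx() folds the CPU id with a modulo and the ixgbe_xdp_locking_key static branch enables per-ring spinlocks, so the common one-ring-per-CPU case still pays no locking cost. The transmit pattern, condensed from the hunks above (all names are from the patch):

/* Condensed from the patch: take the per-ring lock only when rings are
 * shared between CPUs (static branch enabled at XDP setup time).
 */
static int example_xdp_xmit_one(struct ixgbe_adapter *adapter,
				struct xdp_frame *xdpf)
{
	struct ixgbe_ring *ring = ixgbe_determine_xdp_ring(adapter);
	int ret;

	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
		spin_lock(&ring->tx_lock);

	ret = ixgbe_xmit_xdp_ring(ring, xdpf);

	if (static_branch_unlikely(&ixgbe_xdp_locking_key))
		spin_unlock(&ring->tx_lock);

	return ret;
}
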
- */ - wmb(); - writel(ring->next_to_use, ring->tail); + ixgbe_xdp_ring_update_tail_locked(ring); } u64_stats_update_begin(&rx_ring->syncp); diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c index c714e1ecd308..d81811ab4ec4 100644 --- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c +++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c @@ -2540,7 +2540,7 @@ void ixgbevf_reset(struct ixgbevf_adapter *adapter) } if (is_valid_ether_addr(adapter->hw.mac.addr)) { - ether_addr_copy(netdev->dev_addr, adapter->hw.mac.addr); + eth_hw_addr_set(netdev, adapter->hw.mac.addr); ether_addr_copy(netdev->perm_addr, adapter->hw.mac.addr); } @@ -3054,7 +3054,7 @@ static int ixgbevf_sw_init(struct ixgbevf_adapter *adapter) else if (is_zero_ether_addr(adapter->hw.mac.addr)) dev_info(&pdev->dev, "MAC address not assigned by administrator.\n"); - ether_addr_copy(netdev->dev_addr, hw->mac.addr); + eth_hw_addr_set(netdev, hw->mac.addr); } if (!is_valid_ether_addr(netdev->dev_addr)) { @@ -4231,7 +4231,7 @@ static int ixgbevf_set_mac(struct net_device *netdev, void *p) ether_addr_copy(hw->mac.addr, addr->sa_data); ether_addr_copy(hw->mac.perm_addr, addr->sa_data); - ether_addr_copy(netdev->dev_addr, addr->sa_data); + eth_hw_addr_set(netdev, addr->sa_data); return 0; } diff --git a/drivers/net/ethernet/intel/ixgbevf/vf.c b/drivers/net/ethernet/intel/ixgbevf/vf.c index 5fc347abab3c..d459f5c8e98f 100644 --- a/drivers/net/ethernet/intel/ixgbevf/vf.c +++ b/drivers/net/ethernet/intel/ixgbevf/vf.c @@ -66,9 +66,9 @@ static s32 ixgbevf_reset_hw_vf(struct ixgbe_hw *hw) { struct ixgbe_mbx_info *mbx = &hw->mbx; u32 timeout = IXGBE_VF_INIT_TIMEOUT; - s32 ret_val = IXGBE_ERR_INVALID_MAC_ADDR; u32 msgbuf[IXGBE_VF_PERMADDR_MSG_LEN]; u8 *addr = (u8 *)(&msgbuf[1]); + s32 ret_val; /* Call adapter stop to disable tx/rx and clear interrupts */ hw->mac.ops.stop_adapter(hw); |