summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/intel/i40e/i40e_main.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-01-22 08:28:57 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-01-22 08:28:57 -0800
commit0ad9617c78acbc71373fb341a6f75d4012b01d69 (patch)
tree602d7c9ec86d9a4891a96a2996af6e4368a647eb /drivers/net/ethernet/intel/i40e/i40e_main.c
parent5f537664e705b0bf8b7e329861f20128534f6a83 (diff)
parentcf33d96f50903214226b379b3f10d1f262dae018 (diff)
Merge tag 'net-next-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next
Pull networking updates from Paolo Abeni: "This is slightly smaller than usual, with the most interesting work being still around RTNL scope reduction. Core: - More core refactoring to reduce the RTNL lock contention, including preparatory work for the per-network namespace RTNL lock, replacing RTNL lock with a per device-one to protect NAPI-related net device data and moving synchronize_net() calls outside such lock. - Extend drop reasons usage, adding net scheduler, AF_UNIX, bridge and more specific TCP coverage. - Reduce network namespace tear-down time by removing per-subsystems synchronize_net() in tipc and sched. - Add flow label selector support for fib rules, allowing traffic redirection based on such header field. Netfilter: - Do not remove netdev basechain when last device is gone, allowing netdev basechains without devices. - Revisit the flowtable teardown strategy, dealing better with fin, reset and re-open events. - Scale-up IP-vs connection dumping by avoiding linear search on each restart. Protocols: - A significant XDP socket refactor, consolidating and optimizing several helpers into the core - Better scaling of ICMP rate-limiting, by removing false-sharing in inet peers handling. - Introduces netlink notifications for multicast IPv4 and IPv6 address changes. - Add ipsec support for IP-TFS/AggFrag encapsulation, allowing aggregation and fragmentation of the inner IP. - Add sysctl to configure TIME-WAIT reuse delay for TCP sockets, to avoid local port exhaustion issues when the average connection lifetime is very short. - Support updating keys (re-keying) for connections using kernel TLS (for TLS 1.3 only). - Support ipv4-mapped ipv6 address clients in smc-r v2. - Add support for jumbo data packet transmission in RxRPC sockets, gluing multiple data packets in a single UDP packet. - Support RxRPC RACK-TLP to manage packet loss and retransmission in conjunction with the congestion control algorithm. Driver API: - Introduce a unified and structured interface for reporting PHY statistics, exposing consistent data across different H/W via ethtool. - Make timestamping selectable, allow the user to select the desired hwtstamp provider (PHY or MAC) administratively. - Add support for configuring a header-data-split threshold (HDS) value via ethtool, to deal with partial or buggy H/W implementation. - Consolidate DSA drivers Energy Efficiency Ethernet support. - Add EEE management to phylink, making use of the phylib implementation. - Add phylib support for in-band capabilities negotiation. - Simplify how phylib-enabled mac drivers expose the supported interfaces. Tests and tooling: - Make the YNL tool package-friendly to make it easier to deploy it separately from the kernel. - Increase TCP selftest coverage importing several packetdrill test-cases. - Regenerate the ethtool uapi header from the YNL spec, to ease maintenance and future development. - Add YNL support for decoding the link types used in net self-tests, allowing a single build to run both net and drivers/net. Drivers: - Ethernet high-speed NICs: - nVidia/Mellanox (mlx5): - add cross E-Switch QoS support - add SW Steering support for ConnectX-8 - implement support for HW-Managed Flow Steering, improving the rule deletion/insertion rate - support for multi-host LAG - Intel (ixgbe, ice, igb): - ice: add support for devlink health events - ixgbe: add initial support for E610 chipset variant - igb: add support for AF_XDP zero-copy - Meta: - add support for basic RSS config - allow changing the number of channels - add hardware monitoring support - Broadcom (bnxt): - implement TCP data split and HDS threshold ethtool support, enabling Device Memory TCP. - Marvell Octeon: - implement egress ipsec offload support for the cn10k family - Hisilicon (HIBMC): - implement unicast MAC filtering - Ethernet NICs embedded and virtual: - Convert UDP tunnel drivers to NETDEV_PCPU_STAT_DSTATS, avoiding contented atomic operations for drop counters - Freescale: - quicc: phylink conversion - enetc: support Tx and Rx checksum offload and improve TSO performances - MediaTek: - airoha: introduce support for ETS and HTB Qdisc offload - Microchip: - lan78XX USB: preparation work for phylink conversion - Synopsys (stmmac): - support DWMAC IP on NXP Automotive SoCs S32G2xx/S32G3xx/S32R45 - refactor EEE support to leverage the new driver API - optimize DMA and cache access to increase raw RX performances by 40% - TI: - icssg-prueth: add multicast filtering support for VLAN interface - netkit: - add ability to configure head/tailroom - VXLAN: - accepts packets with user-defined reserved bit - Ethernet switches: - Microchip: - lan969x: add RGMII support - lan969x: improve TX and RX performance using the FDMA engine - nVidia/Mellanox: - move Tx header handling to PCI driver, to ease XDP support - Ethernet PHYs: - Texas Instruments DP83822: - add support for GPIO2 clock output - Realtek: - 8169: add support for RTL8125D rev.b - rtl822x: add hwmon support for the temperature sensor - Microchip: - add support for RDS PTP hardware - consolidate periodic output signal generation - CAN: - several DT-bindings to DT schema conversions - tcan4x5x: - add HW standby support - support nWKRQ voltage selection - kvaser: - allowing Bus Error Reporting runtime configuration - WiFi: - the on-going Multi-Link Operation (MLO) effort continues, affecting both the stack and in drivers - mac80211/cfg80211: - Emergency Preparedness Communication Services (EPCS) station mode support - support for adding and removing station links for MLO - add support for WiFi 7/EHT mesh over 320 MHz channels - report Tx power info for each link - RealTek (rtw88): - enable USB Rx aggregation and USB 3 to improve performance - LED support - RealTek (rtw89): - refactor power save to support Multi-Link Operations - add support for RTL8922AE-VS variant - MediaTek (mt76): - single wiphy multiband support (preparation for MLO) - p2p device support - add TP-Link TXE50UH USB adapter support - Qualcomm (ath10k): - support for the QCA6698AQ IP core - Qualcomm (ath12k): - enable MLO for QCN9274 - Bluetooth: - Allow sysfs to trigger hdev reset, to allow recovering devices not responsive from user-space - MediaTek: add support for MT7922, MT7925, MT7921e devices - Realtek: add support for RTL8851BE devices - Qualcomm: add support for WCN785x devices - ISO: allow BIG re-sync" * tag 'net-next-6.14' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net-next: (1386 commits) net/rose: prevent integer overflows in rose_setsockopt() net: phylink: fix regression when binding a PHY net: ethernet: ti: am65-cpsw: streamline TX queue creation and cleanup net: ethernet: ti: am65-cpsw: streamline RX queue creation and cleanup net: ethernet: ti: am65-cpsw: ensure proper channel cleanup in error path ipv6: Convert inet6_rtm_deladdr() to per-netns RTNL. ipv6: Convert inet6_rtm_newaddr() to per-netns RTNL. ipv6: Move lifetime validation to inet6_rtm_newaddr(). ipv6: Set cfg.ifa_flags before device lookup in inet6_rtm_newaddr(). ipv6: Pass dev to inet6_addr_add(). ipv6: Convert inet6_ioctl() to per-netns RTNL. ipv6: Hold rtnl_net_lock() in addrconf_init() and addrconf_cleanup(). ipv6: Hold rtnl_net_lock() in addrconf_dad_work(). ipv6: Hold rtnl_net_lock() in addrconf_verify_work(). ipv6: Convert net.ipv6.conf.${DEV}.XXX sysctl to per-netns RTNL. ipv6: Add __in6_dev_get_rtnl_net(). net: stmmac: Drop redundant skb_mark_for_recycle() for SKB frags net: mii: Fix the Speed display when the network cable is not connected sysctl net: Remove macro checks for CONFIG_SYSCTL eth: bnxt: update header sizing defaults ...
Diffstat (limited to 'drivers/net/ethernet/intel/i40e/i40e_main.c')
-rw-r--r--drivers/net/ethernet/intel/i40e/i40e_main.c231
1 files changed, 96 insertions, 135 deletions
diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 0e1d9e2fbf38..65a702668e21 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -1666,9 +1666,8 @@ struct i40e_mac_filter *i40e_add_filter(struct i40e_vsi *vsi,
* @vsi: VSI to remove from
* @f: the filter to remove from the list
*
- * This function should be called instead of i40e_del_filter only if you know
- * the exact filter you will remove already, such as via i40e_find_filter or
- * i40e_find_mac.
+ * This function requires you've found * the exact filter you will remove
+ * already, such as via i40e_find_filter or i40e_find_mac.
*
* NOTE: This function is expected to be called with mac_filter_hash_lock
* being held.
@@ -1698,29 +1697,6 @@ void __i40e_del_filter(struct i40e_vsi *vsi, struct i40e_mac_filter *f)
}
/**
- * i40e_del_filter - Remove a MAC/VLAN filter from the VSI
- * @vsi: the VSI to be searched
- * @macaddr: the MAC address
- * @vlan: the VLAN
- *
- * NOTE: This function is expected to be called with mac_filter_hash_lock
- * being held.
- * ANOTHER NOTE: This function MUST be called from within the context of
- * the "safe" variants of any list iterators, e.g. list_for_each_entry_safe()
- * instead of list_for_each_entry().
- **/
-void i40e_del_filter(struct i40e_vsi *vsi, const u8 *macaddr, s16 vlan)
-{
- struct i40e_mac_filter *f;
-
- if (!vsi || !macaddr)
- return;
-
- f = i40e_find_filter(vsi, macaddr, vlan);
- __i40e_del_filter(vsi, f);
-}
-
-/**
* i40e_add_mac_filter - Add a MAC filter for all active VLANs
* @vsi: the VSI to be searched
* @macaddr: the mac address to be filtered
@@ -9629,19 +9605,6 @@ static void i40e_handle_lan_overflow_event(struct i40e_pf *pf,
}
/**
- * i40e_get_cur_guaranteed_fd_count - Get the consumed guaranteed FD filters
- * @pf: board private structure
- **/
-u32 i40e_get_cur_guaranteed_fd_count(struct i40e_pf *pf)
-{
- u32 val, fcnt_prog;
-
- val = rd32(&pf->hw, I40E_PFQF_FDSTAT);
- fcnt_prog = (val & I40E_PFQF_FDSTAT_GUARANT_CNT_MASK);
- return fcnt_prog;
-}
-
-/**
* i40e_get_current_fd_count - Get total FD filters programmed for this PF
* @pf: board private structure
**/
@@ -11217,6 +11180,67 @@ static void i40e_handle_reset_warning(struct i40e_pf *pf, bool lock_acquired)
}
/**
+ * i40e_print_vf_mdd_event - print VF Tx/Rx malicious driver detect event
+ * @pf: board private structure
+ * @vf: pointer to the VF structure
+ * @is_tx: true - for Tx event, false - for Rx
+ */
+static void i40e_print_vf_mdd_event(struct i40e_pf *pf, struct i40e_vf *vf,
+ bool is_tx)
+{
+ dev_err(&pf->pdev->dev, is_tx ?
+ "%lld Tx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n" :
+ "%lld Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pm. mdd-auto-reset-vfs=%s\n",
+ is_tx ? vf->mdd_tx_events.count : vf->mdd_rx_events.count,
+ pf->hw.pf_id,
+ vf->vf_id,
+ vf->default_lan_addr.addr,
+ str_on_off(test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)));
+}
+
+/**
+ * i40e_print_vfs_mdd_events - print VFs malicious driver detect event
+ * @pf: pointer to the PF structure
+ *
+ * Called from i40e_handle_mdd_event to rate limit and print VFs MDD events.
+ */
+static void i40e_print_vfs_mdd_events(struct i40e_pf *pf)
+{
+ unsigned int i;
+
+ /* check that there are pending MDD events to print */
+ if (!test_and_clear_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state))
+ return;
+
+ if (!__ratelimit(&pf->mdd_message_rate_limit))
+ return;
+
+ for (i = 0; i < pf->num_alloc_vfs; i++) {
+ struct i40e_vf *vf = &pf->vf[i];
+ bool is_printed = false;
+
+ /* only print Rx MDD event message if there are new events */
+ if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
+ vf->mdd_rx_events.last_printed = vf->mdd_rx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, false);
+ is_printed = true;
+ }
+
+ /* only print Tx MDD event message if there are new events */
+ if (vf->mdd_tx_events.count != vf->mdd_tx_events.last_printed) {
+ vf->mdd_tx_events.last_printed = vf->mdd_tx_events.count;
+ i40e_print_vf_mdd_event(pf, vf, true);
+ is_printed = true;
+ }
+
+ if (is_printed && !test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags))
+ dev_info(&pf->pdev->dev,
+ "Use PF Control I/F to re-enable the VF #%d\n",
+ i);
+ }
+}
+
+/**
* i40e_handle_mdd_event
* @pf: pointer to the PF structure
*
@@ -11230,8 +11254,13 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
u32 reg;
int i;
- if (!test_bit(__I40E_MDD_EVENT_PENDING, pf->state))
+ if (!test_and_clear_bit(__I40E_MDD_EVENT_PENDING, pf->state)) {
+ /* Since the VF MDD event logging is rate limited, check if
+ * there are pending MDD events.
+ */
+ i40e_print_vfs_mdd_events(pf);
return;
+ }
/* find what triggered the MDD event */
reg = rd32(hw, I40E_GL_MDET_TX);
@@ -11275,36 +11304,48 @@ static void i40e_handle_mdd_event(struct i40e_pf *pf)
/* see if one of the VFs needs its hand slapped */
for (i = 0; i < pf->num_alloc_vfs && mdd_detected; i++) {
+ bool is_mdd_on_tx = false;
+ bool is_mdd_on_rx = false;
+
vf = &(pf->vf[i]);
reg = rd32(hw, I40E_VP_MDET_TX(i));
if (reg & I40E_VP_MDET_TX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_TX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "TX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_tx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_tx = true;
}
reg = rd32(hw, I40E_VP_MDET_RX(i));
if (reg & I40E_VP_MDET_RX_VALID_MASK) {
+ set_bit(__I40E_MDD_VF_PRINT_PENDING, pf->state);
wr32(hw, I40E_VP_MDET_RX(i), 0xFFFF);
- vf->num_mdd_events++;
- dev_info(&pf->pdev->dev, "RX driver issue detected on VF %d\n",
- i);
- dev_info(&pf->pdev->dev,
- "Use PF Control I/F to re-enable the VF\n");
+ vf->mdd_rx_events.count++;
set_bit(I40E_VF_STATE_DISABLED, &vf->vf_states);
+ is_mdd_on_rx = true;
+ }
+
+ if ((is_mdd_on_tx || is_mdd_on_rx) &&
+ test_bit(I40E_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+ /* VF MDD event counters will be cleared by
+ * reset, so print the event prior to reset.
+ */
+ if (is_mdd_on_rx)
+ i40e_print_vf_mdd_event(pf, vf, false);
+ if (is_mdd_on_tx)
+ i40e_print_vf_mdd_event(pf, vf, true);
+
+ i40e_vc_reset_vf(vf, true);
}
}
- /* re-enable mdd interrupt cause */
- clear_bit(__I40E_MDD_EVENT_PENDING, pf->state);
reg = rd32(hw, I40E_PFINT_ICR0_ENA);
reg |= I40E_PFINT_ICR0_ENA_MAL_DETECT_MASK;
wr32(hw, I40E_PFINT_ICR0_ENA, reg);
i40e_flush(hw);
+
+ i40e_print_vfs_mdd_events(pf);
}
/**
@@ -12614,89 +12655,6 @@ int i40e_set_partition_bw_setting(struct i40e_pf *pf)
}
/**
- * i40e_commit_partition_bw_setting - Commit BW settings for this PF partition
- * @pf: board private structure
- **/
-int i40e_commit_partition_bw_setting(struct i40e_pf *pf)
-{
- /* Commit temporary BW setting to permanent NVM image */
- enum i40e_admin_queue_err last_aq_status;
- u16 nvm_word;
- int ret;
-
- if (pf->hw.partition_id != 1) {
- dev_info(&pf->pdev->dev,
- "Commit BW only works on partition 1! This is partition %d",
- pf->hw.partition_id);
- ret = -EOPNOTSUPP;
- goto bw_commit_out;
- }
-
- /* Acquire NVM for read access */
- ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_READ);
- last_aq_status = pf->hw.aq.asq_last_status;
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot acquire NVM for read access, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
- goto bw_commit_out;
- }
-
- /* Read word 0x10 of NVM - SW compatibility word 1 */
- ret = i40e_aq_read_nvm(&pf->hw,
- I40E_SR_NVM_CONTROL_WORD,
- 0x10, sizeof(nvm_word), &nvm_word,
- false, NULL);
- /* Save off last admin queue command status before releasing
- * the NVM
- */
- last_aq_status = pf->hw.aq.asq_last_status;
- i40e_release_nvm(&pf->hw);
- if (ret) {
- dev_info(&pf->pdev->dev, "NVM read error, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
- goto bw_commit_out;
- }
-
- /* Wait a bit for NVM release to complete */
- msleep(50);
-
- /* Acquire NVM for write access */
- ret = i40e_acquire_nvm(&pf->hw, I40E_RESOURCE_WRITE);
- last_aq_status = pf->hw.aq.asq_last_status;
- if (ret) {
- dev_info(&pf->pdev->dev,
- "Cannot acquire NVM for write access, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
- goto bw_commit_out;
- }
- /* Write it back out unchanged to initiate update NVM,
- * which will force a write of the shadow (alt) RAM to
- * the NVM - thus storing the bandwidth values permanently.
- */
- ret = i40e_aq_update_nvm(&pf->hw,
- I40E_SR_NVM_CONTROL_WORD,
- 0x10, sizeof(nvm_word),
- &nvm_word, true, 0, NULL);
- /* Save off last admin queue command status before releasing
- * the NVM
- */
- last_aq_status = pf->hw.aq.asq_last_status;
- i40e_release_nvm(&pf->hw);
- if (ret)
- dev_info(&pf->pdev->dev,
- "BW settings NOT SAVED, err %pe aq_err %s\n",
- ERR_PTR(ret),
- i40e_aq_str(&pf->hw, last_aq_status));
-bw_commit_out:
-
- return ret;
-}
-
-/**
* i40e_is_total_port_shutdown_enabled - read NVM and return value
* if total port shutdown feature is enabled for this PF
* @pf: board private structure
@@ -15998,6 +15956,9 @@ static int i40e_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
ERR_PTR(err),
i40e_aq_str(&pf->hw, pf->hw.aq.asq_last_status));
+ /* VF MDD event logs are rate limited to one second intervals */
+ ratelimit_state_init(&pf->mdd_message_rate_limit, 1 * HZ, 1);
+
/* Reconfigure hardware for allowing smaller MSS in the case
* of TSO, so that we avoid the MDD being fired and causing
* a reset in the case of small MSS+TSO.