author     Linus Torvalds <torvalds@linux-foundation.org>  2024-09-12 12:45:24 -0700
committer  Linus Torvalds <torvalds@linux-foundation.org>  2024-09-12 12:45:24 -0700
commit     5abfdfd402699ce7c1e81d1a25bc37f60f7741ff (patch)
tree       43a15f5de4091c396a1ac9eba157e616894b6419
parent     42c5b519498820e95d96311b1200eb4b854fc2bd (diff)
parent     3e705251d998c9688be0e7e0526c250fec24d233 (diff)
Merge tag 'net-6.11-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni:
"Including fixes from netfilter.
There is a recently notified BT regression with no fix yet. I do not
think a fix will land in the next week.
Current release - regressions:
- core: tighten bad gso csum offset check in virtio_net_hdr
- netfilter: move nf flowtable bpf initialization in
nf_flow_table_module_init()
- eth: ice: stop calling pci_disable_device() as we use pcim
- eth: fou: fix null-ptr-deref in GRO.
Current release - new code bugs:
- hsr: prevent NULL pointer dereference in hsr_proxy_announce()
Previous releases - regressions:
- hsr: remove seqnr_lock
- netfilter: nft_socket: fix sk refcount leaks
- mptcp: pm: fix uaf in __timer_delete_sync
- phy: dp83822: fix NULL pointer dereference on DP83825 devices
- eth: revert "virtio_net: rx enable premapped mode by default"
- eth: octeontx2-af: Modify SMQ flush sequence to drop packets
Previous releases - always broken:
- eth: mlx5: fix bridge mode operations when there are no VFs
- eth: igb: Always call igb_xdp_ring_update_tail() under Tx lock"
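
The first bullet (the virtio_net_hdr csum fix, shown in full in the include/linux/virtio_net.h hunk below) is easy to misread, so here is a minimal, self-contained userspace sketch of the tightened check. This is an illustration only, not the kernel source: CHECKSUM_NONE/CHECKSUM_PARTIAL are stand-ins for the kernel's skb->ip_summed states, gso_csum_offset_ok() is an invented name, and struct tcphdr_sketch mirrors the kernel's struct tcphdr layout only as far as the checksum field.

/* Sketch of the tightened GSO csum-offset check (assumptions noted above). */
#include <stdbool.h>
#include <stddef.h>
#include <stdint.h>
#include <stdio.h>

enum { CHECKSUM_NONE, CHECKSUM_PARTIAL };   /* stand-ins for skb->ip_summed */

struct tcphdr_sketch {
    uint16_t source, dest;
    uint32_t seq, ack_seq;
    uint16_t flags_doff, window;
    uint16_t check;     /* TCP checksum field, byte offset 16 as in the kernel */
    uint16_t urg_ptr;
};

/* Pre-fix, every TCP GSO packet had its csum_offset compared against the TCP
 * checksum offset - wrongly rejecting packets whose checksum is already
 * complete. The fix makes the comparison conditional on CHECKSUM_PARTIAL,
 * the only state where csum_offset is meaningful.
 */
static bool gso_csum_offset_ok(int ip_summed, size_t csum_offset)
{
    if (ip_summed == CHECKSUM_PARTIAL &&
        csum_offset != offsetof(struct tcphdr_sketch, check))
        return false;   /* the kernel returns -EINVAL here */
    return true;
}

int main(void)
{
    printf("%d\n", gso_csum_offset_ok(CHECKSUM_NONE, 0));    /* 1: accepted */
    printf("%d\n", gso_csum_offset_ok(CHECKSUM_PARTIAL, 0)); /* 0: rejected */
    return 0;
}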
* tag 'net-6.11-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (36 commits)
net: netfilter: move nf flowtable bpf initialization in nf_flow_table_module_init()
net: tighten bad gso csum offset check in virtio_net_hdr
netlink: specs: mptcp: fix port endianness
net: dpaa: Pad packets to ETH_ZLEN
mptcp: pm: Fix uaf in __timer_delete_sync
net: libwx: fix number of Rx and Tx descriptors
net: dsa: felix: ignore pending status of TAS module when it's disabled
net: hsr: prevent NULL pointer dereference in hsr_proxy_announce()
selftests: mptcp: include net_helper.sh file
selftests: mptcp: include lib.sh file
selftests: mptcp: join: restrict fullmesh endp on 1st sf
netfilter: nft_socket: make cgroupsv2 matching work with namespaces
netfilter: nft_socket: fix sk refcount leaks
MAINTAINERS: Add ethtool pse-pd to PSE NETWORK DRIVER
dt-bindings: net: tja11xx: fix the broken binding
selftests: net: csum: Fix checksums for packets with non-zero padding
net: phy: dp83822: Fix NULL pointer dereference on DP83825 devices
virtio_net: disable premapped mode by default
Revert "virtio_net: big mode skip the unmap check"
Revert "virtio_net: rx remove premapped failover code"
...
34 files changed, 364 insertions(+), 190 deletions(-)
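
Several of the fixes in the diffs below are about locking contracts rather than logic; the igb one adds lockdep_assert_held() so the rule "the tail update requires the Tx queue lock" is checked at runtime instead of trusted. A rough userspace analog of that pattern follows - the names (tx_lock_acquire, ring_update_tail) are invented for the example, and a plain assert() stands in for lockdep:

/* Illustrative sketch of an asserted locking contract; not driver code. */
#include <assert.h>
#include <pthread.h>
#include <stdbool.h>

static pthread_mutex_t tx_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_t tx_lock_owner;
static bool tx_lock_held;

static void tx_lock_acquire(void)
{
    pthread_mutex_lock(&tx_lock);
    tx_lock_owner = pthread_self();
    tx_lock_held = true;
}

static void tx_lock_release(void)
{
    tx_lock_held = false;
    pthread_mutex_unlock(&tx_lock);
}

/* Analog of lockdep_assert_held(): fail loudly in debug builds if a caller
 * breaks the contract, instead of racing silently on the tail register the
 * way the pre-fix igb XDP path could.
 */
static void ring_update_tail(void)
{
    assert(tx_lock_held && pthread_equal(tx_lock_owner, pthread_self()));
    /* ... write the hardware tail register here ... */
}

int main(void)
{
    tx_lock_acquire();
    ring_update_tail();   /* OK: lock held by this thread */
    tx_lock_release();
    return 0;
}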
diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
index 85bfa45f5122..a754a61adc2d 100644
--- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
@@ -14,8 +14,53 @@ maintainers:
 description:
   Bindings for NXP TJA11xx automotive PHYs
 
+properties:
+  compatible:
+    enum:
+      - ethernet-phy-id0180.dc40
+      - ethernet-phy-id0180.dc41
+      - ethernet-phy-id0180.dc48
+      - ethernet-phy-id0180.dd00
+      - ethernet-phy-id0180.dd01
+      - ethernet-phy-id0180.dd02
+      - ethernet-phy-id0180.dc80
+      - ethernet-phy-id0180.dc82
+      - ethernet-phy-id001b.b010
+      - ethernet-phy-id001b.b013
+      - ethernet-phy-id001b.b030
+      - ethernet-phy-id001b.b031
+
 allOf:
   - $ref: ethernet-phy.yaml#
+  - if:
+      properties:
+        compatible:
+          contains:
+            enum:
+              - ethernet-phy-id0180.dc40
+              - ethernet-phy-id0180.dc41
+              - ethernet-phy-id0180.dc48
+              - ethernet-phy-id0180.dd00
+              - ethernet-phy-id0180.dd01
+              - ethernet-phy-id0180.dd02
+
+    then:
+      properties:
+        nxp,rmii-refclk-in:
+          type: boolean
+          description: |
+            The REF_CLK is provided for both transmitted and received data
+            in RMII mode. This clock signal is provided by the PHY and is
+            typically derived from an external 25MHz crystal. Alternatively,
+            a 50MHz clock signal generated by an external oscillator can be
+            connected to pin REF_CLK. A third option is to connect a 25MHz
+            clock to pin CLK_IN_OUT. So, the REF_CLK should be configured
+            as input or output according to the actual circuit connection.
+            If present, indicates that the REF_CLK will be configured as
+            interface reference clock input when RMII mode enabled.
+            If not present, the REF_CLK will be configured as interface
+            reference clock output when RMII mode enabled.
+            Only supported on TJA1100 and TJA1101.
 
 patternProperties:
   "^ethernet-phy@[0-9a-f]+$":
@@ -32,22 +77,6 @@ patternProperties:
         description:
           The ID number for the child PHY. Should be +1 of parent PHY.
 
-      nxp,rmii-refclk-in:
-        type: boolean
-        description: |
-          The REF_CLK is provided for both transmitted and received data
-          in RMII mode. This clock signal is provided by the PHY and is
-          typically derived from an external 25MHz crystal. Alternatively,
-          a 50MHz clock signal generated by an external oscillator can be
-          connected to pin REF_CLK. A third option is to connect a 25MHz
-          clock to pin CLK_IN_OUT. So, the REF_CLK should be configured
-          as input or output according to the actual circuit connection.
-          If present, indicates that the REF_CLK will be configured as
-          interface reference clock input when RMII mode enabled.
-          If not present, the REF_CLK will be configured as interface
-          reference clock output when RMII mode enabled.
-          Only supported on TJA1100 and TJA1101.
-
       required:
         - reg
 
@@ -60,6 +89,7 @@ examples:
       #size-cells = <0>;
 
       tja1101_phy0: ethernet-phy@4 {
+        compatible = "ethernet-phy-id0180.dc40";
         reg = <0x4>;
         nxp,rmii-refclk-in;
       };
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index af525ed29792..30d8342cacc8 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -109,7 +109,6 @@ attribute-sets:
       -
         name: port
         type: u16
-        byte-order: big-endian
       -
         name: flags
         type: u32
diff --git a/MAINTAINERS b/MAINTAINERS
index 10430778c998..f70987f7e462 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18407,6 +18407,7 @@ L: netdev@vger.kernel.org
 S: Maintained
 F: Documentation/devicetree/bindings/net/pse-pd/
 F: drivers/net/pse-pd/
+F: net/ethtool/pse-pd.c
 
 PSTORE FILESYSTEM
 M: Kees Cook <kees@kernel.org>
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index ba37a566da39..ecfa73725d25 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1474,10 +1474,13 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
     /* Hardware errata - Admin config could not be overwritten if
      * config is pending, need reset the TAS module
      */
-    val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
-    if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
-        ret = -EBUSY;
-        goto err_reset_tc;
+    val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port);
+    if (val & QSYS_TAG_CONFIG_ENABLE) {
+        val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
+        if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
+            ret = -EBUSY;
+            goto err_reset_tc;
+        }
     }
 
     ocelot_rmw_rix(ocelot,
diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
index 63b3e02fab16..4968f6f0bdbc 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.h
+++ b/drivers/net/ethernet/faraday/ftgmac100.h
@@ -84,7 +84,7 @@
              FTGMAC100_INT_RPKT_BUF)
 
 /* All the interrupts we care about */
-#define FTGMAC100_INT_ALL (FTGMAC100_INT_RPKT_BUF | \
+#define FTGMAC100_INT_ALL (FTGMAC100_INT_RXTX | \
               FTGMAC100_INT_BAD)
 
 /*
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index cfe6b57b1da0..4a55e521c17e 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2272,12 +2272,12 @@ static netdev_tx_t
 dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 {
     const int queue_mapping = skb_get_queue_mapping(skb);
-    bool nonlinear = skb_is_nonlinear(skb);
     struct rtnl_link_stats64 *percpu_stats;
     struct dpaa_percpu_priv *percpu_priv;
     struct netdev_queue *txq;
     struct dpaa_priv *priv;
     struct qm_fd fd;
+    bool nonlinear;
     int offset = 0;
     int err = 0;
 
@@ -2287,6 +2287,13 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
 
     qm_fd_clear_fd(&fd);
 
+    /* Packet data is always read as 32-bit words, so zero out any part of
+     * the skb which might be sent if we have to pad the packet
+     */
+    if (__skb_put_padto(skb, ETH_ZLEN, false))
+        goto enomem;
+
+    nonlinear = skb_is_nonlinear(skb);
     if (!nonlinear) {
         /* We're going to store the skb backpointer at the beginning
          * of the data buffer, so we need a privately owned skb
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 737c00b02dd0..2405e5ed9128 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2413,13 +2413,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
     struct ice_pf *pf = vsi->back;
     int err;
 
-    /* The Rx rule will only exist to remove if the LLDP FW
-     * engine is currently stopped
-     */
-    if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
-        !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
-        ice_cfg_sw_lldp(vsi, false, false);
-
     ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
     err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
     if (err)
@@ -2764,6 +2757,14 @@ int ice_vsi_release(struct ice_vsi *vsi)
         ice_rss_clean(vsi);
 
     ice_vsi_close(vsi);
+
+    /* The Rx rule will only exist to remove if the LLDP FW
+     * engine is currently stopped
+     */
+    if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
+        !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+        ice_cfg_sw_lldp(vsi, false, false);
+
     ice_vsi_decfg(vsi);
 
     /* retain SW VSI data structure since it is needed to unregister and
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c7db88b517da..ea780d468579 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5363,7 +5363,6 @@ err_load:
     ice_deinit(pf);
 err_init:
     ice_adapter_put(pdev);
-    pci_disable_device(pdev);
     return err;
 }
 
@@ -5470,7 +5469,6 @@ static void ice_remove(struct pci_dev *pdev)
     ice_set_wake(pf);
 
     ice_adapter_put(pdev);
-    pci_disable_device(pdev);
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index fe8847184cb1..79d91e95358c 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -3194,7 +3194,7 @@ ice_add_update_vsi_list(struct ice_hw *hw,
 
         /* A rule already exists with the new VSI being added */
         if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
-            return 0;
+            return -EEXIST;
 
         /* Update the previously created VSI list set with
          * the new VSI ID passed in
@@ -3264,7 +3264,7 @@ ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
 
     list_head = &sw->recp_list[recp_id].filt_rules;
     list_for_each_entry(list_itr, list_head, list_entry) {
-        if (list_itr->vsi_list_info) {
+        if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) {
             map_info = list_itr->vsi_list_info;
             if (test_bit(vsi_handle, map_info->vsi_map)) {
                 *vsi_list_id = map_info->vsi_list_id;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 9dc7c60838ed..1ef4cb871452 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -33,6 +33,7 @@
 #include <linux/bpf_trace.h>
 #include <linux/pm_runtime.h>
 #include <linux/etherdevice.h>
+#include <linux/lockdep.h>
 #ifdef CONFIG_IGB_DCA
 #include <linux/dca.h>
 #endif
@@ -2914,8 +2915,11 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
     }
 }
 
+/* This function assumes __netif_tx_lock is held by the caller. */
 static void igb_xdp_ring_update_tail(struct igb_ring *ring)
 {
+    lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
+
     /* Force memory writes to complete before letting h/w know there
      * are new descriptors to fetch.
     */
@@ -3000,11 +3004,11 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
         nxmit++;
     }
 
-    __netif_tx_unlock(nq);
-
     if (unlikely(flags & XDP_XMIT_FLUSH))
         igb_xdp_ring_update_tail(tx_ring);
 
+    __netif_tx_unlock(nq);
+
     return nxmit;
 }
 
@@ -8864,12 +8868,14 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
 
 static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
 {
+    unsigned int total_bytes = 0, total_packets = 0;
     struct igb_adapter *adapter = q_vector->adapter;
     struct igb_ring *rx_ring = q_vector->rx.ring;
-    struct sk_buff *skb = rx_ring->skb;
-    unsigned int total_bytes = 0, total_packets = 0;
     u16 cleaned_count = igb_desc_unused(rx_ring);
+    struct sk_buff *skb = rx_ring->skb;
+    int cpu = smp_processor_id();
     unsigned int xdp_xmit = 0;
+    struct netdev_queue *nq;
     struct xdp_buff xdp;
     u32 frame_sz = 0;
     int rx_buf_pgcnt;
@@ -8997,7 +9003,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
     if (xdp_xmit & IGB_XDP_TX) {
         struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
 
+        nq = txring_txq(tx_ring);
+        __netif_tx_lock(nq, cpu);
         igb_xdp_ring_update_tail(tx_ring);
+        __netif_tx_unlock(nq);
     }
 
     u64_stats_update_begin(&rx_ring->rx_syncp);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 03ee93fd9e94..db2db0738ee4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -319,6 +319,7 @@ struct nix_mark_format {
 
 /* smq(flush) to tl1 cir/pir info */
 struct nix_smq_tree_ctx {
+    u16 schq;
     u64 cir_off;
     u64 cir_val;
     u64 pir_off;
@@ -328,8 +329,6 @@ struct nix_smq_tree_ctx {
 /* smq flush context */
 struct nix_smq_flush_ctx {
     int smq;
-    u16 tl1_schq;
-    u16 tl2_schq;
     struct nix_smq_tree_ctx smq_tree_ctx[NIX_TXSCH_LVL_CNT];
 };
 
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 222f9e00b836..82832a24fbd8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -2259,14 +2259,13 @@ static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq,
     schq = smq;
     for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) {
         smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl];
+        smq_tree_ctx->schq = schq;
         if (lvl == NIX_TXSCH_LVL_TL1) {
-            smq_flush_ctx->tl1_schq = schq;
             smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq);
             smq_tree_ctx->pir_off = 0;
             smq_tree_ctx->pir_val = 0;
             parent_off = 0;
         } else if (lvl == NIX_TXSCH_LVL_TL2) {
-            smq_flush_ctx->tl2_schq = schq;
             smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq);
             smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq);
             parent_off = NIX_AF_TL2X_PARENT(schq);
@@ -2301,8 +2300,8 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
 {
     struct nix_txsch *txsch;
     struct nix_hw *nix_hw;
+    int tl2, tl2_schq;
     u64 regoff;
-    int tl2;
 
     nix_hw = get_nix_hw(rvu->hw, blkaddr);
     if (!nix_hw)
@@ -2310,16 +2309,17 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
 
     /* loop through all TL2s with matching PF_FUNC */
     txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
+    tl2_schq = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL2].schq;
     for (tl2 = 0; tl2 < txsch->schq.max; tl2++) {
         /* skip the smq(flush) TL2 */
-        if (tl2 == smq_flush_ctx->tl2_schq)
+        if (tl2 == tl2_schq)
             continue;
         /* skip unused TL2s */
         if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE)
             continue;
         /* skip if PF_FUNC doesn't match */
         if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) &
             ~RVU_PFVF_FUNC_MASK) !=
-            (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] &
+            (TXSCH_MAP_FUNC(txsch->pfvf_map[tl2_schq] &
             ~RVU_PFVF_FUNC_MASK)))
             continue;
         /* enable/disable XOFF */
@@ -2361,10 +2361,12 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
              int smq, u16 pcifunc, int nixlf)
 {
     struct nix_smq_flush_ctx *smq_flush_ctx;
+    int err, restore_tx_en = 0, i;
     int pf = rvu_get_pf(pcifunc);
     u8 cgx_id = 0, lmac_id = 0;
-    int err, restore_tx_en = 0;
-    u64 cfg;
+    u16 tl2_tl3_link_schq;
+    u8 link, link_level;
+    u64 cfg, bmap = 0;
 
     if (!is_rvu_otx2(rvu)) {
         /* Skip SMQ flush if pkt count is zero */
@@ -2388,16 +2390,38 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
     nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true);
     nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false);
 
-    cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
-    /* Do SMQ flush and set enqueue xoff */
-    cfg |= BIT_ULL(50) | BIT_ULL(49);
-    rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
-
     /* Disable backpressure from physical link,
      * otherwise SMQ flush may stall.
      */
     rvu_cgx_enadis_rx_bp(rvu, pf, false);
 
+    link_level = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ?
+            NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2;
+    tl2_tl3_link_schq = smq_flush_ctx->smq_tree_ctx[link_level].schq;
+    link = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL1].schq;
+
+    /* SMQ set enqueue xoff */
+    cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
+    cfg |= BIT_ULL(50);
+    rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
+
+    /* Clear all NIX_AF_TL3_TL2_LINK_CFG[ENA] for the TL3/TL2 queue */
+    for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
+        cfg = rvu_read64(rvu, blkaddr,
+                 NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
+        if (!(cfg & BIT_ULL(12)))
+            continue;
+        bmap |= (1 << i);
+        cfg &= ~BIT_ULL(12);
+        rvu_write64(rvu, blkaddr,
+                NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
+    }
+
+    /* Do SMQ flush and set enqueue xoff */
+    cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
+    cfg |= BIT_ULL(50) | BIT_ULL(49);
+    rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
+
     /* Wait for flush to complete */
     err = rvu_poll_reg(rvu, blkaddr,
                NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true);
@@ -2406,6 +2430,17 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
              "NIXLF%d: SMQ%d flush failed, txlink might be busy\n",
              nixlf, smq);
 
+    /* Set NIX_AF_TL3_TL2_LINKX_CFG[ENA] for the TL3/TL2 queue */
+    for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
+        if (!(bmap & (1 << i)))
+            continue;
+        cfg = rvu_read64(rvu, blkaddr,
+                 NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
+        cfg |= BIT_ULL(12);
+        rvu_write64(rvu, blkaddr,
+                NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
+    }
+
     /* clear XOFF on TL2s */
     nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true);
     nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 36845872ae94..1cf3c54d343e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -139,6 +139,10 @@ void mlx5e_build_ptys2ethtool_map(void)
                        ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
     MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy,
                        ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+    MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100BASE_TX, legacy,
+                       ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+    MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_T, legacy,
+                       ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
     MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy,
                        ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
     MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy,
@@ -204,6 +208,12 @@ void mlx5e_build_ptys2ethtool_map(void)
                        ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
                        ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
                        ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
+    MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_8_400GBASE_CR8, ext,
+                       ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT,
+                       ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT,
+                       ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT,
+                       ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT,
+                       ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT);
     MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext,
                        ETHTOOL_LINK_MODE_100000baseKR_Full_BIT,
                        ETHTOOL_LINK_MODE_100000baseSR_Full_BIT,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 255bc8b749f9..8587cd572da5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -319,7 +319,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
         return -EPERM;
 
     mutex_lock(&esw->state_lock);
-    if (esw->mode != MLX5_ESWITCH_LEGACY) {
+    if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw)) {
         err = -EOPNOTSUPP;
         goto out;
     }
@@ -339,7 +339,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
     if (!mlx5_esw_allowed(esw))
         return -EPERM;
 
-    if (esw->mode != MLX5_ESWITCH_LEGACY)
+    if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw))
         return -EOPNOTSUPP;
 
     *setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 20146a2dc7f4..02a3563f51ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -312,6 +312,25 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
     return err;
 }
 
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+    switch (type) {
+    case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+        return MLX5_CAP_QOS(dev, esw_element_type) &
+               ELEMENT_TYPE_CAP_MASK_TSAR;
+    case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+        return MLX5_CAP_QOS(dev, esw_element_type) &
+               ELEMENT_TYPE_CAP_MASK_VPORT;
+    case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+        return MLX5_CAP_QOS(dev, esw_element_type) &
+               ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+    case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+        return MLX5_CAP_QOS(dev, esw_element_type) &
+               ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+    }
+    return false;
+}
+
 static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
                           struct mlx5_vport *vport,
                           u32 max_rate, u32 bw_share)
@@ -323,6 +342,9 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
     void *vport_elem;
     int err;
 
+    if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
+        return -EOPNOTSUPP;
+
     parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
     MLX5_SET(scheduling_context, sched_ctx, element_type,
          SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
@@ -421,6 +443,7 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
 {
     u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
     struct mlx5_esw_rate_group *group;
+    __be32 *attr;
     u32 divider;
     int err;
 
@@ -428,6 +451,12 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
     if (!group)
         return ERR_PTR(-ENOMEM);
 
+    MLX5_SET(scheduling_context, tsar_ctx, element_type,
+         SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+    attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+    *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
     MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
          esw->qos.root_tsar_ix);
     err = mlx5_create_scheduling_element_cmd(esw->dev,
@@ -526,25 +555,6 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
     return err;
 }
 
-static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
-{
-    switch (type) {
-    case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
-        return MLX5_CAP_QOS(dev, esw_element_type) &
-               ELEMENT_TYPE_CAP_MASK_TSAR;
-    case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
-        return MLX5_CAP_QOS(dev, esw_element_type) &
-               ELEMENT_TYPE_CAP_MASK_VPORT;
-    case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
-        return MLX5_CAP_QOS(dev, esw_element_type) &
-               ELEMENT_TYPE_CAP_MASK_VPORT_TC;
-    case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
-        return MLX5_CAP_QOS(dev, esw_element_type) &
-               ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
-    }
-    return false;
-}
-
 static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
 {
     u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
@@ -555,7 +565,8 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
     if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
         return -EOPNOTSUPP;
 
-    if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+    if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
+        !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
         return -EOPNOTSUPP;
 
     MLX5_SET(scheduling_context, tsar_ctx, element_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5b7e6f4b5c7e..2ec33c4a2a3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -2217,6 +2217,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
     { PCI_VDEVICE(MELLANOX, 0x101f) },			/* ConnectX-6 LX */
     { PCI_VDEVICE(MELLANOX, 0x1021) },			/* ConnectX-7 */
     { PCI_VDEVICE(MELLANOX, 0x1023) },			/* ConnectX-8 */
+    { PCI_VDEVICE(MELLANOX, 0x1025) },			/* ConnectX-9 */
     { PCI_VDEVICE(MELLANOX, 0xa2d2) },			/* BlueField integrated ConnectX-5 network controller */
     { PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF},	/* BlueField integrated ConnectX-5 network controller VF */
     { PCI_VDEVICE(MELLANOX, 0xa2d6) },			/* BlueField-2 integrated ConnectX-6 Dx network controller */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
index 8bce730b5c5b..db2bd3ad63ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
@@ -28,6 +28,9 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
 {
     u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
 
+    if (!(MLX5_CAP_QOS(mdev, nic_element_type) &
+          ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP))
+        return -EOPNOTSUPP;
+
     MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
     MLX5_SET(scheduling_context, sched_ctx, element_type,
          SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
@@ -44,6 +47,10 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
     u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
     void *attr;
 
+    if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR) ||
+        !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
+        return -EOPNOTSUPP;
+
     MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
     MLX5_SET(scheduling_context, sched_ctx, element_type,
          SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 1d57b047817b..b54bffda027b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -426,9 +426,9 @@ enum WX_MSCA_CMD_value {
 #define WX_MIN_RXD 128
 #define WX_MIN_TXD 128
 
-/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
-#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 8
-#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 8
+/* Number of Transmit and Receive Descriptors must be a multiple of 128 */
+#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 128
+#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 128
 
 #define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
 #define VMDQ_P(p) p
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index efeb643c1373..fc247f479257 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -271,8 +271,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
                 DP83822_ENERGY_DET_INT_EN |
                 DP83822_LINK_QUAL_INT_EN);
 
-        /* Private data pointer is NULL on DP83825 */
-        if (!dp83822 || !dp83822->fx_enabled)
+        if (!dp83822->fx_enabled)
             misr_status |= DP83822_ANEG_COMPLETE_INT_EN |
                        DP83822_DUP_MODE_CHANGE_INT_EN |
                        DP83822_SPEED_CHANGED_INT_EN;
@@ -292,8 +291,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
                       DP83822_PAGE_RX_INT_EN |
                       DP83822_EEE_ERROR_CHANGE_INT_EN);
 
-        /* Private data pointer is NULL on DP83825 */
-        if (!dp83822 || !dp83822->fx_enabled)
+        if (!dp83822->fx_enabled)
             misr_status |= DP83822_ANEG_ERR_INT_EN |
                        DP83822_WOL_PKT_INT_EN;
 
@@ -691,10 +689,9 @@ static int dp83822_read_straps(struct phy_device *phydev)
     return 0;
 }
 
-static int dp83822_probe(struct phy_device *phydev)
+static int dp8382x_probe(struct phy_device *phydev)
 {
     struct dp83822_private *dp83822;
-    int ret;
 
     dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
                    GFP_KERNEL);
@@ -703,6 +700,20 @@ static int dp83822_probe(struct phy_device *phydev)
 
     phydev->priv = dp83822;
 
+    return 0;
+}
+
+static int dp83822_probe(struct phy_device *phydev)
+{
+    struct dp83822_private *dp83822;
+    int ret;
+
+    ret = dp8382x_probe(phydev);
+    if (ret)
+        return ret;
+
+    dp83822 = phydev->priv;
+
     ret = dp83822_read_straps(phydev);
     if (ret)
         return ret;
@@ -717,14 +728,11 @@ static int dp83822_probe(struct phy_device *phydev)
 
 static int dp83826_probe(struct phy_device *phydev)
 {
-    struct dp83822_private *dp83822;
-
-    dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
-                   GFP_KERNEL);
-    if (!dp83822)
-        return -ENOMEM;
+    int ret;
 
-    phydev->priv = dp83822;
+    ret = dp8382x_probe(phydev);
+    if (ret)
+        return ret;
 
     dp83826_of_init(phydev);
 
@@ -795,6 +803,7 @@ static int dp83822_resume(struct phy_device *phydev)
         PHY_ID_MATCH_MODEL(_id),			\
         .name		= (_name),			\
         /* PHY_BASIC_FEATURES */			\
+        .probe		= dp8382x_probe,		\
         .soft_reset	= dp83822_phy_reset,		\
         .config_init	= dp8382x_config_init,		\
         .get_wol	= dp83822_get_wol,		\
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c6af18948092..5a1c1ec5a64b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -356,6 +356,9 @@ struct receive_queue {
     struct xdp_rxq_info xsk_rxq_info;
 
     struct xdp_buff **xsk_buffs;
+
+    /* Do dma by self */
+    bool do_dma;
 };
 
 /* This structure can contain rss message with maximum settings for indirection table and keysize
@@ -885,7 +888,7 @@ static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
     void *buf;
 
     buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
-    if (buf)
+    if (buf && rq->do_dma)
         virtnet_rq_unmap(rq, buf, *len);
 
     return buf;
@@ -898,6 +901,11 @@ static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
     u32 offset;
     void *head;
 
+    if (!rq->do_dma) {
+        sg_init_one(rq->sg, buf, len);
+        return;
+    }
+
     head = page_address(rq->alloc_frag.page);
 
     offset = buf - head;
@@ -923,42 +931,44 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
 
     head = page_address(alloc_frag->page);
 
-    dma = head;
+    if (rq->do_dma) {
+        dma = head;
+
+        /* new pages */
+        if (!alloc_frag->offset) {
+            if (rq->last_dma) {
+                /* Now, the new page is allocated, the last dma
+                 * will not be used. So the dma can be unmapped
+                 * if the ref is 0.
+                 */
+                virtnet_rq_unmap(rq, rq->last_dma, 0);
+                rq->last_dma = NULL;
+            }
 
-    /* new pages */
-    if (!alloc_frag->offset) {
-        if (rq->last_dma) {
-            /* Now, the new page is allocated, the last dma
-             * will not be used. So the dma can be unmapped
-             * if the ref is 0.
-             */
-            virtnet_rq_unmap(rq, rq->last_dma, 0);
-            rq->last_dma = NULL;
-        }
+            dma->len = alloc_frag->size - sizeof(*dma);
 
-        dma->len = alloc_frag->size - sizeof(*dma);
+            addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
+                                  dma->len, DMA_FROM_DEVICE, 0);
+            if (virtqueue_dma_mapping_error(rq->vq, addr))
+                return NULL;
 
-        addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
-                              dma->len, DMA_FROM_DEVICE, 0);
-        if (virtqueue_dma_mapping_error(rq->vq, addr))
-            return NULL;
+            dma->addr = addr;
+            dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
 
-        dma->addr = addr;
-        dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
+            /* Add a reference to dma to prevent the entire dma from
+             * being released during error handling. This reference
+             * will be freed after the pages are no longer used.
-         */
-        get_page(alloc_frag->page);
-        dma->ref = 1;
-        alloc_frag->offset = sizeof(*dma);
+            rq->last_dma = dma;
+        }
 
-        rq->last_dma = dma;
+        ++dma->ref;
     }
 
-    ++dma->ref;
-
     buf = head + alloc_frag->offset;
 
     get_page(alloc_frag->page);
@@ -967,19 +977,6 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
     return buf;
 }
 
-static void virtnet_rq_set_premapped(struct virtnet_info *vi)
-{
-    int i;
-
-    /* disable for big mode */
-    if (!vi->mergeable_rx_bufs && vi->big_packets)
-        return;
-
-    for (i = 0; i < vi->max_queue_pairs; i++)
-        /* error should never happen */
-        BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq));
-}
-
 static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
 {
     struct virtnet_info *vi = vq->vdev->priv;
@@ -993,7 +990,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
         return;
     }
 
-    if (!vi->big_packets || vi->mergeable_rx_bufs)
+    if (rq->do_dma)
         virtnet_rq_unmap(rq, buf, 0);
 
     virtnet_rq_free_buf(vi, rq, buf);
@@ -2430,7 +2427,8 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
 
     err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
     if (err < 0) {
-        virtnet_rq_unmap(rq, buf, 0);
+        if (rq->do_dma)
+            virtnet_rq_unmap(rq, buf, 0);
         put_page(virt_to_head_page(buf));
     }
 
@@ -2544,7 +2542,8 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
     ctx = mergeable_len_to_ctx(len + room, headroom);
     err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
     if (err < 0) {
-        virtnet_rq_unmap(rq, buf, 0);
+        if (rq->do_dma)
+            virtnet_rq_unmap(rq, buf, 0);
         put_page(virt_to_head_page(buf));
     }
 
@@ -2701,7 +2700,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi,
         }
     } else {
         while (packets < budget &&
-               (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+               (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
             receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
             packets++;
         }
@@ -5892,7 +5891,7 @@ static void free_receive_page_frags(struct virtnet_info *vi)
     int i;
 
     for (i = 0; i < vi->max_queue_pairs; i++)
         if (vi->rq[i].alloc_frag.page) {
-            if (vi->rq[i].last_dma)
+            if (vi->rq[i].do_dma && vi->rq[i].last_dma)
                 virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
             put_page(vi->rq[i].alloc_frag.page);
         }
@@ -6090,8 +6089,6 @@ static int init_vqs(struct virtnet_info *vi)
     if (ret)
         goto err_free;
 
-    virtnet_rq_set_premapped(vi);
-
     cpus_read_lock();
     virtnet_set_affinity(vi);
     cpus_read_unlock();
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cab228cf51c6..cfdf984a95a8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1027,7 +1027,8 @@ struct mlx5_ifc_qos_cap_bits {
 
     u8         max_tsar_bw_share[0x20];
 
-    u8         reserved_at_100[0x20];
+    u8         nic_element_type[0x10];
+    u8         nic_tsar_type[0x10];
 
     u8         reserved_at_120[0x3];
     u8         log_meter_aso_granularity[0x5];
@@ -3966,6 +3967,7 @@ enum {
     ELEMENT_TYPE_CAP_MASK_VPORT		= 1 << 1,
     ELEMENT_TYPE_CAP_MASK_VPORT_TC		= 1 << 2,
     ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC	= 1 << 3,
+    ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP	= 1 << 4,
 };
 
 struct mlx5_ifc_scheduling_context_bits {
@@ -4675,6 +4677,12 @@ enum {
     TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
 };
 
+enum {
+    TSAR_TYPE_CAP_MASK_DWRR		= 1 << 0,
+    TSAR_TYPE_CAP_MASK_ROUND_ROBIN	= 1 << 1,
+    TSAR_TYPE_CAP_MASK_ETS		= 1 << 2,
+};
+
 struct mlx5_ifc_tsar_element_bits {
     u8 reserved_at_0[0x8];
     u8 tsar_type[0x8];
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 6c395a2600e8..276ca543ef44 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -173,7 +173,8 @@ retry:
         break;
     case SKB_GSO_TCPV4:
     case SKB_GSO_TCPV6:
-        if (skb->csum_offset != offsetof(struct tcphdr, check))
+        if (skb->ip_summed == CHECKSUM_PARTIAL &&
+            skb->csum_offset != offsetof(struct tcphdr, check))
             return -EINVAL;
         break;
     }
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e4cc6b78dcfc..049e22bdaafb 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -231,9 +231,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
         skb->dev = master->dev;
         skb_reset_mac_header(skb);
         skb_reset_mac_len(skb);
-        spin_lock_bh(&hsr->seqnr_lock);
         hsr_forward_skb(skb, master);
-        spin_unlock_bh(&hsr->seqnr_lock);
     } else {
         dev_core_stats_tx_dropped_inc(dev);
         dev_kfree_skb_any(skb);
@@ -314,14 +312,10 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
     set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
 
     /* From HSRv1 on we have separate supervision sequence numbers. */
-    spin_lock_bh(&hsr->seqnr_lock);
-    if (hsr->prot_version > 0) {
-        hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
-        hsr->sup_sequence_nr++;
-    } else {
-        hsr_stag->sequence_nr = htons(hsr->sequence_nr);
-        hsr->sequence_nr++;
-    }
+    if (hsr->prot_version > 0)
+        hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
+    else
+        hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sequence_nr));
 
     hsr_stag->tlv.HSR_TLV_type = type;
     /* TODO: Why 12 in HSRv0? */
@@ -343,13 +337,11 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
         ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox);
     }
 
-    if (skb_put_padto(skb, ETH_ZLEN)) {
-        spin_unlock_bh(&hsr->seqnr_lock);
+    if (skb_put_padto(skb, ETH_ZLEN))
         return;
-    }
 
     hsr_forward_skb(skb, port);
-    spin_unlock_bh(&hsr->seqnr_lock);
+
     return;
 }
 
@@ -374,9 +366,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
     set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0));
 
     /* From HSRv1 on we have separate supervision sequence numbers. */
-    spin_lock_bh(&hsr->seqnr_lock);
-    hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
-    hsr->sup_sequence_nr++;
+    hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
     hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
     hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload);
 
@@ -384,13 +374,10 @@ static void send_prp_supervision_frame(struct hsr_port *master,
     hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
     ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
 
-    if (skb_put_padto(skb, ETH_ZLEN)) {
-        spin_unlock_bh(&hsr->seqnr_lock);
+    if (skb_put_padto(skb, ETH_ZLEN))
         return;
-    }
 
     hsr_forward_skb(skb, master);
-    spin_unlock_bh(&hsr->seqnr_lock);
 }
 
 /* Announce (supervision frame) timer function
@@ -427,6 +414,9 @@ static void hsr_proxy_announce(struct timer_list *t)
      * of SAN nodes stored in ProxyNodeTable.
      */
     interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+    if (!interlink)
+        goto done;
+
     list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
         if (hsr_addr_is_redbox(hsr, node->macaddress_A))
             continue;
@@ -441,6 +431,7 @@ static void hsr_proxy_announce(struct timer_list *t)
         mod_timer(&hsr->announce_proxy_timer, jiffies + interval);
     }
 
+done:
     rcu_read_unlock();
 }
 
@@ -621,11 +612,9 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
     if (res < 0)
         return res;
 
-    spin_lock_init(&hsr->seqnr_lock);
     /* Overflow soon to find bugs easier: */
-    hsr->sequence_nr = HSR_SEQNR_START;
-    hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
-    hsr->interlink_sequence_nr = HSR_SEQNR_START;
+    atomic_set(&hsr->sequence_nr, HSR_SEQNR_START);
+    atomic_set(&hsr->sup_sequence_nr, HSR_SUP_SEQNR_START);
 
     timer_setup(&hsr->announce_timer, hsr_announce, 0);
     timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index b38060246e62..6f63c8a775c4 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -599,9 +599,7 @@ static void handle_std_frame(struct sk_buff *skb,
     if (port->type == HSR_PT_MASTER || port->type == HSR_PT_INTERLINK) {
         /* Sequence nr for the master/interlink node */
-        lockdep_assert_held(&hsr->seqnr_lock);
-        frame->sequence_nr = hsr->sequence_nr;
-        hsr->sequence_nr++;
+        frame->sequence_nr = atomic_inc_return(&hsr->sequence_nr);
     }
 }
 
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index ab1f8d35d9dc..6f7bbf01f3e4 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -202,11 +202,9 @@ struct hsr_priv {
     struct timer_list	prune_timer;
     struct timer_list	prune_proxy_timer;
     int announce_count;
-    u16 sequence_nr;
-    u16 interlink_sequence_nr;	/* Interlink port seq_nr */
-    u16 sup_sequence_nr;	/* For HSRv1 separate seq_nr for supervision */
+    atomic_t sequence_nr;
+    atomic_t sup_sequence_nr;	/* For HSRv1 separate seq_nr for supervision */
     enum hsr_version prot_version;	/* Indicate if HSRv0, HSRv1 or PRPv1 */
-    spinlock_t seqnr_lock;	/* locking for sequence_nr */
     spinlock_t list_lock;	/* locking for node list */
     struct hsr_proto_ops	*proto_ops;
 #define PRP_LAN_ID	0x5	/* 0x1010 for A and 0x1011 for B. Bit 0 is set
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index f6ff0b61e08a..8aea4ff5f49e 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -163,7 +163,7 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
 
     if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN,
             hsr->sup_multicast_addr) ||
-        nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr))
+        nla_put_u16(skb, IFLA_HSR_SEQ_NR, atomic_read(&hsr->sequence_nr)))
         goto nla_put_failure;
     if (hsr->prot_version == PRP_V1)
         proto = HSR_PROTOCOL_PRP;
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index 78b869b31492..3e30745e2c09 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -336,11 +336,11 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
     struct gro_remcsum grc;
     u8 proto;
 
+    skb_gro_remcsum_init(&grc);
+
     if (!fou)
         goto out;
 
-    skb_gro_remcsum_init(&grc);
-
     off = skb_gro_offset(skb);
     len = off + sizeof(*guehdr);
 
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index f891bc714668..ad935d34c973 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -334,15 +334,21 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
 {
     struct mptcp_pm_add_entry *entry;
     struct sock *sk = (struct sock *)msk;
+    struct timer_list *add_timer = NULL;
 
     spin_lock_bh(&msk->pm.lock);
     entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
-    if (entry && (!check_id || entry->addr.id == addr->id))
+    if (entry && (!check_id || entry->addr.id == addr->id)) {
         entry->retrans_times = ADD_ADDR_RETRANS_MAX;
+        add_timer = &entry->add_timer;
+    }
+    if (!check_id && entry)
+        list_del(&entry->list);
     spin_unlock_bh(&msk->pm.lock);
 
-    if (entry && (!check_id || entry->addr.id == addr->id))
-        sk_stop_timer_sync(sk, &entry->add_timer);
+    /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */
+    if (add_timer)
+        sk_stop_timer_sync(sk, add_timer);
 
     return entry;
 }
 
@@ -1462,7 +1468,6 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
 
     entry = mptcp_pm_del_add_timer(msk, addr, false);
     if (entry) {
-        list_del(&entry->list);
         kfree(entry);
         return true;
     }
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 5c1ff07eaee0..df72b0376970 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -670,8 +670,14 @@ static int __init nf_flow_table_module_init(void)
     if (ret)
         goto out_offload;
 
+    ret = nf_flow_register_bpf();
+    if (ret)
+        goto out_bpf;
+
     return 0;
 
+out_bpf:
+    nf_flow_table_offload_exit();
 out_offload:
     unregister_pernet_subsys(&nf_flow_table_net_ops);
     return ret;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 8b541a080342..b0f199171932 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -101,7 +101,7 @@ static int __init nf_flow_inet_module_init(void)
     nft_register_flowtable_type(&flowtable_ipv6);
     nft_register_flowtable_type(&flowtable_inet);
 
-    return nf_flow_register_bpf();
+    return 0;
 }
 
 static void __exit nf_flow_inet_module_exit(void)
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f30163e2ca62..12cdff640492 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -9,7 +9,8 @@
 
 struct nft_socket {
     enum nft_socket_keys		key:8;
-    u8				level;
+    u8				level;		/* cgroupv2 level to extract */
+    u8				level_user;	/* cgroupv2 level provided by userspace */
     u8				len;
     union {
         u8			dreg;
@@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
     memcpy(dest, &cgid, sizeof(u64));
     return true;
 }
+
+/* process context only, uses current->nsproxy. */
+static noinline int nft_socket_cgroup_subtree_level(void)
+{
+    struct cgroup *cgrp = cgroup_get_from_path("/");
+    int level;
+
+    if (!cgrp)
+        return -ENOENT;
+
+    level = cgrp->level;
+
+    cgroup_put(cgrp);
+
+    if (WARN_ON_ONCE(level > 255))
+        return -ERANGE;
+
+    if (WARN_ON_ONCE(level < 0))
+        return -EINVAL;
+
+    return level;
+}
 #endif
 
 static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
@@ -110,13 +133,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
             *dest = READ_ONCE(sk->sk_mark);
         } else {
             regs->verdict.code = NFT_BREAK;
-            return;
+            goto out_put_sk;
         }
         break;
     case NFT_SOCKET_WILDCARD:
         if (!sk_fullsock(sk)) {
             regs->verdict.code = NFT_BREAK;
-            return;
+            goto out_put_sk;
         }
         nft_socket_wildcard(pkt, regs, sk, dest);
         break;
@@ -124,7 +147,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
     case NFT_SOCKET_CGROUPV2:
         if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
             regs->verdict.code = NFT_BREAK;
-            return;
+            goto out_put_sk;
         }
         break;
 #endif
@@ -133,6 +156,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
         regs->verdict.code = NFT_BREAK;
     }
 
+out_put_sk:
     if (sk != skb->sk)
         sock_gen_put(sk);
 }
@@ -173,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx,
     case NFT_SOCKET_MARK:
         len = sizeof(u32);
         break;
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_SOCK_CGROUP_DATA
     case NFT_SOCKET_CGROUPV2: {
         unsigned int level;
+        int err;
 
         if (!tb[NFTA_SOCKET_LEVEL])
             return -EINVAL;
@@ -184,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx,
         if (level > 255)
             return -EOPNOTSUPP;
 
+        err = nft_socket_cgroup_subtree_level();
+        if (err < 0)
+            return err;
+
+        priv->level_user = level;
+
+        level += err;
+        /* Implies a giant cgroup tree */
+        if (WARN_ON_ONCE(level > 255))
+            return -EOPNOTSUPP;
+
         priv->level = level;
         len = sizeof(u64);
         break;
@@ -208,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb,
     if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
         return -1;
     if (priv->key == NFT_SOCKET_CGROUPV2 &&
-        nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+        nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
         return -1;
     return 0;
 }
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index b9f3fc3c3426..e0a34e5e8dd5 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
 {
     struct iphdr *iph = nh;
     uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+    uint16_t ip_len;
 
     if (len < sizeof(*iph) || iph->protocol != proto)
         return -1;
 
+    ip_len = ntohs(iph->tot_len);
+    if (ip_len > len || ip_len < sizeof(*iph))
+        return -1;
+
+    len = ip_len;
     iph_addr_p = &iph->saddr;
     if (proto == IPPROTO_TCP)
         return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
@@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len)
 {
     struct ipv6hdr *ip6h = nh;
     uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+    uint16_t ip_len;
 
     if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
         return -1;
 
+    ip_len = ntohs(ip6h->payload_len);
+    if (ip_len > len - sizeof(*ip6h))
+        return -1;
+
+    len = ip_len;
     iph_addr_p = &ip6h->saddr;
     if (proto == IPPROTO_TCP)
-        return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+        return recv_verify_packet_tcp(ip6h + 1, len);
     else
-        return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+        return recv_verify_packet_udp(ip6h + 1, len);
 }
 
 /* return whether auxdata includes TP_STATUS_CSUM_VALID */
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 7b936a926859..5d796622e730 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,6 +11,8 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
 
 TEST_FILES := mptcp_lib.sh settings
 
+TEST_INCLUDES := ../lib.sh ../net_helper.sh
+
 EXTRA_CLEAN := *.pcap
 
 include ../../lib.mk
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index a4762c49a878..cde041c93906 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3064,7 +3064,9 @@ fullmesh_tests()
 	pm_nl_set_limits $ns1 1 3
 	pm_nl_set_limits $ns2 1 3
 	pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
-	pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+	if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+		pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+	fi
 	fullmesh=1 speed=slow \
 		run_tests $ns1 $ns2 10.0.1.1
 	chk_join_nr 3 3 3
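
The HSR seqnr_lock removal above swaps u16 counters guarded by a spinlock for atomic_t counters bumped with atomic_inc_return(), so each sender gets a unique sequence number without shared locking. A minimal C11 sketch of the same idea - names invented for illustration, with the counter deliberately started near the u16 wrap point, mirroring HSR's "overflow soon to find bugs easier" trick:

/* Userspace analog of the lock-free sequence number; not the kernel code. */
#include <stdatomic.h>
#include <stdint.h>
#include <stdio.h>

static atomic_int sequence_nr = 65533;  /* start near the u16 wrap on purpose */

static uint16_t next_seqnr(void)
{
    /* atomic_fetch_add() + 1 mirrors the kernel's atomic_inc_return();
     * truncating to u16 matches htons(atomic_inc_return(...)) in HSR.
     */
    return (uint16_t)(atomic_fetch_add(&sequence_nr, 1) + 1);
}

int main(void)
{
    for (int i = 0; i < 4; i++)
        printf("%u\n", next_seqnr());   /* 65534, 65535, 0, 1 - wraps cleanly */
    return 0;
}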