summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-09-12 12:45:24 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2024-09-12 12:45:24 -0700
commit5abfdfd402699ce7c1e81d1a25bc37f60f7741ff (patch)
tree43a15f5de4091c396a1ac9eba157e616894b6419
parent42c5b519498820e95d96311b1200eb4b854fc2bd (diff)
parent3e705251d998c9688be0e7e0526c250fec24d233 (diff)
Merge tag 'net-6.11-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Including fixes from netfilter. There is a recently notified BT regression with no fix yet. I do not think a fix will land in the next week. Current release - regressions: - core: tighten bad gso csum offset check in virtio_net_hdr - netfilter: move nf flowtable bpf initialization in nf_flow_table_module_init() - eth: ice: stop calling pci_disable_device() as we use pcim - eth: fou: fix null-ptr-deref in GRO. Current release - new code bugs: - hsr: prevent NULL pointer dereference in hsr_proxy_announce() Previous releases - regressions: - hsr: remove seqnr_lock - netfilter: nft_socket: fix sk refcount leaks - mptcp: pm: fix uaf in __timer_delete_sync - phy: dp83822: fix NULL pointer dereference on DP83825 devices - eth: revert "virtio_net: rx enable premapped mode by default" - eth: octeontx2-af: Modify SMQ flush sequence to drop packets Previous releases - always broken: - eth: mlx5: fix bridge mode operations when there are no VFs - eth: igb: Always call igb_xdp_ring_update_tail() under Tx lock" * tag 'net-6.11-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (36 commits) net: netfilter: move nf flowtable bpf initialization in nf_flow_table_module_init() net: tighten bad gso csum offset check in virtio_net_hdr netlink: specs: mptcp: fix port endianness net: dpaa: Pad packets to ETH_ZLEN mptcp: pm: Fix uaf in __timer_delete_sync net: libwx: fix number of Rx and Tx descriptors net: dsa: felix: ignore pending status of TAS module when it's disabled net: hsr: prevent NULL pointer dereference in hsr_proxy_announce() selftests: mptcp: include net_helper.sh file selftests: mptcp: include lib.sh file selftests: mptcp: join: restrict fullmesh endp on 1st sf netfilter: nft_socket: make cgroupsv2 matching work with namespaces netfilter: nft_socket: fix sk refcount leaks MAINTAINERS: Add ethtool pse-pd to PSE NETWORK DRIVER dt-bindings: net: tja11xx: fix the broken binding selftests: net: csum: Fix checksums for packets with non-zero padding net: phy: dp83822: Fix NULL pointer dereference on DP83825 devices virtio_net: disable premapped mode by default Revert "virtio_net: big mode skip the unmap check" Revert "virtio_net: rx remove premapped failover code" ...
-rw-r--r--Documentation/devicetree/bindings/net/nxp,tja11xx.yaml62
-rw-r--r--Documentation/netlink/specs/mptcp_pm.yaml1
-rw-r--r--MAINTAINERS1
-rw-r--r--drivers/net/dsa/ocelot/felix_vsc9959.c11
-rw-r--r--drivers/net/ethernet/faraday/ftgmac100.h2
-rw-r--r--drivers/net/ethernet/freescale/dpaa/dpaa_eth.c9
-rw-r--r--drivers/net/ethernet/intel/ice/ice_lib.c15
-rw-r--r--drivers/net/ethernet/intel/ice/ice_main.c2
-rw-r--r--drivers/net/ethernet/intel/ice/ice_switch.c4
-rw-r--r--drivers/net/ethernet/intel/igb/igb_main.c17
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu.h3
-rw-r--r--drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c59
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c10
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c4
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c51
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/main.c1
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/qos.c7
-rw-r--r--drivers/net/ethernet/wangxun/libwx/wx_type.h6
-rw-r--r--drivers/net/phy/dp83822.c35
-rw-r--r--drivers/net/virtio_net.c95
-rw-r--r--include/linux/mlx5/mlx5_ifc.h10
-rw-r--r--include/linux/virtio_net.h3
-rw-r--r--net/hsr/hsr_device.c39
-rw-r--r--net/hsr/hsr_forward.c4
-rw-r--r--net/hsr/hsr_main.h6
-rw-r--r--net/hsr/hsr_netlink.c2
-rw-r--r--net/ipv4/fou_core.c4
-rw-r--r--net/mptcp/pm_netlink.c13
-rw-r--r--net/netfilter/nf_flow_table_core.c6
-rw-r--r--net/netfilter/nf_flow_table_inet.c2
-rw-r--r--net/netfilter/nft_socket.c48
-rw-r--r--tools/testing/selftests/net/lib/csum.c16
-rw-r--r--tools/testing/selftests/net/mptcp/Makefile2
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh4
34 files changed, 364 insertions, 190 deletions
diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
index 85bfa45f5122..a754a61adc2d 100644
--- a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
+++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
@@ -14,8 +14,53 @@ maintainers:
description:
Bindings for NXP TJA11xx automotive PHYs
+properties:
+ compatible:
+ enum:
+ - ethernet-phy-id0180.dc40
+ - ethernet-phy-id0180.dc41
+ - ethernet-phy-id0180.dc48
+ - ethernet-phy-id0180.dd00
+ - ethernet-phy-id0180.dd01
+ - ethernet-phy-id0180.dd02
+ - ethernet-phy-id0180.dc80
+ - ethernet-phy-id0180.dc82
+ - ethernet-phy-id001b.b010
+ - ethernet-phy-id001b.b013
+ - ethernet-phy-id001b.b030
+ - ethernet-phy-id001b.b031
+
allOf:
- $ref: ethernet-phy.yaml#
+ - if:
+ properties:
+ compatible:
+ contains:
+ enum:
+ - ethernet-phy-id0180.dc40
+ - ethernet-phy-id0180.dc41
+ - ethernet-phy-id0180.dc48
+ - ethernet-phy-id0180.dd00
+ - ethernet-phy-id0180.dd01
+ - ethernet-phy-id0180.dd02
+
+ then:
+ properties:
+ nxp,rmii-refclk-in:
+ type: boolean
+ description: |
+ The REF_CLK is provided for both transmitted and received data
+ in RMII mode. This clock signal is provided by the PHY and is
+ typically derived from an external 25MHz crystal. Alternatively,
+ a 50MHz clock signal generated by an external oscillator can be
+ connected to pin REF_CLK. A third option is to connect a 25MHz
+ clock to pin CLK_IN_OUT. So, the REF_CLK should be configured
+ as input or output according to the actual circuit connection.
+ If present, indicates that the REF_CLK will be configured as
+ interface reference clock input when RMII mode enabled.
+ If not present, the REF_CLK will be configured as interface
+ reference clock output when RMII mode enabled.
+ Only supported on TJA1100 and TJA1101.
patternProperties:
"^ethernet-phy@[0-9a-f]+$":
@@ -32,22 +77,6 @@ patternProperties:
description:
The ID number for the child PHY. Should be +1 of parent PHY.
- nxp,rmii-refclk-in:
- type: boolean
- description: |
- The REF_CLK is provided for both transmitted and received data
- in RMII mode. This clock signal is provided by the PHY and is
- typically derived from an external 25MHz crystal. Alternatively,
- a 50MHz clock signal generated by an external oscillator can be
- connected to pin REF_CLK. A third option is to connect a 25MHz
- clock to pin CLK_IN_OUT. So, the REF_CLK should be configured
- as input or output according to the actual circuit connection.
- If present, indicates that the REF_CLK will be configured as
- interface reference clock input when RMII mode enabled.
- If not present, the REF_CLK will be configured as interface
- reference clock output when RMII mode enabled.
- Only supported on TJA1100 and TJA1101.
-
required:
- reg
@@ -60,6 +89,7 @@ examples:
#size-cells = <0>;
tja1101_phy0: ethernet-phy@4 {
+ compatible = "ethernet-phy-id0180.dc40";
reg = <0x4>;
nxp,rmii-refclk-in;
};
diff --git a/Documentation/netlink/specs/mptcp_pm.yaml b/Documentation/netlink/specs/mptcp_pm.yaml
index af525ed29792..30d8342cacc8 100644
--- a/Documentation/netlink/specs/mptcp_pm.yaml
+++ b/Documentation/netlink/specs/mptcp_pm.yaml
@@ -109,7 +109,6 @@ attribute-sets:
-
name: port
type: u16
- byte-order: big-endian
-
name: flags
type: u32
diff --git a/MAINTAINERS b/MAINTAINERS
index 10430778c998..f70987f7e462 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18407,6 +18407,7 @@ L: netdev@vger.kernel.org
S: Maintained
F: Documentation/devicetree/bindings/net/pse-pd/
F: drivers/net/pse-pd/
+F: net/ethtool/pse-pd.c
PSTORE FILESYSTEM
M: Kees Cook <kees@kernel.org>
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index ba37a566da39..ecfa73725d25 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1474,10 +1474,13 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
/* Hardware errata - Admin config could not be overwritten if
* config is pending, need reset the TAS module
*/
- val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
- if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
- ret = -EBUSY;
- goto err_reset_tc;
+ val = ocelot_read_rix(ocelot, QSYS_TAG_CONFIG, port);
+ if (val & QSYS_TAG_CONFIG_ENABLE) {
+ val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
+ if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING) {
+ ret = -EBUSY;
+ goto err_reset_tc;
+ }
}
ocelot_rmw_rix(ocelot,
diff --git a/drivers/net/ethernet/faraday/ftgmac100.h b/drivers/net/ethernet/faraday/ftgmac100.h
index 63b3e02fab16..4968f6f0bdbc 100644
--- a/drivers/net/ethernet/faraday/ftgmac100.h
+++ b/drivers/net/ethernet/faraday/ftgmac100.h
@@ -84,7 +84,7 @@
FTGMAC100_INT_RPKT_BUF)
/* All the interrupts we care about */
-#define FTGMAC100_INT_ALL (FTGMAC100_INT_RPKT_BUF | \
+#define FTGMAC100_INT_ALL (FTGMAC100_INT_RXTX | \
FTGMAC100_INT_BAD)
/*
diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index cfe6b57b1da0..4a55e521c17e 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2272,12 +2272,12 @@ static netdev_tx_t
dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
{
const int queue_mapping = skb_get_queue_mapping(skb);
- bool nonlinear = skb_is_nonlinear(skb);
struct rtnl_link_stats64 *percpu_stats;
struct dpaa_percpu_priv *percpu_priv;
struct netdev_queue *txq;
struct dpaa_priv *priv;
struct qm_fd fd;
+ bool nonlinear;
int offset = 0;
int err = 0;
@@ -2287,6 +2287,13 @@ dpaa_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
qm_fd_clear_fd(&fd);
+ /* Packet data is always read as 32-bit words, so zero out any part of
+ * the skb which might be sent if we have to pad the packet
+ */
+ if (__skb_put_padto(skb, ETH_ZLEN, false))
+ goto enomem;
+
+ nonlinear = skb_is_nonlinear(skb);
if (!nonlinear) {
/* We're going to store the skb backpointer at the beginning
* of the data buffer, so we need a privately owned skb
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 737c00b02dd0..2405e5ed9128 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2413,13 +2413,6 @@ void ice_vsi_decfg(struct ice_vsi *vsi)
struct ice_pf *pf = vsi->back;
int err;
- /* The Rx rule will only exist to remove if the LLDP FW
- * engine is currently stopped
- */
- if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
- !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
- ice_cfg_sw_lldp(vsi, false, false);
-
ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
err = ice_rm_vsi_rdma_cfg(vsi->port_info, vsi->idx);
if (err)
@@ -2764,6 +2757,14 @@ int ice_vsi_release(struct ice_vsi *vsi)
ice_rss_clean(vsi);
ice_vsi_close(vsi);
+
+ /* The Rx rule will only exist to remove if the LLDP FW
+ * engine is currently stopped
+ */
+ if (!ice_is_safe_mode(pf) && vsi->type == ICE_VSI_PF &&
+ !test_bit(ICE_FLAG_FW_LLDP_AGENT, pf->flags))
+ ice_cfg_sw_lldp(vsi, false, false);
+
ice_vsi_decfg(vsi);
/* retain SW VSI data structure since it is needed to unregister and
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c7db88b517da..ea780d468579 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5363,7 +5363,6 @@ err_load:
ice_deinit(pf);
err_init:
ice_adapter_put(pdev);
- pci_disable_device(pdev);
return err;
}
@@ -5470,7 +5469,6 @@ static void ice_remove(struct pci_dev *pdev)
ice_set_wake(pf);
ice_adapter_put(pdev);
- pci_disable_device(pdev);
}
/**
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index fe8847184cb1..79d91e95358c 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -3194,7 +3194,7 @@ ice_add_update_vsi_list(struct ice_hw *hw,
/* A rule already exists with the new VSI being added */
if (test_bit(vsi_handle, m_entry->vsi_list_info->vsi_map))
- return 0;
+ return -EEXIST;
/* Update the previously created VSI list set with
* the new VSI ID passed in
@@ -3264,7 +3264,7 @@ ice_find_vsi_list_entry(struct ice_hw *hw, u8 recp_id, u16 vsi_handle,
list_head = &sw->recp_list[recp_id].filt_rules;
list_for_each_entry(list_itr, list_head, list_entry) {
- if (list_itr->vsi_list_info) {
+ if (list_itr->vsi_count == 1 && list_itr->vsi_list_info) {
map_info = list_itr->vsi_list_info;
if (test_bit(vsi_handle, map_info->vsi_map)) {
*vsi_list_id = map_info->vsi_list_id;
diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c
index 9dc7c60838ed..1ef4cb871452 100644
--- a/drivers/net/ethernet/intel/igb/igb_main.c
+++ b/drivers/net/ethernet/intel/igb/igb_main.c
@@ -33,6 +33,7 @@
#include <linux/bpf_trace.h>
#include <linux/pm_runtime.h>
#include <linux/etherdevice.h>
+#include <linux/lockdep.h>
#ifdef CONFIG_IGB_DCA
#include <linux/dca.h>
#endif
@@ -2914,8 +2915,11 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp)
}
}
+/* This function assumes __netif_tx_lock is held by the caller. */
static void igb_xdp_ring_update_tail(struct igb_ring *ring)
{
+ lockdep_assert_held(&txring_txq(ring)->_xmit_lock);
+
/* Force memory writes to complete before letting h/w know there
* are new descriptors to fetch.
*/
@@ -3000,11 +3004,11 @@ static int igb_xdp_xmit(struct net_device *dev, int n,
nxmit++;
}
- __netif_tx_unlock(nq);
-
if (unlikely(flags & XDP_XMIT_FLUSH))
igb_xdp_ring_update_tail(tx_ring);
+ __netif_tx_unlock(nq);
+
return nxmit;
}
@@ -8864,12 +8868,14 @@ static void igb_put_rx_buffer(struct igb_ring *rx_ring,
static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
{
+ unsigned int total_bytes = 0, total_packets = 0;
struct igb_adapter *adapter = q_vector->adapter;
struct igb_ring *rx_ring = q_vector->rx.ring;
- struct sk_buff *skb = rx_ring->skb;
- unsigned int total_bytes = 0, total_packets = 0;
u16 cleaned_count = igb_desc_unused(rx_ring);
+ struct sk_buff *skb = rx_ring->skb;
+ int cpu = smp_processor_id();
unsigned int xdp_xmit = 0;
+ struct netdev_queue *nq;
struct xdp_buff xdp;
u32 frame_sz = 0;
int rx_buf_pgcnt;
@@ -8997,7 +9003,10 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget)
if (xdp_xmit & IGB_XDP_TX) {
struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter);
+ nq = txring_txq(tx_ring);
+ __netif_tx_lock(nq, cpu);
igb_xdp_ring_update_tail(tx_ring);
+ __netif_tx_unlock(nq);
}
u64_stats_update_begin(&rx_ring->rx_syncp);
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
index 03ee93fd9e94..db2db0738ee4 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h
@@ -319,6 +319,7 @@ struct nix_mark_format {
/* smq(flush) to tl1 cir/pir info */
struct nix_smq_tree_ctx {
+ u16 schq;
u64 cir_off;
u64 cir_val;
u64 pir_off;
@@ -328,8 +329,6 @@ struct nix_smq_tree_ctx {
/* smq flush context */
struct nix_smq_flush_ctx {
int smq;
- u16 tl1_schq;
- u16 tl2_schq;
struct nix_smq_tree_ctx smq_tree_ctx[NIX_TXSCH_LVL_CNT];
};
diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
index 222f9e00b836..82832a24fbd8 100644
--- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
+++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c
@@ -2259,14 +2259,13 @@ static void nix_smq_flush_fill_ctx(struct rvu *rvu, int blkaddr, int smq,
schq = smq;
for (lvl = NIX_TXSCH_LVL_SMQ; lvl <= NIX_TXSCH_LVL_TL1; lvl++) {
smq_tree_ctx = &smq_flush_ctx->smq_tree_ctx[lvl];
+ smq_tree_ctx->schq = schq;
if (lvl == NIX_TXSCH_LVL_TL1) {
- smq_flush_ctx->tl1_schq = schq;
smq_tree_ctx->cir_off = NIX_AF_TL1X_CIR(schq);
smq_tree_ctx->pir_off = 0;
smq_tree_ctx->pir_val = 0;
parent_off = 0;
} else if (lvl == NIX_TXSCH_LVL_TL2) {
- smq_flush_ctx->tl2_schq = schq;
smq_tree_ctx->cir_off = NIX_AF_TL2X_CIR(schq);
smq_tree_ctx->pir_off = NIX_AF_TL2X_PIR(schq);
parent_off = NIX_AF_TL2X_PARENT(schq);
@@ -2301,8 +2300,8 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
{
struct nix_txsch *txsch;
struct nix_hw *nix_hw;
+ int tl2, tl2_schq;
u64 regoff;
- int tl2;
nix_hw = get_nix_hw(rvu->hw, blkaddr);
if (!nix_hw)
@@ -2310,16 +2309,17 @@ static void nix_smq_flush_enadis_xoff(struct rvu *rvu, int blkaddr,
/* loop through all TL2s with matching PF_FUNC */
txsch = &nix_hw->txsch[NIX_TXSCH_LVL_TL2];
+ tl2_schq = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL2].schq;
for (tl2 = 0; tl2 < txsch->schq.max; tl2++) {
/* skip the smq(flush) TL2 */
- if (tl2 == smq_flush_ctx->tl2_schq)
+ if (tl2 == tl2_schq)
continue;
/* skip unused TL2s */
if (TXSCH_MAP_FLAGS(txsch->pfvf_map[tl2]) & NIX_TXSCHQ_FREE)
continue;
/* skip if PF_FUNC doesn't match */
if ((TXSCH_MAP_FUNC(txsch->pfvf_map[tl2]) & ~RVU_PFVF_FUNC_MASK) !=
- (TXSCH_MAP_FUNC(txsch->pfvf_map[smq_flush_ctx->tl2_schq] &
+ (TXSCH_MAP_FUNC(txsch->pfvf_map[tl2_schq] &
~RVU_PFVF_FUNC_MASK)))
continue;
/* enable/disable XOFF */
@@ -2361,10 +2361,12 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
int smq, u16 pcifunc, int nixlf)
{
struct nix_smq_flush_ctx *smq_flush_ctx;
+ int err, restore_tx_en = 0, i;
int pf = rvu_get_pf(pcifunc);
u8 cgx_id = 0, lmac_id = 0;
- int err, restore_tx_en = 0;
- u64 cfg;
+ u16 tl2_tl3_link_schq;
+ u8 link, link_level;
+ u64 cfg, bmap = 0;
if (!is_rvu_otx2(rvu)) {
/* Skip SMQ flush if pkt count is zero */
@@ -2388,16 +2390,38 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, true);
nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, false);
- cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
- /* Do SMQ flush and set enqueue xoff */
- cfg |= BIT_ULL(50) | BIT_ULL(49);
- rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
-
/* Disable backpressure from physical link,
* otherwise SMQ flush may stall.
*/
rvu_cgx_enadis_rx_bp(rvu, pf, false);
+ link_level = rvu_read64(rvu, blkaddr, NIX_AF_PSE_CHANNEL_LEVEL) & 0x01 ?
+ NIX_TXSCH_LVL_TL3 : NIX_TXSCH_LVL_TL2;
+ tl2_tl3_link_schq = smq_flush_ctx->smq_tree_ctx[link_level].schq;
+ link = smq_flush_ctx->smq_tree_ctx[NIX_TXSCH_LVL_TL1].schq;
+
+ /* SMQ set enqueue xoff */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
+ cfg |= BIT_ULL(50);
+ rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
+
+ /* Clear all NIX_AF_TL3_TL2_LINK_CFG[ENA] for the TL3/TL2 queue */
+ for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
+ cfg = rvu_read64(rvu, blkaddr,
+ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
+ if (!(cfg & BIT_ULL(12)))
+ continue;
+ bmap |= (1 << i);
+ cfg &= ~BIT_ULL(12);
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
+ }
+
+ /* Do SMQ flush and set enqueue xoff */
+ cfg = rvu_read64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq));
+ cfg |= BIT_ULL(50) | BIT_ULL(49);
+ rvu_write64(rvu, blkaddr, NIX_AF_SMQX_CFG(smq), cfg);
+
/* Wait for flush to complete */
err = rvu_poll_reg(rvu, blkaddr,
NIX_AF_SMQX_CFG(smq), BIT_ULL(49), true);
@@ -2406,6 +2430,17 @@ static int nix_smq_flush(struct rvu *rvu, int blkaddr,
"NIXLF%d: SMQ%d flush failed, txlink might be busy\n",
nixlf, smq);
+ /* Set NIX_AF_TL3_TL2_LINKX_CFG[ENA] for the TL3/TL2 queue */
+ for (i = 0; i < (rvu->hw->cgx_links + rvu->hw->lbk_links); i++) {
+ if (!(bmap & (1 << i)))
+ continue;
+ cfg = rvu_read64(rvu, blkaddr,
+ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link));
+ cfg |= BIT_ULL(12);
+ rvu_write64(rvu, blkaddr,
+ NIX_AF_TL3_TL2X_LINKX_CFG(tl2_tl3_link_schq, link), cfg);
+ }
+
/* clear XOFF on TL2s */
nix_smq_flush_enadis_rate(rvu, blkaddr, smq_flush_ctx, true);
nix_smq_flush_enadis_xoff(rvu, blkaddr, smq_flush_ctx, false);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 36845872ae94..1cf3c54d343e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -139,6 +139,10 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT);
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GBASE_LR4, legacy,
ETHTOOL_LINK_MODE_100000baseLR4_ER4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100BASE_TX, legacy,
+ ETHTOOL_LINK_MODE_100baseT_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_1000BASE_T, legacy,
+ ETHTOOL_LINK_MODE_1000baseT_Full_BIT);
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_10GBASE_T, legacy,
ETHTOOL_LINK_MODE_10000baseT_Full_BIT);
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_25GBASE_CR, legacy,
@@ -204,6 +208,12 @@ void mlx5e_build_ptys2ethtool_map(void)
ETHTOOL_LINK_MODE_200000baseLR4_ER4_FR4_Full_BIT,
ETHTOOL_LINK_MODE_200000baseDR4_Full_BIT,
ETHTOOL_LINK_MODE_200000baseCR4_Full_BIT);
+ MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_400GAUI_8_400GBASE_CR8, ext,
+ ETHTOOL_LINK_MODE_400000baseKR8_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseSR8_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseLR8_ER8_FR8_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseDR8_Full_BIT,
+ ETHTOOL_LINK_MODE_400000baseCR8_Full_BIT);
MLX5_BUILD_PTYS2ETHTOOL_CONFIG(MLX5E_100GAUI_1_100GBASE_CR_KR, ext,
ETHTOOL_LINK_MODE_100000baseKR_Full_BIT,
ETHTOOL_LINK_MODE_100000baseSR_Full_BIT,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
index 255bc8b749f9..8587cd572da5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/legacy.c
@@ -319,7 +319,7 @@ int mlx5_eswitch_set_vepa(struct mlx5_eswitch *esw, u8 setting)
return -EPERM;
mutex_lock(&esw->state_lock);
- if (esw->mode != MLX5_ESWITCH_LEGACY) {
+ if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw)) {
err = -EOPNOTSUPP;
goto out;
}
@@ -339,7 +339,7 @@ int mlx5_eswitch_get_vepa(struct mlx5_eswitch *esw, u8 *setting)
if (!mlx5_esw_allowed(esw))
return -EPERM;
- if (esw->mode != MLX5_ESWITCH_LEGACY)
+ if (esw->mode != MLX5_ESWITCH_LEGACY || !mlx5_esw_is_fdb_created(esw))
return -EOPNOTSUPP;
*setting = esw->fdb_table.legacy.vepa_uplink_rule ? 1 : 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 20146a2dc7f4..02a3563f51ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -312,6 +312,25 @@ static int esw_qos_set_group_max_rate(struct mlx5_eswitch *esw,
return err;
}
+static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
+{
+ switch (type) {
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_TSAR;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_VPORT_TC;
+ case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
+ return MLX5_CAP_QOS(dev, esw_element_type) &
+ ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
+ }
+ return false;
+}
+
static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
struct mlx5_vport *vport,
u32 max_rate, u32 bw_share)
@@ -323,6 +342,9 @@ static int esw_qos_vport_create_sched_element(struct mlx5_eswitch *esw,
void *vport_elem;
int err;
+ if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT))
+ return -EOPNOTSUPP;
+
parent_tsar_ix = group ? group->tsar_ix : esw->qos.root_tsar_ix;
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT);
@@ -421,6 +443,7 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
{
u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
struct mlx5_esw_rate_group *group;
+ __be32 *attr;
u32 divider;
int err;
@@ -428,6 +451,12 @@ __esw_qos_create_rate_group(struct mlx5_eswitch *esw, struct netlink_ext_ack *ex
if (!group)
return ERR_PTR(-ENOMEM);
+ MLX5_SET(scheduling_context, tsar_ctx, element_type,
+ SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
+
+ attr = MLX5_ADDR_OF(scheduling_context, tsar_ctx, element_attributes);
+ *attr = cpu_to_be32(TSAR_ELEMENT_TSAR_TYPE_DWRR << 16);
+
MLX5_SET(scheduling_context, tsar_ctx, parent_element_id,
esw->qos.root_tsar_ix);
err = mlx5_create_scheduling_element_cmd(esw->dev,
@@ -526,25 +555,6 @@ static int esw_qos_destroy_rate_group(struct mlx5_eswitch *esw,
return err;
}
-static bool esw_qos_element_type_supported(struct mlx5_core_dev *dev, int type)
-{
- switch (type) {
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_TSAR;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_VPORT_TC;
- case SCHEDULING_CONTEXT_ELEMENT_TYPE_PARA_VPORT_TC:
- return MLX5_CAP_QOS(dev, esw_element_type) &
- ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC;
- }
- return false;
-}
-
static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *extack)
{
u32 tsar_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {};
@@ -555,7 +565,8 @@ static int esw_qos_create(struct mlx5_eswitch *esw, struct netlink_ext_ack *exta
if (!MLX5_CAP_GEN(dev, qos) || !MLX5_CAP_QOS(dev, esw_scheduling))
return -EOPNOTSUPP;
- if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR))
+ if (!esw_qos_element_type_supported(dev, SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR) ||
+ !(MLX5_CAP_QOS(dev, esw_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
return -EOPNOTSUPP;
MLX5_SET(scheduling_context, tsar_ctx, element_type,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 5b7e6f4b5c7e..2ec33c4a2a3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -2217,6 +2217,7 @@ static const struct pci_device_id mlx5_core_pci_table[] = {
{ PCI_VDEVICE(MELLANOX, 0x101f) }, /* ConnectX-6 LX */
{ PCI_VDEVICE(MELLANOX, 0x1021) }, /* ConnectX-7 */
{ PCI_VDEVICE(MELLANOX, 0x1023) }, /* ConnectX-8 */
+ { PCI_VDEVICE(MELLANOX, 0x1025) }, /* ConnectX-9 */
{ PCI_VDEVICE(MELLANOX, 0xa2d2) }, /* BlueField integrated ConnectX-5 network controller */
{ PCI_VDEVICE(MELLANOX, 0xa2d3), MLX5_PCI_DEV_IS_VF}, /* BlueField integrated ConnectX-5 network controller VF */
{ PCI_VDEVICE(MELLANOX, 0xa2d6) }, /* BlueField-2 integrated ConnectX-6 Dx network controller */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
index 8bce730b5c5b..db2bd3ad63ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qos.c
@@ -28,6 +28,9 @@ int mlx5_qos_create_leaf_node(struct mlx5_core_dev *mdev, u32 parent_id,
{
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
+ if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP))
+ return -EOPNOTSUPP;
+
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_QUEUE_GROUP);
@@ -44,6 +47,10 @@ int mlx5_qos_create_inner_node(struct mlx5_core_dev *mdev, u32 parent_id,
u32 sched_ctx[MLX5_ST_SZ_DW(scheduling_context)] = {0};
void *attr;
+ if (!(MLX5_CAP_QOS(mdev, nic_element_type) & ELEMENT_TYPE_CAP_MASK_TSAR) ||
+ !(MLX5_CAP_QOS(mdev, nic_tsar_type) & TSAR_TYPE_CAP_MASK_DWRR))
+ return -EOPNOTSUPP;
+
MLX5_SET(scheduling_context, sched_ctx, parent_element_id, parent_id);
MLX5_SET(scheduling_context, sched_ctx, element_type,
SCHEDULING_CONTEXT_ELEMENT_TYPE_TSAR);
diff --git a/drivers/net/ethernet/wangxun/libwx/wx_type.h b/drivers/net/ethernet/wangxun/libwx/wx_type.h
index 1d57b047817b..b54bffda027b 100644
--- a/drivers/net/ethernet/wangxun/libwx/wx_type.h
+++ b/drivers/net/ethernet/wangxun/libwx/wx_type.h
@@ -426,9 +426,9 @@ enum WX_MSCA_CMD_value {
#define WX_MIN_RXD 128
#define WX_MIN_TXD 128
-/* Number of Transmit and Receive Descriptors must be a multiple of 8 */
-#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 8
-#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 8
+/* Number of Transmit and Receive Descriptors must be a multiple of 128 */
+#define WX_REQ_RX_DESCRIPTOR_MULTIPLE 128
+#define WX_REQ_TX_DESCRIPTOR_MULTIPLE 128
#define WX_MAX_JUMBO_FRAME_SIZE 9432 /* max payload 9414 */
#define VMDQ_P(p) p
diff --git a/drivers/net/phy/dp83822.c b/drivers/net/phy/dp83822.c
index efeb643c1373..fc247f479257 100644
--- a/drivers/net/phy/dp83822.c
+++ b/drivers/net/phy/dp83822.c
@@ -271,8 +271,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
DP83822_ENERGY_DET_INT_EN |
DP83822_LINK_QUAL_INT_EN);
- /* Private data pointer is NULL on DP83825 */
- if (!dp83822 || !dp83822->fx_enabled)
+ if (!dp83822->fx_enabled)
misr_status |= DP83822_ANEG_COMPLETE_INT_EN |
DP83822_DUP_MODE_CHANGE_INT_EN |
DP83822_SPEED_CHANGED_INT_EN;
@@ -292,8 +291,7 @@ static int dp83822_config_intr(struct phy_device *phydev)
DP83822_PAGE_RX_INT_EN |
DP83822_EEE_ERROR_CHANGE_INT_EN);
- /* Private data pointer is NULL on DP83825 */
- if (!dp83822 || !dp83822->fx_enabled)
+ if (!dp83822->fx_enabled)
misr_status |= DP83822_ANEG_ERR_INT_EN |
DP83822_WOL_PKT_INT_EN;
@@ -691,10 +689,9 @@ static int dp83822_read_straps(struct phy_device *phydev)
return 0;
}
-static int dp83822_probe(struct phy_device *phydev)
+static int dp8382x_probe(struct phy_device *phydev)
{
struct dp83822_private *dp83822;
- int ret;
dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
GFP_KERNEL);
@@ -703,6 +700,20 @@ static int dp83822_probe(struct phy_device *phydev)
phydev->priv = dp83822;
+ return 0;
+}
+
+static int dp83822_probe(struct phy_device *phydev)
+{
+ struct dp83822_private *dp83822;
+ int ret;
+
+ ret = dp8382x_probe(phydev);
+ if (ret)
+ return ret;
+
+ dp83822 = phydev->priv;
+
ret = dp83822_read_straps(phydev);
if (ret)
return ret;
@@ -717,14 +728,11 @@ static int dp83822_probe(struct phy_device *phydev)
static int dp83826_probe(struct phy_device *phydev)
{
- struct dp83822_private *dp83822;
-
- dp83822 = devm_kzalloc(&phydev->mdio.dev, sizeof(*dp83822),
- GFP_KERNEL);
- if (!dp83822)
- return -ENOMEM;
+ int ret;
- phydev->priv = dp83822;
+ ret = dp8382x_probe(phydev);
+ if (ret)
+ return ret;
dp83826_of_init(phydev);
@@ -795,6 +803,7 @@ static int dp83822_resume(struct phy_device *phydev)
PHY_ID_MATCH_MODEL(_id), \
.name = (_name), \
/* PHY_BASIC_FEATURES */ \
+ .probe = dp8382x_probe, \
.soft_reset = dp83822_phy_reset, \
.config_init = dp8382x_config_init, \
.get_wol = dp83822_get_wol, \
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index c6af18948092..5a1c1ec5a64b 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -356,6 +356,9 @@ struct receive_queue {
struct xdp_rxq_info xsk_rxq_info;
struct xdp_buff **xsk_buffs;
+
+ /* Do dma by self */
+ bool do_dma;
};
/* This structure can contain rss message with maximum settings for indirection table and keysize
@@ -885,7 +888,7 @@ static void *virtnet_rq_get_buf(struct receive_queue *rq, u32 *len, void **ctx)
void *buf;
buf = virtqueue_get_buf_ctx(rq->vq, len, ctx);
- if (buf)
+ if (buf && rq->do_dma)
virtnet_rq_unmap(rq, buf, *len);
return buf;
@@ -898,6 +901,11 @@ static void virtnet_rq_init_one_sg(struct receive_queue *rq, void *buf, u32 len)
u32 offset;
void *head;
+ if (!rq->do_dma) {
+ sg_init_one(rq->sg, buf, len);
+ return;
+ }
+
head = page_address(rq->alloc_frag.page);
offset = buf - head;
@@ -923,42 +931,44 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
head = page_address(alloc_frag->page);
- dma = head;
+ if (rq->do_dma) {
+ dma = head;
+
+ /* new pages */
+ if (!alloc_frag->offset) {
+ if (rq->last_dma) {
+ /* Now, the new page is allocated, the last dma
+ * will not be used. So the dma can be unmapped
+ * if the ref is 0.
+ */
+ virtnet_rq_unmap(rq, rq->last_dma, 0);
+ rq->last_dma = NULL;
+ }
- /* new pages */
- if (!alloc_frag->offset) {
- if (rq->last_dma) {
- /* Now, the new page is allocated, the last dma
- * will not be used. So the dma can be unmapped
- * if the ref is 0.
- */
- virtnet_rq_unmap(rq, rq->last_dma, 0);
- rq->last_dma = NULL;
- }
+ dma->len = alloc_frag->size - sizeof(*dma);
- dma->len = alloc_frag->size - sizeof(*dma);
+ addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
+ dma->len, DMA_FROM_DEVICE, 0);
+ if (virtqueue_dma_mapping_error(rq->vq, addr))
+ return NULL;
- addr = virtqueue_dma_map_single_attrs(rq->vq, dma + 1,
- dma->len, DMA_FROM_DEVICE, 0);
- if (virtqueue_dma_mapping_error(rq->vq, addr))
- return NULL;
+ dma->addr = addr;
+ dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
- dma->addr = addr;
- dma->need_sync = virtqueue_dma_need_sync(rq->vq, addr);
+ /* Add a reference to dma to prevent the entire dma from
+ * being released during error handling. This reference
+ * will be freed after the pages are no longer used.
+ */
+ get_page(alloc_frag->page);
+ dma->ref = 1;
+ alloc_frag->offset = sizeof(*dma);
- /* Add a reference to dma to prevent the entire dma from
- * being released during error handling. This reference
- * will be freed after the pages are no longer used.
- */
- get_page(alloc_frag->page);
- dma->ref = 1;
- alloc_frag->offset = sizeof(*dma);
+ rq->last_dma = dma;
+ }
- rq->last_dma = dma;
+ ++dma->ref;
}
- ++dma->ref;
-
buf = head + alloc_frag->offset;
get_page(alloc_frag->page);
@@ -967,19 +977,6 @@ static void *virtnet_rq_alloc(struct receive_queue *rq, u32 size, gfp_t gfp)
return buf;
}
-static void virtnet_rq_set_premapped(struct virtnet_info *vi)
-{
- int i;
-
- /* disable for big mode */
- if (!vi->mergeable_rx_bufs && vi->big_packets)
- return;
-
- for (i = 0; i < vi->max_queue_pairs; i++)
- /* error should never happen */
- BUG_ON(virtqueue_set_dma_premapped(vi->rq[i].vq));
-}
-
static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
{
struct virtnet_info *vi = vq->vdev->priv;
@@ -993,7 +990,7 @@ static void virtnet_rq_unmap_free_buf(struct virtqueue *vq, void *buf)
return;
}
- if (!vi->big_packets || vi->mergeable_rx_bufs)
+ if (rq->do_dma)
virtnet_rq_unmap(rq, buf, 0);
virtnet_rq_free_buf(vi, rq, buf);
@@ -2430,7 +2427,8 @@ static int add_recvbuf_small(struct virtnet_info *vi, struct receive_queue *rq,
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0) {
- virtnet_rq_unmap(rq, buf, 0);
+ if (rq->do_dma)
+ virtnet_rq_unmap(rq, buf, 0);
put_page(virt_to_head_page(buf));
}
@@ -2544,7 +2542,8 @@ static int add_recvbuf_mergeable(struct virtnet_info *vi,
ctx = mergeable_len_to_ctx(len + room, headroom);
err = virtqueue_add_inbuf_ctx(rq->vq, rq->sg, 1, buf, ctx, gfp);
if (err < 0) {
- virtnet_rq_unmap(rq, buf, 0);
+ if (rq->do_dma)
+ virtnet_rq_unmap(rq, buf, 0);
put_page(virt_to_head_page(buf));
}
@@ -2701,7 +2700,7 @@ static int virtnet_receive_packets(struct virtnet_info *vi,
}
} else {
while (packets < budget &&
- (buf = virtqueue_get_buf(rq->vq, &len)) != NULL) {
+ (buf = virtnet_rq_get_buf(rq, &len, NULL)) != NULL) {
receive_buf(vi, rq, buf, len, NULL, xdp_xmit, stats);
packets++;
}
@@ -5892,7 +5891,7 @@ static void free_receive_page_frags(struct virtnet_info *vi)
int i;
for (i = 0; i < vi->max_queue_pairs; i++)
if (vi->rq[i].alloc_frag.page) {
- if (vi->rq[i].last_dma)
+ if (vi->rq[i].do_dma && vi->rq[i].last_dma)
virtnet_rq_unmap(&vi->rq[i], vi->rq[i].last_dma, 0);
put_page(vi->rq[i].alloc_frag.page);
}
@@ -6090,8 +6089,6 @@ static int init_vqs(struct virtnet_info *vi)
if (ret)
goto err_free;
- virtnet_rq_set_premapped(vi);
-
cpus_read_lock();
virtnet_set_affinity(vi);
cpus_read_unlock();
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cab228cf51c6..cfdf984a95a8 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1027,7 +1027,8 @@ struct mlx5_ifc_qos_cap_bits {
u8 max_tsar_bw_share[0x20];
- u8 reserved_at_100[0x20];
+ u8 nic_element_type[0x10];
+ u8 nic_tsar_type[0x10];
u8 reserved_at_120[0x3];
u8 log_meter_aso_granularity[0x5];
@@ -3966,6 +3967,7 @@ enum {
ELEMENT_TYPE_CAP_MASK_VPORT = 1 << 1,
ELEMENT_TYPE_CAP_MASK_VPORT_TC = 1 << 2,
ELEMENT_TYPE_CAP_MASK_PARA_VPORT_TC = 1 << 3,
+ ELEMENT_TYPE_CAP_MASK_QUEUE_GROUP = 1 << 4,
};
struct mlx5_ifc_scheduling_context_bits {
@@ -4675,6 +4677,12 @@ enum {
TSAR_ELEMENT_TSAR_TYPE_ETS = 0x2,
};
+enum {
+ TSAR_TYPE_CAP_MASK_DWRR = 1 << 0,
+ TSAR_TYPE_CAP_MASK_ROUND_ROBIN = 1 << 1,
+ TSAR_TYPE_CAP_MASK_ETS = 1 << 2,
+};
+
struct mlx5_ifc_tsar_element_bits {
u8 reserved_at_0[0x8];
u8 tsar_type[0x8];
diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h
index 6c395a2600e8..276ca543ef44 100644
--- a/include/linux/virtio_net.h
+++ b/include/linux/virtio_net.h
@@ -173,7 +173,8 @@ retry:
break;
case SKB_GSO_TCPV4:
case SKB_GSO_TCPV6:
- if (skb->csum_offset != offsetof(struct tcphdr, check))
+ if (skb->ip_summed == CHECKSUM_PARTIAL &&
+ skb->csum_offset != offsetof(struct tcphdr, check))
return -EINVAL;
break;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index e4cc6b78dcfc..049e22bdaafb 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -231,9 +231,7 @@ static netdev_tx_t hsr_dev_xmit(struct sk_buff *skb, struct net_device *dev)
skb->dev = master->dev;
skb_reset_mac_header(skb);
skb_reset_mac_len(skb);
- spin_lock_bh(&hsr->seqnr_lock);
hsr_forward_skb(skb, master);
- spin_unlock_bh(&hsr->seqnr_lock);
} else {
dev_core_stats_tx_dropped_inc(dev);
dev_kfree_skb_any(skb);
@@ -314,14 +312,10 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
set_hsr_stag_HSR_ver(hsr_stag, hsr->prot_version);
/* From HSRv1 on we have separate supervision sequence numbers. */
- spin_lock_bh(&hsr->seqnr_lock);
- if (hsr->prot_version > 0) {
- hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
- hsr->sup_sequence_nr++;
- } else {
- hsr_stag->sequence_nr = htons(hsr->sequence_nr);
- hsr->sequence_nr++;
- }
+ if (hsr->prot_version > 0)
+ hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
+ else
+ hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sequence_nr));
hsr_stag->tlv.HSR_TLV_type = type;
/* TODO: Why 12 in HSRv0? */
@@ -343,13 +337,11 @@ static void send_hsr_supervision_frame(struct hsr_port *port,
ether_addr_copy(hsr_sp->macaddress_A, hsr->macaddress_redbox);
}
- if (skb_put_padto(skb, ETH_ZLEN)) {
- spin_unlock_bh(&hsr->seqnr_lock);
+ if (skb_put_padto(skb, ETH_ZLEN))
return;
- }
hsr_forward_skb(skb, port);
- spin_unlock_bh(&hsr->seqnr_lock);
+
return;
}
@@ -374,9 +366,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
set_hsr_stag_HSR_ver(hsr_stag, (hsr->prot_version ? 1 : 0));
/* From HSRv1 on we have separate supervision sequence numbers. */
- spin_lock_bh(&hsr->seqnr_lock);
- hsr_stag->sequence_nr = htons(hsr->sup_sequence_nr);
- hsr->sup_sequence_nr++;
+ hsr_stag->sequence_nr = htons(atomic_inc_return(&hsr->sup_sequence_nr));
hsr_stag->tlv.HSR_TLV_type = PRP_TLV_LIFE_CHECK_DD;
hsr_stag->tlv.HSR_TLV_length = sizeof(struct hsr_sup_payload);
@@ -384,13 +374,10 @@ static void send_prp_supervision_frame(struct hsr_port *master,
hsr_sp = skb_put(skb, sizeof(struct hsr_sup_payload));
ether_addr_copy(hsr_sp->macaddress_A, master->dev->dev_addr);
- if (skb_put_padto(skb, ETH_ZLEN)) {
- spin_unlock_bh(&hsr->seqnr_lock);
+ if (skb_put_padto(skb, ETH_ZLEN))
return;
- }
hsr_forward_skb(skb, master);
- spin_unlock_bh(&hsr->seqnr_lock);
}
/* Announce (supervision frame) timer function
@@ -427,6 +414,9 @@ static void hsr_proxy_announce(struct timer_list *t)
* of SAN nodes stored in ProxyNodeTable.
*/
interlink = hsr_port_get_hsr(hsr, HSR_PT_INTERLINK);
+ if (!interlink)
+ goto done;
+
list_for_each_entry_rcu(node, &hsr->proxy_node_db, mac_list) {
if (hsr_addr_is_redbox(hsr, node->macaddress_A))
continue;
@@ -441,6 +431,7 @@ static void hsr_proxy_announce(struct timer_list *t)
mod_timer(&hsr->announce_proxy_timer, jiffies + interval);
}
+done:
rcu_read_unlock();
}
@@ -621,11 +612,9 @@ int hsr_dev_finalize(struct net_device *hsr_dev, struct net_device *slave[2],
if (res < 0)
return res;
- spin_lock_init(&hsr->seqnr_lock);
/* Overflow soon to find bugs easier: */
- hsr->sequence_nr = HSR_SEQNR_START;
- hsr->sup_sequence_nr = HSR_SUP_SEQNR_START;
- hsr->interlink_sequence_nr = HSR_SEQNR_START;
+ atomic_set(&hsr->sequence_nr, HSR_SEQNR_START);
+ atomic_set(&hsr->sup_sequence_nr, HSR_SUP_SEQNR_START);
timer_setup(&hsr->announce_timer, hsr_announce, 0);
timer_setup(&hsr->prune_timer, hsr_prune_nodes, 0);
diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index b38060246e62..6f63c8a775c4 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -599,9 +599,7 @@ static void handle_std_frame(struct sk_buff *skb,
if (port->type == HSR_PT_MASTER ||
port->type == HSR_PT_INTERLINK) {
/* Sequence nr for the master/interlink node */
- lockdep_assert_held(&hsr->seqnr_lock);
- frame->sequence_nr = hsr->sequence_nr;
- hsr->sequence_nr++;
+ frame->sequence_nr = atomic_inc_return(&hsr->sequence_nr);
}
}
diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index ab1f8d35d9dc..6f7bbf01f3e4 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -202,11 +202,9 @@ struct hsr_priv {
struct timer_list prune_timer;
struct timer_list prune_proxy_timer;
int announce_count;
- u16 sequence_nr;
- u16 interlink_sequence_nr; /* Interlink port seq_nr */
- u16 sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
+ atomic_t sequence_nr;
+ atomic_t sup_sequence_nr; /* For HSRv1 separate seq_nr for supervision */
enum hsr_version prot_version; /* Indicate if HSRv0, HSRv1 or PRPv1 */
- spinlock_t seqnr_lock; /* locking for sequence_nr */
spinlock_t list_lock; /* locking for node list */
struct hsr_proto_ops *proto_ops;
#define PRP_LAN_ID 0x5 /* 0x1010 for A and 0x1011 for B. Bit 0 is set
diff --git a/net/hsr/hsr_netlink.c b/net/hsr/hsr_netlink.c
index f6ff0b61e08a..8aea4ff5f49e 100644
--- a/net/hsr/hsr_netlink.c
+++ b/net/hsr/hsr_netlink.c
@@ -163,7 +163,7 @@ static int hsr_fill_info(struct sk_buff *skb, const struct net_device *dev)
if (nla_put(skb, IFLA_HSR_SUPERVISION_ADDR, ETH_ALEN,
hsr->sup_multicast_addr) ||
- nla_put_u16(skb, IFLA_HSR_SEQ_NR, hsr->sequence_nr))
+ nla_put_u16(skb, IFLA_HSR_SEQ_NR, atomic_read(&hsr->sequence_nr)))
goto nla_put_failure;
if (hsr->prot_version == PRP_V1)
proto = HSR_PROTOCOL_PRP;
diff --git a/net/ipv4/fou_core.c b/net/ipv4/fou_core.c
index 78b869b31492..3e30745e2c09 100644
--- a/net/ipv4/fou_core.c
+++ b/net/ipv4/fou_core.c
@@ -336,11 +336,11 @@ static struct sk_buff *gue_gro_receive(struct sock *sk,
struct gro_remcsum grc;
u8 proto;
+ skb_gro_remcsum_init(&grc);
+
if (!fou)
goto out;
- skb_gro_remcsum_init(&grc);
-
off = skb_gro_offset(skb);
len = off + sizeof(*guehdr);
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index f891bc714668..ad935d34c973 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -334,15 +334,21 @@ mptcp_pm_del_add_timer(struct mptcp_sock *msk,
{
struct mptcp_pm_add_entry *entry;
struct sock *sk = (struct sock *)msk;
+ struct timer_list *add_timer = NULL;
spin_lock_bh(&msk->pm.lock);
entry = mptcp_lookup_anno_list_by_saddr(msk, addr);
- if (entry && (!check_id || entry->addr.id == addr->id))
+ if (entry && (!check_id || entry->addr.id == addr->id)) {
entry->retrans_times = ADD_ADDR_RETRANS_MAX;
+ add_timer = &entry->add_timer;
+ }
+ if (!check_id && entry)
+ list_del(&entry->list);
spin_unlock_bh(&msk->pm.lock);
- if (entry && (!check_id || entry->addr.id == addr->id))
- sk_stop_timer_sync(sk, &entry->add_timer);
+ /* no lock, because sk_stop_timer_sync() is calling del_timer_sync() */
+ if (add_timer)
+ sk_stop_timer_sync(sk, add_timer);
return entry;
}
@@ -1462,7 +1468,6 @@ static bool remove_anno_list_by_saddr(struct mptcp_sock *msk,
entry = mptcp_pm_del_add_timer(msk, addr, false);
if (entry) {
- list_del(&entry->list);
kfree(entry);
return true;
}
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 5c1ff07eaee0..df72b0376970 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -670,8 +670,14 @@ static int __init nf_flow_table_module_init(void)
if (ret)
goto out_offload;
+ ret = nf_flow_register_bpf();
+ if (ret)
+ goto out_bpf;
+
return 0;
+out_bpf:
+ nf_flow_table_offload_exit();
out_offload:
unregister_pernet_subsys(&nf_flow_table_net_ops);
return ret;
diff --git a/net/netfilter/nf_flow_table_inet.c b/net/netfilter/nf_flow_table_inet.c
index 8b541a080342..b0f199171932 100644
--- a/net/netfilter/nf_flow_table_inet.c
+++ b/net/netfilter/nf_flow_table_inet.c
@@ -101,7 +101,7 @@ static int __init nf_flow_inet_module_init(void)
nft_register_flowtable_type(&flowtable_ipv6);
nft_register_flowtable_type(&flowtable_inet);
- return nf_flow_register_bpf();
+ return 0;
}
static void __exit nf_flow_inet_module_exit(void)
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index f30163e2ca62..12cdff640492 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -9,7 +9,8 @@
struct nft_socket {
enum nft_socket_keys key:8;
- u8 level;
+ u8 level; /* cgroupv2 level to extract */
+ u8 level_user; /* cgroupv2 level provided by userspace */
u8 len;
union {
u8 dreg;
@@ -53,6 +54,28 @@ nft_sock_get_eval_cgroupv2(u32 *dest, struct sock *sk, const struct nft_pktinfo
memcpy(dest, &cgid, sizeof(u64));
return true;
}
+
+/* process context only, uses current->nsproxy. */
+static noinline int nft_socket_cgroup_subtree_level(void)
+{
+ struct cgroup *cgrp = cgroup_get_from_path("/");
+ int level;
+
+ if (!cgrp)
+ return -ENOENT;
+
+ level = cgrp->level;
+
+ cgroup_put(cgrp);
+
+ if (WARN_ON_ONCE(level > 255))
+ return -ERANGE;
+
+ if (WARN_ON_ONCE(level < 0))
+ return -EINVAL;
+
+ return level;
+}
#endif
static struct sock *nft_socket_do_lookup(const struct nft_pktinfo *pkt)
@@ -110,13 +133,13 @@ static void nft_socket_eval(const struct nft_expr *expr,
*dest = READ_ONCE(sk->sk_mark);
} else {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
break;
case NFT_SOCKET_WILDCARD:
if (!sk_fullsock(sk)) {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
nft_socket_wildcard(pkt, regs, sk, dest);
break;
@@ -124,7 +147,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
case NFT_SOCKET_CGROUPV2:
if (!nft_sock_get_eval_cgroupv2(dest, sk, pkt, priv->level)) {
regs->verdict.code = NFT_BREAK;
- return;
+ goto out_put_sk;
}
break;
#endif
@@ -133,6 +156,7 @@ static void nft_socket_eval(const struct nft_expr *expr,
regs->verdict.code = NFT_BREAK;
}
+out_put_sk:
if (sk != skb->sk)
sock_gen_put(sk);
}
@@ -173,9 +197,10 @@ static int nft_socket_init(const struct nft_ctx *ctx,
case NFT_SOCKET_MARK:
len = sizeof(u32);
break;
-#ifdef CONFIG_CGROUPS
+#ifdef CONFIG_SOCK_CGROUP_DATA
case NFT_SOCKET_CGROUPV2: {
unsigned int level;
+ int err;
if (!tb[NFTA_SOCKET_LEVEL])
return -EINVAL;
@@ -184,6 +209,17 @@ static int nft_socket_init(const struct nft_ctx *ctx,
if (level > 255)
return -EOPNOTSUPP;
+ err = nft_socket_cgroup_subtree_level();
+ if (err < 0)
+ return err;
+
+ priv->level_user = level;
+
+ level += err;
+ /* Implies a giant cgroup tree */
+ if (WARN_ON_ONCE(level > 255))
+ return -EOPNOTSUPP;
+
priv->level = level;
len = sizeof(u64);
break;
@@ -208,7 +244,7 @@ static int nft_socket_dump(struct sk_buff *skb,
if (nft_dump_register(skb, NFTA_SOCKET_DREG, priv->dreg))
return -1;
if (priv->key == NFT_SOCKET_CGROUPV2 &&
- nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level)))
+ nla_put_be32(skb, NFTA_SOCKET_LEVEL, htonl(priv->level_user)))
return -1;
return 0;
}
diff --git a/tools/testing/selftests/net/lib/csum.c b/tools/testing/selftests/net/lib/csum.c
index b9f3fc3c3426..e0a34e5e8dd5 100644
--- a/tools/testing/selftests/net/lib/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -654,10 +654,16 @@ static int recv_verify_packet_ipv4(void *nh, int len)
{
struct iphdr *iph = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t ip_len;
if (len < sizeof(*iph) || iph->protocol != proto)
return -1;
+ ip_len = ntohs(iph->tot_len);
+ if (ip_len > len || ip_len < sizeof(*iph))
+ return -1;
+
+ len = ip_len;
iph_addr_p = &iph->saddr;
if (proto == IPPROTO_TCP)
return recv_verify_packet_tcp(iph + 1, len - sizeof(*iph));
@@ -669,16 +675,22 @@ static int recv_verify_packet_ipv6(void *nh, int len)
{
struct ipv6hdr *ip6h = nh;
uint16_t proto = cfg_encap ? IPPROTO_UDP : cfg_proto;
+ uint16_t ip_len;
if (len < sizeof(*ip6h) || ip6h->nexthdr != proto)
return -1;
+ ip_len = ntohs(ip6h->payload_len);
+ if (ip_len > len - sizeof(*ip6h))
+ return -1;
+
+ len = ip_len;
iph_addr_p = &ip6h->saddr;
if (proto == IPPROTO_TCP)
- return recv_verify_packet_tcp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_tcp(ip6h + 1, len);
else
- return recv_verify_packet_udp(ip6h + 1, len - sizeof(*ip6h));
+ return recv_verify_packet_udp(ip6h + 1, len);
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile
index 7b936a926859..5d796622e730 100644
--- a/tools/testing/selftests/net/mptcp/Makefile
+++ b/tools/testing/selftests/net/mptcp/Makefile
@@ -11,6 +11,8 @@ TEST_GEN_FILES = mptcp_connect pm_nl_ctl mptcp_sockopt mptcp_inq
TEST_FILES := mptcp_lib.sh settings
+TEST_INCLUDES := ../lib.sh ../net_helper.sh
+
EXTRA_CLEAN := *.pcap
include ../../lib.mk
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index a4762c49a878..cde041c93906 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -3064,7 +3064,9 @@ fullmesh_tests()
pm_nl_set_limits $ns1 1 3
pm_nl_set_limits $ns2 1 3
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
- pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+ if mptcp_lib_kallsyms_has "mptcp_pm_subflow_check_next$"; then
+ pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,fullmesh
+ fi
fullmesh=1 speed=slow \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 3 3 3