diff options
107 files changed, 4388 insertions, 1400 deletions
diff --git a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml index bb94a2388520..18ee72f5c74a 100644 --- a/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml +++ b/Documentation/devicetree/bindings/net/realtek,rtl82xx.yaml @@ -18,6 +18,29 @@ allOf: - $ref: ethernet-phy.yaml# properties: + compatible: + enum: + - ethernet-phy-id001c.c800 + - ethernet-phy-id001c.c816 + - ethernet-phy-id001c.c838 + - ethernet-phy-id001c.c840 + - ethernet-phy-id001c.c848 + - ethernet-phy-id001c.c849 + - ethernet-phy-id001c.c84a + - ethernet-phy-id001c.c862 + - ethernet-phy-id001c.c878 + - ethernet-phy-id001c.c880 + - ethernet-phy-id001c.c910 + - ethernet-phy-id001c.c912 + - ethernet-phy-id001c.c913 + - ethernet-phy-id001c.c914 + - ethernet-phy-id001c.c915 + - ethernet-phy-id001c.c916 + - ethernet-phy-id001c.c942 + - ethernet-phy-id001c.c961 + - ethernet-phy-id001c.cad0 + - ethernet-phy-id001c.cb00 + realtek,clkout-disable: type: boolean description: diff --git a/Documentation/netlink/specs/ethtool.yaml b/Documentation/netlink/specs/ethtool.yaml index 683f5c3f30ad..949e2722505d 100644 --- a/Documentation/netlink/specs/ethtool.yaml +++ b/Documentation/netlink/specs/ethtool.yaml @@ -20,6 +20,10 @@ definitions: name: header-flags type: flags entries: [ compact-bitsets, omit-reply, stats ] + - + name: module-fw-flash-status + type: enum + entries: [ started, in_progress, completed, error ] attribute-sets: - @@ -1004,6 +1008,32 @@ attribute-sets: - name: burst-tmr type: u32 + - + name: module-fw-flash + attributes: + - + name: header + type: nest + nested-attributes: header + - + name: file-name + type: string + - + name: password + type: u32 + - + name: status + type: u32 + enum: module-fw-flash-status + - + name: status-msg + type: string + - + name: done + type: uint + - + name: total + type: uint operations: enum-model: directional @@ -1764,3 +1794,28 @@ operations: name: mm-ntf doc: Notification for change in MAC Merge configuration. notify: mm-get + - + name: module-fw-flash-act + doc: Flash transceiver module firmware. + + attribute-set: module-fw-flash + + do: + request: + attributes: + - header + - file-name + - password + - + name: module-fw-flash-ntf + doc: Notification for firmware flashing progress and status. + + attribute-set: module-fw-flash + + event: + attributes: + - header + - status + - status-msg + - done + - total diff --git a/Documentation/netlink/specs/tcp_metrics.yaml b/Documentation/netlink/specs/tcp_metrics.yaml new file mode 100644 index 000000000000..1bd94f43e526 --- /dev/null +++ b/Documentation/netlink/specs/tcp_metrics.yaml @@ -0,0 +1,169 @@ +# SPDX-License-Identifier: ((GPL-2.0 WITH Linux-syscall-note) OR BSD-3-Clause) + +name: tcp_metrics + +protocol: genetlink-legacy + +doc: | + Management interface for TCP metrics. + +c-family-name: tcp-metrics-genl-name +c-version-name: tcp-metrics-genl-version +max-by-define: true +kernel-policy: global + +definitions: + - + name: tcp-fastopen-cookie-max + type: const + value: 16 + +attribute-sets: + - + name: tcp-metrics + name-prefix: tcp-metrics-attr- + attributes: + - + name: addr-ipv4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: addr-ipv6 + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: age + type: u64 + - + name: tw-tsval + type: u32 + doc: unused + - + name: tw-ts-stamp + type: s32 + doc: unused + - + name: vals + type: nest + nested-attributes: metrics + - + name: fopen-mss + type: u16 + - + name: fopen-syn-drops + type: u16 + - + name: fopen-syn-drop-ts + type: u64 + - + name: fopen-cookie + type: binary + checks: + min-len: tcp-fastopen-cookie-max + - + name: saddr-ipv4 + type: u32 + byte-order: big-endian + display-hint: ipv4 + - + name: saddr-ipv6 + type: binary + checks: + min-len: 16 + byte-order: big-endian + display-hint: ipv6 + - + name: pad + type: pad + + - + name: metrics + # Intentionally don't define the name-prefix, see below. + doc: | + Attributes with metrics. Note that the values here do not match + the TCP_METRIC_* defines in the kernel, because kernel defines + are off-by one (e.g. rtt is defined as enum 0, while netlink carries + attribute type 1). + attributes: + - + name: rtt + type: u32 + doc: | + Round Trip Time (RTT), in msecs with 3 bits fractional + (left-shift by 3 to get the msec value). + - + name: rttvar + type: u32 + doc: | + Round Trip Time VARiance (RTT), in msecs with 2 bits fractional + (left-shift by 2 to get the msec value). + - + name: ssthresh + type: u32 + doc: Slow Start THRESHold. + - + name: cwnd + type: u32 + doc: Congestion Window. + - + name: reodering + type: u32 + doc: Reodering metric. + - + name: rtt-us + type: u32 + doc: | + Round Trip Time (RTT), in usecs, with 3 bits fractional + (left-shift by 3 to get the msec value). + - + name: rttvar-us + type: u32 + doc: | + Round Trip Time (RTT), in usecs, with 2 bits fractional + (left-shift by 3 to get the msec value). + +operations: + list: + - + name: get + doc: Retrieve metrics. + attribute-set: tcp-metrics + + dont-validate: [ strict, dump ] + + do: + request: &sel_attrs + attributes: + - addr-ipv4 + - addr-ipv6 + - saddr-ipv4 + - saddr-ipv6 + reply: &all_attrs + attributes: + - addr-ipv4 + - addr-ipv6 + - saddr-ipv4 + - saddr-ipv6 + - age + - vals + - fopen-mss + - fopen-syn-drops + - fopen-syn-drop-ts + - fopen-cookie + dump: + reply: *all_attrs + + - + name: del + doc: Delete metrics. + attribute-set: tcp-metrics + + dont-validate: [ strict, dump ] + flags: [ admin-perm ] + + do: + request: *sel_attrs diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst index 7ec08e903bab..bfe2eda8580d 100644 --- a/Documentation/networking/ethtool-netlink.rst +++ b/Documentation/networking/ethtool-netlink.rst @@ -228,6 +228,7 @@ Userspace to kernel: ``ETHTOOL_MSG_PLCA_GET_STATUS`` get PLCA RS status ``ETHTOOL_MSG_MM_GET`` get MAC merge layer state ``ETHTOOL_MSG_MM_SET`` set MAC merge layer parameters + ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` flash transceiver module firmware ===================================== ================================= Kernel to userspace: @@ -274,6 +275,7 @@ Kernel to userspace: ``ETHTOOL_MSG_PLCA_GET_STATUS_REPLY`` PLCA RS status ``ETHTOOL_MSG_PLCA_NTF`` PLCA RS parameters ``ETHTOOL_MSG_MM_GET_REPLY`` MAC merge layer status + ``ETHTOOL_MSG_MODULE_FW_FLASH_NTF`` transceiver module flash updates ======================================== ================================= ``GET`` requests are sent by userspace applications to retrieve device @@ -2041,6 +2043,73 @@ The attributes are propagated to the driver through the following structure: .. kernel-doc:: include/linux/ethtool.h :identifiers: ethtool_mm_cfg +MODULE_FW_FLASH_ACT +=================== + +Flashes transceiver module firmware. + +Request contents: + + ======================================= ====== =========================== + ``ETHTOOL_A_MODULE_FW_FLASH_HEADER`` nested request header + ``ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME`` string firmware image file name + ``ETHTOOL_A_MODULE_FW_FLASH_PASSWORD`` u32 transceiver module password + ======================================= ====== =========================== + +The firmware update process consists of three logical steps: + +1. Downloading a firmware image to the transceiver module and validating it. +2. Running the firmware image. +3. Committing the firmware image so that it is run upon reset. + +When flash command is given, those three steps are taken in that order. + +This message merely schedules the update process and returns immediately +without blocking. The process then runs asynchronously. +Since it can take several minutes to complete, during the update process +notifications are emitted from the kernel to user space updating it about +the status and progress. + +The ``ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME`` attribute encodes the firmware +image file name. The firmware image is downloaded to the transceiver module, +validated, run and committed. + +The optional ``ETHTOOL_A_MODULE_FW_FLASH_PASSWORD`` attribute encodes a password +that might be required as part of the transceiver module firmware update +process. + +The firmware update process can take several minutes to complete. Therefore, +during the update process notifications are emitted from the kernel to user +space updating it about the status and progress. + + + +Notification contents: + + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_HEADER`` | nested | reply header | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_STATUS`` | u32 | status | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG`` | string | status message | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_DONE`` | uint | progress | + +---------------------------------------------------+--------+----------------+ + | ``ETHTOOL_A_MODULE_FW_FLASH_TOTAL`` | uint | total | + +---------------------------------------------------+--------+----------------+ + +The ``ETHTOOL_A_MODULE_FW_FLASH_STATUS`` attribute encodes the current status +of the firmware update process. Possible values are: + +.. kernel-doc:: include/uapi/linux/ethtool.h + :identifiers: ethtool_module_fw_flash_status + +The ``ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG`` attribute encodes a status message +string. + +The ``ETHTOOL_A_MODULE_FW_FLASH_DONE`` and ``ETHTOOL_A_MODULE_FW_FLASH_TOTAL`` +attributes encode the completed and total amount of work, respectively. + Request translation =================== @@ -2147,4 +2216,5 @@ are netlink only. n/a ``ETHTOOL_MSG_PLCA_GET_STATUS`` n/a ``ETHTOOL_MSG_MM_GET`` n/a ``ETHTOOL_MSG_MM_SET`` + n/a ``ETHTOOL_MSG_MODULE_FW_FLASH_ACT`` =================================== ===================================== diff --git a/MAINTAINERS b/MAINTAINERS index 22328600cfd0..e0b6b419318f 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -11051,8 +11051,8 @@ F: include/drm/xe* F: include/uapi/drm/xe_drm.h INTEL ETHERNET DRIVERS -M: Jesse Brandeburg <jesse.brandeburg@intel.com> M: Tony Nguyen <anthony.l.nguyen@intel.com> +M: Przemek Kitszel <przemyslaw.kitszel@intel.com> L: intel-wired-lan@lists.osuosl.org (moderated for non-subscribers) S: Supported W: https://www.intel.com/content/www/us/en/support.html diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index 1bd0c5973252..a1690207d793 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -456,8 +456,9 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) dma_addr_t mapping; unsigned int length, pad = 0; u32 len, free_size, vlan_tag_flags, cfa_action, flags; - u16 prod, last_frag; + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; struct pci_dev *pdev = bp->pdev; + u16 prod, last_frag, txts_prod; struct bnxt_tx_ring_info *txr; struct bnxt_sw_tx_bd *tx_buf; __le32 lflags = 0; @@ -509,27 +510,29 @@ static netdev_tx_t bnxt_start_xmit(struct sk_buff *skb, struct net_device *dev) vlan_tag_flags |= 1 << TX_BD_CFA_META_TPID_SHIFT; } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { - struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) && ptp && + ptp->tx_tstamp_en) { + if (bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP) { + lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP); + tx_buf->is_ts_pkt = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + } else if (!skb_is_gso(skb)) { + u16 seq_id, hdr_off; - if (ptp && ptp->tx_tstamp_en && !skb_is_gso(skb)) { - if (atomic_dec_if_positive(&ptp->tx_avail) < 0) { - atomic64_inc(&ptp->stats.ts_err); - goto tx_no_ts; - } - if (!bnxt_ptp_parse(skb, &ptp->tx_seqid, - &ptp->tx_hdr_off)) { + if (!bnxt_ptp_parse(skb, &seq_id, &hdr_off) && + !bnxt_ptp_get_txts_prod(ptp, &txts_prod)) { if (vlan_tag_flags) - ptp->tx_hdr_off += VLAN_HLEN; + hdr_off += VLAN_HLEN; lflags |= cpu_to_le32(TX_BD_FLAGS_STAMP); + tx_buf->is_ts_pkt = 1; skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; - } else { - atomic_inc(&bp->ptp_cfg->tx_avail); + + ptp->txts_req[txts_prod].tx_seqid = seq_id; + ptp->txts_req[txts_prod].tx_hdr_off = hdr_off; + tx_buf->txts_prod = txts_prod; } } } - -tx_no_ts: if (unlikely(skb->no_fcs)) lflags |= cpu_to_le32(TX_BD_FLAGS_NO_CRC); @@ -758,8 +761,11 @@ tx_free: dev_kfree_skb_any(skb); tx_kick_pending: if (BNXT_TX_PTP_IS_SET(lflags)) { + txr->tx_buf_ring[txr->tx_prod].is_ts_pkt = 0; atomic64_inc(&bp->ptp_cfg->stats.ts_err); - atomic_inc(&bp->ptp_cfg->tx_avail); + if (!(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) + /* set SKB to err so PTP worker will clean up */ + ptp->txts_req[txts_prod].tx_skb = ERR_PTR(-EIO); } if (txr->kick_pending) bnxt_txr_db_kick(bp, txr, txr->tx_prod); @@ -768,7 +774,8 @@ tx_kick_pending: return NETDEV_TX_OK; } -static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, +/* Returns true if some remaining TX packets not processed. */ +static bool __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, int budget) { struct netdev_queue *txq = netdev_get_tx_queue(bp->dev, txr->txq_index); @@ -777,24 +784,33 @@ static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, unsigned int tx_bytes = 0; u16 cons = txr->tx_cons; int tx_pkts = 0; + bool rc = false; while (RING_TX(bp, cons) != hw_cons) { struct bnxt_sw_tx_bd *tx_buf; struct sk_buff *skb; + bool is_ts_pkt; int j, last; tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; - cons = NEXT_TX(cons); skb = tx_buf->skb; - tx_buf->skb = NULL; if (unlikely(!skb)) { bnxt_sched_reset_txr(bp, txr, cons); - return; + return rc; } + is_ts_pkt = tx_buf->is_ts_pkt; + if (is_ts_pkt && (bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) { + rc = true; + break; + } + + cons = NEXT_TX(cons); tx_pkts++; tx_bytes += skb->len; + tx_buf->skb = NULL; + tx_buf->is_ts_pkt = 0; if (tx_buf->is_push) { tx_buf->is_push = 0; @@ -814,15 +830,11 @@ static void __bnxt_tx_int(struct bnxt *bp, struct bnxt_tx_ring_info *txr, skb_frag_size(&skb_shinfo(skb)->frags[j]), DMA_TO_DEVICE); } - if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { + if (unlikely(is_ts_pkt)) { if (BNXT_CHIP_P5(bp)) { /* PTP worker takes ownership of the skb */ - if (!bnxt_get_tx_ts_p5(bp, skb)) { - skb = NULL; - } else { - atomic64_inc(&bp->ptp_cfg->stats.ts_err); - atomic_inc(&bp->ptp_cfg->tx_avail); - } + bnxt_get_tx_ts_p5(bp, skb, tx_buf->txts_prod); + skb = NULL; } } @@ -837,18 +849,22 @@ next_tx_int: __netif_txq_completed_wake(txq, tx_pkts, tx_bytes, bnxt_tx_avail(bp, txr), bp->tx_wake_thresh, READ_ONCE(txr->dev_state) == BNXT_DEV_STATE_CLOSING); + + return rc; } static void bnxt_tx_int(struct bnxt *bp, struct bnxt_napi *bnapi, int budget) { struct bnxt_tx_ring_info *txr; + bool more = false; int i; bnxt_for_each_napi_tx(i, bnapi, txr) { if (txr->tx_hw_cons != RING_TX(bp, txr->tx_cons)) - __bnxt_tx_int(bp, txr, budget); + more |= __bnxt_tx_int(bp, txr, budget); } - bnapi->events &= ~BNXT_TX_CMP_EVENT; + if (!more) + bnapi->events &= ~BNXT_TX_CMP_EVENT; } static struct page *__bnxt_alloc_rx_page(struct bnxt *bp, dma_addr_t *mapping, @@ -2914,6 +2930,8 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, cpr->has_more_work = 1; break; } + } else if (cmp_type == CMP_TYPE_TX_L2_PKT_TS_CMP) { + bnxt_tx_ts_cmp(bp, bnapi, (struct tx_ts_cmp *)txcmp); } else if (cmp_type >= CMP_TYPE_RX_L2_CMP && cmp_type <= CMP_TYPE_RX_L2_TPA_START_V3_CMP) { if (likely(budget)) @@ -2945,8 +2963,10 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, } } - if (event & BNXT_REDIRECT_EVENT) + if (event & BNXT_REDIRECT_EVENT) { xdp_do_flush(); + event &= ~BNXT_REDIRECT_EVENT; + } if (event & BNXT_TX_EVENT) { struct bnxt_tx_ring_info *txr = bnapi->tx_ring[0]; @@ -2956,6 +2976,7 @@ static int __bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, wmb(); bnxt_db_write_relaxed(bp, &txr->tx_db, prod); + event &= ~BNXT_TX_EVENT; } cpr->cp_raw_cons = raw_cons; @@ -2973,13 +2994,14 @@ static void __bnxt_poll_work_done(struct bnxt *bp, struct bnxt_napi *bnapi, struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; bnxt_db_write(bp, &rxr->rx_db, rxr->rx_prod); + bnapi->events &= ~BNXT_RX_EVENT; } if (bnapi->events & BNXT_AGG_EVENT) { struct bnxt_rx_ring_info *rxr = bnapi->rx_ring; bnxt_db_write(bp, &rxr->rx_agg_db, rxr->rx_agg_prod); + bnapi->events &= ~BNXT_AGG_EVENT; } - bnapi->events &= BNXT_TX_CMP_EVENT; } static int bnxt_poll_work(struct bnxt *bp, struct bnxt_cp_ring_info *cpr, @@ -6788,6 +6810,7 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, switch (ring_type) { case HWRM_RING_ALLOC_TX: { struct bnxt_tx_ring_info *txr; + u16 flags = 0; txr = container_of(ring, struct bnxt_tx_ring_info, tx_ring_struct); @@ -6801,6 +6824,9 @@ static int hwrm_ring_alloc_send_msg(struct bnxt *bp, if (bp->flags & BNXT_FLAG_TX_COAL_CMPL) req->cmpl_coal_cnt = RING_ALLOC_REQ_CMPL_COAL_CNT_COAL_64; + if ((bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP) && bp->ptp_cfg) + flags |= RING_ALLOC_REQ_FLAGS_TX_PKT_TS_CMPL_ENABLE; + req->flags = cpu_to_le16(flags); break; } case HWRM_RING_ALLOC_RX: @@ -8981,7 +9007,7 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) u8 flags; int rc; - if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5(bp)) { + if (bp->hwrm_spec_code < 0x10801 || !BNXT_CHIP_P5_PLUS(bp)) { rc = -ENODEV; goto no_ptp; } @@ -8997,7 +9023,8 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) goto exit; flags = resp->flags; - if (!(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) { + if (BNXT_CHIP_P5_AND_MINUS(bp) && + !(flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_HWRM_ACCESS)) { rc = -ENODEV; goto exit; } @@ -9010,10 +9037,13 @@ static int __bnxt_hwrm_ptp_qcfg(struct bnxt *bp) ptp->bp = bp; bp->ptp_cfg = ptp; } - if (flags & PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK) { + + if (flags & + (PORT_MAC_PTP_QCFG_RESP_FLAGS_PARTIAL_DIRECT_ACCESS_REF_CLOCK | + PORT_MAC_PTP_QCFG_RESP_FLAGS_64B_PHC_TIME)) { ptp->refclk_regs[0] = le32_to_cpu(resp->ts_ref_clock_reg_lower); ptp->refclk_regs[1] = le32_to_cpu(resp->ts_ref_clock_reg_upper); - } else if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { + } else if (BNXT_CHIP_P5(bp)) { ptp->refclk_regs[0] = BNXT_TS_REG_TIMESYNC_TS0_LOWER; ptp->refclk_regs[1] = BNXT_TS_REG_TIMESYNC_TS0_UPPER; } else { @@ -9095,6 +9125,8 @@ static int __bnxt_hwrm_func_qcaps(struct bnxt *bp) bp->fw_cap |= BNXT_FW_CAP_RX_ALL_PKT_TS; if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_UDP_GSO_SUPPORTED) bp->flags |= BNXT_FLAG_UDP_GSO_CAP; + if (flags_ext2 & FUNC_QCAPS_RESP_FLAGS_EXT2_TX_PKT_TS_CMPL_SUPPORTED) + bp->fw_cap |= BNXT_FW_CAP_TX_TS_CMP; bp->tx_push_thresh = 0; if ((flags & FUNC_QCAPS_RESP_FLAGS_PUSH_MODE_SUPPORTED) && @@ -12136,8 +12168,8 @@ static int __bnxt_open_nic(struct bnxt *bp, bool irq_re_init, bool link_re_init) /* VF-reps may need to be re-opened after the PF is re-opened */ if (BNXT_PF(bp)) bnxt_vf_reps_open(bp); - if (bp->ptp_cfg) - atomic_set(&bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); + if (bp->ptp_cfg && !(bp->fw_cap & BNXT_FW_CAP_TX_TS_CMP)) + WRITE_ONCE(bp->ptp_cfg->tx_avail, BNXT_MAX_TX_TS); bnxt_ptp_init_rtc(bp, true); bnxt_ptp_cfg_tstamp_filters(bp); if (BNXT_SUPPORTS_MULTI_RSS_CTX(bp)) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h index 9cf0acfa04e5..e46bd11e52b0 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h @@ -181,6 +181,32 @@ struct tx_cmp { #define TX_CMP_SQ_CONS_IDX(txcmp) \ (le32_to_cpu((txcmp)->sq_cons_idx) & TX_CMP_SQ_CONS_IDX_MASK) +struct tx_ts_cmp { + __le32 tx_ts_cmp_flags_type; + #define TX_TS_CMP_FLAGS_ERROR (1 << 6) + #define TX_TS_CMP_FLAGS_TS_TYPE (1 << 7) + #define TX_TS_CMP_FLAGS_TS_TYPE_PM (0 << 7) + #define TX_TS_CMP_FLAGS_TS_TYPE_PA (1 << 7) + #define TX_TS_CMP_FLAGS_TS_FALLBACK (1 << 8) + #define TX_TS_CMP_TS_SUB_NS (0xf << 12) + #define TX_TS_CMP_TS_NS_MID (0xffff << 16) + #define TX_TS_CMP_TS_NS_MID_SFT 16 + u32 tx_ts_cmp_opaque; + __le32 tx_ts_cmp_errors_v; + #define TX_TS_CMP_V (1 << 0) + #define TX_TS_CMP_TS_INVALID_ERR (1 << 10) + __le32 tx_ts_cmp_ts_ns_lo; +}; + +#define BNXT_GET_TX_TS_48B_NS(tscmp) \ + (le32_to_cpu((tscmp)->tx_ts_cmp_ts_ns_lo) | \ + ((u64)(le32_to_cpu((tscmp)->tx_ts_cmp_flags_type) & \ + TX_TS_CMP_TS_NS_MID) << TX_TS_CMP_TS_NS_MID_SFT)) + +#define BNXT_TX_TS_ERR(tscmp) \ + (((tscmp)->tx_ts_cmp_flags_type & cpu_to_le32(TX_TS_CMP_FLAGS_ERROR)) &&\ + ((tscmp)->tx_ts_cmp_errors_v & cpu_to_le32(TX_TS_CMP_TS_INVALID_ERR))) + struct rx_cmp { __le32 rx_cmp_len_flags_type; #define RX_CMP_CMP_TYPE (0x3f << 0) @@ -848,11 +874,14 @@ struct bnxt_sw_tx_bd { DEFINE_DMA_UNMAP_ADDR(mapping); DEFINE_DMA_UNMAP_LEN(len); struct page *page; - u8 is_gso; + u8 is_ts_pkt; u8 is_push; u8 action; unsigned short nr_frags; - u16 rx_prod; + union { + u16 rx_prod; + u16 txts_prod; + }; }; struct bnxt_sw_rx_bd { @@ -2237,9 +2266,17 @@ struct bnxt { (BNXT_CHIP_NUM_58700((bp)->chip_num) && \ !BNXT_CHIP_TYPE_NITRO_A0(bp))) +/* Chip class phase 3.x */ +#define BNXT_CHIP_P3(bp) \ + (BNXT_CHIP_NUM_57X0X((bp)->chip_num) || \ + BNXT_CHIP_TYPE_NITRO_A0(bp)) + #define BNXT_CHIP_P4_PLUS(bp) \ (BNXT_CHIP_P4(bp) || BNXT_CHIP_P5_PLUS(bp)) +#define BNXT_CHIP_P5_AND_MINUS(bp) \ + (BNXT_CHIP_P3(bp) || BNXT_CHIP_P4(bp) || BNXT_CHIP_P5(bp)) + struct bnxt_aux_priv *aux_priv; struct bnxt_en_dev *edev; @@ -2384,6 +2421,7 @@ struct bnxt { #define BNXT_FW_CAP_CFA_RFS_RING_TBL_IDX_V2 BIT_ULL(16) #define BNXT_FW_CAP_PCIE_STATS_SUPPORTED BIT_ULL(17) #define BNXT_FW_CAP_EXT_STATS_SUPPORTED BIT_ULL(18) + #define BNXT_FW_CAP_TX_TS_CMP BIT_ULL(19) #define BNXT_FW_CAP_ERR_RECOVER_RELOAD BIT_ULL(20) #define BNXT_FW_CAP_HOT_RESET BIT_ULL(21) #define BNXT_FW_CAP_PTP_RTC BIT_ULL(22) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c index a14d46b9bfdf..37d42423459c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.c @@ -110,7 +110,7 @@ static void bnxt_ptp_get_current_time(struct bnxt *bp) } static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts, - u32 txts_tmo) + u32 txts_tmo, int slot) { struct hwrm_port_ts_query_output *resp; struct hwrm_port_ts_query_input *req; @@ -123,11 +123,12 @@ static int bnxt_hwrm_port_ts_query(struct bnxt *bp, u32 flags, u64 *ts, req->flags = cpu_to_le32(flags); if ((flags & PORT_TS_QUERY_REQ_FLAGS_PATH) == PORT_TS_QUERY_REQ_FLAGS_PATH_TX) { + struct bnxt_ptp_tx_req *txts_req = &bp->ptp_cfg->txts_req[slot]; u32 tmo_us = txts_tmo * 1000; req->enables = cpu_to_le16(BNXT_PTP_QTS_TX_ENABLES); - req->ptp_seq_id = cpu_to_le32(bp->ptp_cfg->tx_seqid); - req->ptp_hdr_offset = cpu_to_le16(bp->ptp_cfg->tx_hdr_off); + req->ptp_seq_id = cpu_to_le32(txts_req->tx_seqid); + req->ptp_hdr_offset = cpu_to_le16(txts_req->tx_hdr_off); if (!tmo_us) tmo_us = BNXT_PTP_QTS_TIMEOUT; tmo_us = min(tmo_us, BNXT_PTP_QTS_MAX_TMO_US); @@ -656,6 +657,14 @@ static int bnxt_map_ptp_regs(struct bnxt *bp) (ptp->refclk_regs[i] & BNXT_GRC_OFFSET_MASK); return 0; } + if (bp->flags & BNXT_FLAG_CHIP_P7) { + for (i = 0; i < 2; i++) { + if (reg_arr[i] & BNXT_GRC_BASE_MASK) + return -EINVAL; + ptp->refclk_mapped_regs[i] = reg_arr[i]; + } + return 0; + } return -ENODEV; } @@ -674,43 +683,44 @@ static u64 bnxt_cc_read(const struct cyclecounter *cc) return ns; } -static void bnxt_stamp_tx_skb(struct bnxt *bp, struct sk_buff *skb) +static int bnxt_stamp_tx_skb(struct bnxt *bp, int slot) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; struct skb_shared_hwtstamps timestamp; + struct bnxt_ptp_tx_req *txts_req; unsigned long now = jiffies; u64 ts = 0, ns = 0; u32 tmo = 0; int rc; - if (!ptp->txts_pending) - ptp->abs_txts_tmo = now + msecs_to_jiffies(ptp->txts_tmo); - if (!time_after_eq(now, ptp->abs_txts_tmo)) - tmo = jiffies_to_msecs(ptp->abs_txts_tmo - now); + txts_req = &ptp->txts_req[slot]; + /* make sure bnxt_get_tx_ts_p5() has updated abs_txts_tmo */ + smp_rmb(); + if (!time_after_eq(now, txts_req->abs_txts_tmo)) + tmo = jiffies_to_msecs(txts_req->abs_txts_tmo - now); rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_PATH_TX, &ts, - tmo); + tmo, slot); if (!rc) { memset(×tamp, 0, sizeof(timestamp)); spin_lock_bh(&ptp->ptp_lock); ns = timecounter_cyc2time(&ptp->tc, ts); spin_unlock_bh(&ptp->ptp_lock); timestamp.hwtstamp = ns_to_ktime(ns); - skb_tstamp_tx(ptp->tx_skb, ×tamp); + skb_tstamp_tx(txts_req->tx_skb, ×tamp); ptp->stats.ts_pkts++; } else { - if (!time_after_eq(jiffies, ptp->abs_txts_tmo)) { - ptp->txts_pending = true; - return; - } + if (!time_after_eq(jiffies, txts_req->abs_txts_tmo)) + return -EAGAIN; + ptp->stats.ts_lost++; netdev_warn_once(bp->dev, "TS query for TX timer failed rc = %x\n", rc); } - dev_kfree_skb_any(ptp->tx_skb); - ptp->tx_skb = NULL; - atomic_inc(&ptp->tx_avail); - ptp->txts_pending = false; + dev_kfree_skb_any(txts_req->tx_skb); + txts_req->tx_skb = NULL; + + return 0; } static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) @@ -719,12 +729,30 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) ptp_info); unsigned long now = jiffies; struct bnxt *bp = ptp->bp; + u16 cons = ptp->txts_cons; + u32 num_requests; + int rc = 0; + + num_requests = BNXT_MAX_TX_TS - READ_ONCE(ptp->tx_avail); + while (num_requests--) { + if (IS_ERR(ptp->txts_req[cons].tx_skb)) + goto next_slot; + if (!ptp->txts_req[cons].tx_skb) + break; + rc = bnxt_stamp_tx_skb(bp, cons); + if (rc == -EAGAIN) + break; +next_slot: + BNXT_PTP_INC_TX_AVAIL(ptp); + cons = NEXT_TXTS(cons); + } + ptp->txts_cons = cons; - if (ptp->tx_skb) - bnxt_stamp_tx_skb(bp, ptp->tx_skb); - - if (!time_after_eq(now, ptp->next_period)) + if (!time_after_eq(now, ptp->next_period)) { + if (rc == -EAGAIN) + return 0; return ptp->next_period - now; + } bnxt_ptp_get_current_time(bp); ptp->next_period = now + HZ; @@ -734,22 +762,37 @@ static long bnxt_ptp_ts_aux_work(struct ptp_clock_info *ptp_info) spin_unlock_bh(&ptp->ptp_lock); ptp->next_overflow_check = now + BNXT_PHC_OVERFLOW_PERIOD; } - if (ptp->txts_pending) + if (rc == -EAGAIN) return 0; return HZ; } -int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb) +int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod) +{ + spin_lock_bh(&ptp->ptp_tx_lock); + if (ptp->tx_avail) { + *prod = ptp->txts_prod; + ptp->txts_prod = NEXT_TXTS(*prod); + ptp->tx_avail--; + spin_unlock_bh(&ptp->ptp_tx_lock); + return 0; + } + spin_unlock_bh(&ptp->ptp_tx_lock); + atomic64_inc(&ptp->stats.ts_err); + return -ENOSPC; +} + +void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + struct bnxt_ptp_tx_req *txts_req; - if (ptp->tx_skb) { - netdev_err(bp->dev, "deferring skb:one SKB is still outstanding\n"); - return -EBUSY; - } - ptp->tx_skb = skb; + txts_req = &ptp->txts_req[prod]; + txts_req->abs_txts_tmo = jiffies + msecs_to_jiffies(ptp->txts_tmo); + /* make sure abs_txts_tmo is written first */ + smp_wmb(); + txts_req->tx_skb = skb; ptp_schedule_worker(ptp->ptp_clock, 0); - return 0; } int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts) @@ -768,6 +811,38 @@ int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts) return 0; } +void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi, + struct tx_ts_cmp *tscmp) +{ + struct skb_shared_hwtstamps timestamp = {}; + struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + u32 opaque = tscmp->tx_ts_cmp_opaque; + struct bnxt_tx_ring_info *txr; + struct bnxt_sw_tx_bd *tx_buf; + u64 ts, ns; + u16 cons; + + txr = bnapi->tx_ring[TX_OPAQUE_RING(opaque)]; + ts = BNXT_GET_TX_TS_48B_NS(tscmp); + cons = TX_OPAQUE_IDX(opaque); + tx_buf = &txr->tx_buf_ring[RING_TX(bp, cons)]; + if (tx_buf->is_ts_pkt) { + if (BNXT_TX_TS_ERR(tscmp)) { + netdev_err(bp->dev, + "timestamp completion error 0x%x 0x%x\n", + le32_to_cpu(tscmp->tx_ts_cmp_flags_type), + le32_to_cpu(tscmp->tx_ts_cmp_errors_v)); + } else { + spin_lock_bh(&ptp->ptp_lock); + ns = timecounter_cyc2time(&ptp->tc, ts); + spin_unlock_bh(&ptp->ptp_lock); + timestamp.hwtstamp = ns_to_ktime(ns); + skb_tstamp_tx(tx_buf->skb, ×tamp); + } + tx_buf->is_ts_pkt = 0; + } +} + static const struct ptp_clock_info bnxt_ptp_caps = { .owner = THIS_MODULE, .name = "bnxt clock", @@ -914,7 +989,7 @@ int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg) return rc; } else { rc = bnxt_hwrm_port_ts_query(bp, PORT_TS_QUERY_REQ_FLAGS_CURRENT_TIME, - &ns, 0); + &ns, 0, 0); if (rc) return rc; } @@ -954,8 +1029,9 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) bnxt_ptp_free(bp); - atomic_set(&ptp->tx_avail, BNXT_MAX_TX_TS); + WRITE_ONCE(ptp->tx_avail, BNXT_MAX_TX_TS); spin_lock_init(&ptp->ptp_lock); + spin_lock_init(&ptp->ptp_tx_lock); if (BNXT_PTP_USE_RTC(bp)) { bnxt_ptp_timecounter_init(bp, false); @@ -986,7 +1062,7 @@ int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg) ptp->stats.ts_lost = 0; atomic64_set(&ptp->stats.ts_err, 0); - if (BNXT_CHIP_P5(bp)) { + if (bp->flags & BNXT_FLAG_CHIP_P5_PLUS) { spin_lock_bh(&ptp->ptp_lock); bnxt_refclk_read(bp, NULL, &ptp->current_time); WRITE_ONCE(ptp->old_time, ptp->current_time); @@ -1005,6 +1081,7 @@ out: void bnxt_ptp_clear(struct bnxt *bp) { struct bnxt_ptp_cfg *ptp = bp->ptp_cfg; + int i; if (!ptp) return; @@ -1016,9 +1093,11 @@ void bnxt_ptp_clear(struct bnxt *bp) kfree(ptp->ptp_info.pin_config); ptp->ptp_info.pin_config = NULL; - if (ptp->tx_skb) { - dev_kfree_skb_any(ptp->tx_skb); - ptp->tx_skb = NULL; + for (i = 0; i < BNXT_MAX_TX_TS; i++) { + if (ptp->txts_req[i].tx_skb) { + dev_kfree_skb_any(ptp->txts_req[i].tx_skb); + ptp->txts_req[i].tx_skb = NULL; + } } bnxt_unmap_ptp_regs(bp); diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h index 8c30b428a428..a9a2f9a18c9c 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ptp.h @@ -85,6 +85,16 @@ struct bnxt_ptp_stats { atomic64_t ts_err; }; +#define BNXT_MAX_TX_TS 4 +#define NEXT_TXTS(idx) (((idx) + 1) & (BNXT_MAX_TX_TS - 1)) + +struct bnxt_ptp_tx_req { + struct sk_buff *tx_skb; + u16 tx_seqid; + u16 tx_hdr_off; + unsigned long abs_txts_tmo; +}; + struct bnxt_ptp_cfg { struct ptp_clock_info ptp_info; struct ptp_clock *ptp_clock; @@ -93,7 +103,8 @@ struct bnxt_ptp_cfg { struct bnxt_pps pps_info; /* serialize timecounter access */ spinlock_t ptp_lock; - struct sk_buff *tx_skb; + /* serialize ts tx request queuing */ + spinlock_t ptp_tx_lock; u64 current_time; u64 old_time; unsigned long next_period; @@ -102,11 +113,10 @@ struct bnxt_ptp_cfg { /* a 23b shift cyclecounter will overflow in ~36 mins. Check overflow every 18 mins. */ #define BNXT_PHC_OVERFLOW_PERIOD (18 * 60 * HZ) - u16 tx_seqid; - u16 tx_hdr_off; + struct bnxt_ptp_tx_req txts_req[BNXT_MAX_TX_TS]; + struct bnxt *bp; - atomic_t tx_avail; -#define BNXT_MAX_TX_TS 1 + u32 tx_avail; u16 rxctl; #define BNXT_PTP_MSG_SYNC (1 << 0) #define BNXT_PTP_MSG_DELAY_REQ (1 << 1) @@ -123,14 +133,14 @@ struct bnxt_ptp_cfg { BNXT_PTP_MSG_PDELAY_REQ | \ BNXT_PTP_MSG_PDELAY_RESP) u8 tx_tstamp_en:1; - u8 txts_pending:1; int rx_filter; u32 tstamp_filters; u32 refclk_regs[2]; u32 refclk_mapped_regs[2]; u32 txts_tmo; - unsigned long abs_txts_tmo; + u16 txts_prod; + u16 txts_cons; struct bnxt_ptp_stats stats; }; @@ -147,6 +157,13 @@ do { \ ((dst) = READ_ONCE(src)) #endif +#define BNXT_PTP_INC_TX_AVAIL(ptp) \ +do { \ + spin_lock_bh(&(ptp)->ptp_tx_lock); \ + (ptp)->tx_avail++; \ + spin_unlock_bh(&(ptp)->ptp_tx_lock); \ +} while (0) + int bnxt_ptp_parse(struct sk_buff *skb, u16 *seq_id, u16 *hdr_off); void bnxt_ptp_update_current_time(struct bnxt *bp); void bnxt_ptp_pps_event(struct bnxt *bp, u32 data1, u32 data2); @@ -154,8 +171,11 @@ int bnxt_ptp_cfg_tstamp_filters(struct bnxt *bp); void bnxt_ptp_reapply_pps(struct bnxt *bp); int bnxt_hwtstamp_set(struct net_device *dev, struct ifreq *ifr); int bnxt_hwtstamp_get(struct net_device *dev, struct ifreq *ifr); -int bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb); +int bnxt_ptp_get_txts_prod(struct bnxt_ptp_cfg *ptp, u16 *prod); +void bnxt_get_tx_ts_p5(struct bnxt *bp, struct sk_buff *skb, u16 prod); int bnxt_get_rx_ts_p5(struct bnxt *bp, u64 *ts, u32 pkt_ts); +void bnxt_tx_ts_cmp(struct bnxt *bp, struct bnxt_napi *bnapi, + struct tx_ts_cmp *tscmp); void bnxt_ptp_rtc_timecounter_init(struct bnxt_ptp_cfg *ptp, u64 ns); int bnxt_ptp_init_rtc(struct bnxt *bp, bool phc_cfg); int bnxt_ptp_init(struct bnxt *bp, bool phc_cfg); diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index 241906697019..a42f3f280f3e 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -608,6 +608,28 @@ static int enic_get_ts_info(struct net_device *netdev, return 0; } +static void enic_get_channels(struct net_device *netdev, + struct ethtool_channels *channels) +{ + struct enic *enic = netdev_priv(netdev); + + switch (vnic_dev_get_intr_mode(enic->vdev)) { + case VNIC_DEV_INTR_MODE_MSIX: + channels->max_rx = ENIC_RQ_MAX; + channels->max_tx = ENIC_WQ_MAX; + channels->rx_count = enic->rq_count; + channels->tx_count = enic->wq_count; + break; + case VNIC_DEV_INTR_MODE_MSI: + case VNIC_DEV_INTR_MODE_INTX: + channels->max_combined = 1; + channels->combined_count = 1; + break; + default: + break; + } +} + static const struct ethtool_ops enic_ethtool_ops = { .supported_coalesce_params = ETHTOOL_COALESCE_USECS | ETHTOOL_COALESCE_USE_ADAPTIVE_RX | @@ -632,6 +654,7 @@ static const struct ethtool_ops enic_ethtool_ops = { .set_rxfh = enic_set_rxfh, .get_link_ksettings = enic_get_ksettings, .get_ts_info = enic_get_ts_info, + .get_channels = enic_get_channels, }; void enic_set_ethtool_ops(struct net_device *netdev) diff --git a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c index c9fbeebf7fb9..00fed5a61d62 100644 --- a/drivers/net/ethernet/intel/ice/devlink/devlink_port.c +++ b/drivers/net/ethernet/intel/ice/devlink/devlink_port.c @@ -373,6 +373,62 @@ void ice_devlink_destroy_pf_port(struct ice_pf *pf) } /** + * ice_devlink_port_get_vf_fn_mac - .port_fn_hw_addr_get devlink handler + * @port: devlink port structure + * @hw_addr: MAC address of the port + * @hw_addr_len: length of MAC address + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_fn_hw_addr_get operation + * Return: zero on success or an error code on failure. + */ +static int ice_devlink_port_get_vf_fn_mac(struct devlink_port *port, + u8 *hw_addr, int *hw_addr_len, + struct netlink_ext_ack *extack) +{ + struct ice_vf *vf = container_of(port, struct ice_vf, devlink_port); + + ether_addr_copy(hw_addr, vf->dev_lan_addr); + *hw_addr_len = ETH_ALEN; + + return 0; +} + +/** + * ice_devlink_port_set_vf_fn_mac - .port_fn_hw_addr_set devlink handler + * @port: devlink port structure + * @hw_addr: MAC address of the port + * @hw_addr_len: length of MAC address + * @extack: extended netdev ack structure + * + * Callback for the devlink .port_fn_hw_addr_set operation + * Return: zero on success or an error code on failure. + */ +static int ice_devlink_port_set_vf_fn_mac(struct devlink_port *port, + const u8 *hw_addr, + int hw_addr_len, + struct netlink_ext_ack *extack) + +{ + struct devlink_port_attrs *attrs = &port->attrs; + struct devlink_port_pci_vf_attrs *pci_vf; + struct devlink *devlink = port->devlink; + struct ice_pf *pf; + u16 vf_id; + + pf = devlink_priv(devlink); + pci_vf = &attrs->pci_vf; + vf_id = pci_vf->vf; + + return __ice_set_vf_mac(pf, vf_id, hw_addr); +} + +static const struct devlink_port_ops ice_devlink_vf_port_ops = { + .port_fn_hw_addr_get = ice_devlink_port_get_vf_fn_mac, + .port_fn_hw_addr_set = ice_devlink_port_set_vf_fn_mac, +}; + +/** * ice_devlink_create_vf_port - Create a devlink port for this VF * @vf: the VF to create a port for * @@ -407,7 +463,8 @@ int ice_devlink_create_vf_port(struct ice_vf *vf) devlink_port_attrs_set(devlink_port, &attrs); devlink = priv_to_devlink(pf); - err = devl_port_register(devlink, devlink_port, vsi->idx); + err = devl_port_register_with_ops(devlink, devlink_port, vsi->idx, + &ice_devlink_vf_port_ops); if (err) { dev_err(dev, "Failed to create devlink port for VF %d, error %d\n", vf->vf_id, err); diff --git a/drivers/net/ethernet/intel/ice/ice_adapter.c b/drivers/net/ethernet/intel/ice/ice_adapter.c index 52d15ef7f4b1..ad84d8ad49a6 100644 --- a/drivers/net/ethernet/intel/ice/ice_adapter.c +++ b/drivers/net/ethernet/intel/ice/ice_adapter.c @@ -11,6 +11,7 @@ #include "ice_adapter.h" static DEFINE_XARRAY(ice_adapters); +static DEFINE_MUTEX(ice_adapters_mutex); /* PCI bus number is 8 bits. Slot is 5 bits. Domain can have the rest. */ #define INDEX_FIELD_DOMAIN GENMASK(BITS_PER_LONG - 1, 13) @@ -47,8 +48,6 @@ static void ice_adapter_free(struct ice_adapter *adapter) kfree(adapter); } -DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T)) - /** * ice_adapter_get - Get a shared ice_adapter structure. * @pdev: Pointer to the pci_dev whose driver is getting the ice_adapter. @@ -64,27 +63,26 @@ DEFINE_FREE(ice_adapter_free, struct ice_adapter*, if (_T) ice_adapter_free(_T)) */ struct ice_adapter *ice_adapter_get(const struct pci_dev *pdev) { - struct ice_adapter *ret, __free(ice_adapter_free) *adapter = NULL; unsigned long index = ice_adapter_index(pdev); - - adapter = ice_adapter_new(); - if (!adapter) - return ERR_PTR(-ENOMEM); - - xa_lock(&ice_adapters); - ret = __xa_cmpxchg(&ice_adapters, index, NULL, adapter, GFP_KERNEL); - if (xa_is_err(ret)) { - ret = ERR_PTR(xa_err(ret)); - goto unlock; - } - if (ret) { - refcount_inc(&ret->refcount); - goto unlock; + struct ice_adapter *adapter; + int err; + + scoped_guard(mutex, &ice_adapters_mutex) { + err = xa_insert(&ice_adapters, index, NULL, GFP_KERNEL); + if (err == -EBUSY) { + adapter = xa_load(&ice_adapters, index); + refcount_inc(&adapter->refcount); + return adapter; + } + if (err) + return ERR_PTR(err); + + adapter = ice_adapter_new(); + if (!adapter) + return ERR_PTR(-ENOMEM); + xa_store(&ice_adapters, index, adapter, GFP_KERNEL); } - ret = no_free_ptr(adapter); -unlock: - xa_unlock(&ice_adapters); - return ret; + return adapter; } /** @@ -94,23 +92,21 @@ unlock: * Releases the reference to ice_adapter previously obtained with * ice_adapter_get. * - * Context: Any. + * Context: Process, may sleep. */ void ice_adapter_put(const struct pci_dev *pdev) { unsigned long index = ice_adapter_index(pdev); struct ice_adapter *adapter; - xa_lock(&ice_adapters); - adapter = xa_load(&ice_adapters, index); - if (WARN_ON(!adapter)) - goto unlock; + scoped_guard(mutex, &ice_adapters_mutex) { + adapter = xa_load(&ice_adapters, index); + if (WARN_ON(!adapter)) + return; + if (!refcount_dec_and_test(&adapter->refcount)) + return; - if (!refcount_dec_and_test(&adapter->refcount)) - goto unlock; - - WARN_ON(__xa_erase(&ice_adapters, index) != adapter); + WARN_ON(xa_erase(&ice_adapters, index) != adapter); + } ice_adapter_free(adapter); -unlock: - xa_unlock(&ice_adapters); } diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h index 86cc1df469dd..40dc735dc25c 100644 --- a/drivers/net/ethernet/intel/ice/ice_common.h +++ b/drivers/net/ethernet/intel/ice/ice_common.h @@ -23,7 +23,7 @@ int ice_check_reset(struct ice_hw *hw); int ice_reset(struct ice_hw *hw, enum ice_reset_req req); int ice_create_all_ctrlq(struct ice_hw *hw); int ice_init_all_ctrlq(struct ice_hw *hw); -void ice_shutdown_all_ctrlq(struct ice_hw *hw); +void ice_shutdown_all_ctrlq(struct ice_hw *hw, bool unloading); void ice_destroy_all_ctrlq(struct ice_hw *hw); int ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq, diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c index ffe660f34992..ffaa6511c455 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.c +++ b/drivers/net/ethernet/intel/ice/ice_controlq.c @@ -510,16 +510,19 @@ shutdown_sq_out: */ static bool ice_aq_ver_check(struct ice_hw *hw) { - if (hw->api_maj_ver > EXP_FW_API_VER_MAJOR) { + u8 exp_fw_api_ver_major = EXP_FW_API_VER_MAJOR_BY_MAC(hw); + u8 exp_fw_api_ver_minor = EXP_FW_API_VER_MINOR_BY_MAC(hw); + + if (hw->api_maj_ver > exp_fw_api_ver_major) { /* Major API version is newer than expected, don't load */ dev_warn(ice_hw_to_dev(hw), "The driver for the device stopped because the NVM image is newer than expected. You must install the most recent version of the network driver.\n"); return false; - } else if (hw->api_maj_ver == EXP_FW_API_VER_MAJOR) { - if (hw->api_min_ver > (EXP_FW_API_VER_MINOR + 2)) + } else if (hw->api_maj_ver == exp_fw_api_ver_major) { + if (hw->api_min_ver > (exp_fw_api_ver_minor + 2)) dev_info(ice_hw_to_dev(hw), "The driver for the device detected a newer version of the NVM image than expected. Please install the most recent version of the network driver.\n"); - else if ((hw->api_min_ver + 2) < EXP_FW_API_VER_MINOR) + else if ((hw->api_min_ver + 2) < exp_fw_api_ver_minor) dev_info(ice_hw_to_dev(hw), "The driver for the device detected an older version of the NVM image than expected. Please update the NVM image.\n"); } else { @@ -684,10 +687,12 @@ struct ice_ctl_q_info *ice_get_sbq(struct ice_hw *hw) * ice_shutdown_ctrlq - shutdown routine for any control queue * @hw: pointer to the hardware structure * @q_type: specific Control queue type + * @unloading: is the driver unloading itself * * NOTE: this function does not destroy the control queue locks. */ -static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) +static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type, + bool unloading) { struct ice_ctl_q_info *cq; @@ -695,7 +700,7 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) case ICE_CTL_Q_ADMIN: cq = &hw->adminq; if (ice_check_sq_alive(hw, cq)) - ice_aq_q_shutdown(hw, true); + ice_aq_q_shutdown(hw, unloading); break; case ICE_CTL_Q_SB: cq = &hw->sbq; @@ -714,20 +719,21 @@ static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type) /** * ice_shutdown_all_ctrlq - shutdown routine for all control queues * @hw: pointer to the hardware structure + * @unloading: is the driver unloading itself * * NOTE: this function does not destroy the control queue locks. The driver * may call this at runtime to shutdown and later restart control queues, such * as in response to a reset event. */ -void ice_shutdown_all_ctrlq(struct ice_hw *hw) +void ice_shutdown_all_ctrlq(struct ice_hw *hw, bool unloading) { /* Shutdown FW admin queue */ - ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); + ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN, unloading); /* Shutdown PHY Sideband */ if (ice_is_sbq_supported(hw)) - ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB); + ice_shutdown_ctrlq(hw, ICE_CTL_Q_SB, unloading); /* Shutdown PF-VF Mailbox */ - ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX); + ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX, unloading); } /** @@ -759,7 +765,7 @@ int ice_init_all_ctrlq(struct ice_hw *hw) break; ice_debug(hw, ICE_DBG_AQ_MSG, "Retry Admin Queue init due to FW critical error\n"); - ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN); + ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN, true); msleep(ICE_CTL_Q_ADMIN_INIT_MSEC); } while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT); @@ -840,7 +846,7 @@ static void ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq) void ice_destroy_all_ctrlq(struct ice_hw *hw) { /* shut down all the control queues first */ - ice_shutdown_all_ctrlq(hw); + ice_shutdown_all_ctrlq(hw, true); ice_destroy_ctrlq_locks(&hw->adminq); if (ice_is_sbq_supported(hw)) diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h index 8f2fd1613a95..1d54b1cdb1c5 100644 --- a/drivers/net/ethernet/intel/ice/ice_controlq.h +++ b/drivers/net/ethernet/intel/ice/ice_controlq.h @@ -21,9 +21,18 @@ /* Defines that help manage the driver vs FW API checks. * Take a look at ice_aq_ver_check in ice_controlq.c for actual usage. */ -#define EXP_FW_API_VER_BRANCH 0x00 -#define EXP_FW_API_VER_MAJOR 0x01 -#define EXP_FW_API_VER_MINOR 0x05 +#define EXP_FW_API_VER_MAJOR_E810 0x01 +#define EXP_FW_API_VER_MINOR_E810 0x05 + +#define EXP_FW_API_VER_MAJOR_E830 0x01 +#define EXP_FW_API_VER_MINOR_E830 0x07 + +#define EXP_FW_API_VER_MAJOR_BY_MAC(hw) ((hw)->mac_type == ICE_MAC_E830 ? \ + EXP_FW_API_VER_MAJOR_E830 : \ + EXP_FW_API_VER_MAJOR_E810) +#define EXP_FW_API_VER_MINOR_BY_MAC(hw) ((hw)->mac_type == ICE_MAC_E830 ? \ + EXP_FW_API_VER_MINOR_E830 : \ + EXP_FW_API_VER_MINOR_E810) /* Different control queue types: These are mainly for SW consumption. */ enum ice_ctl_q { diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 080efb7473aa..14ec4ebcd9af 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -623,7 +623,7 @@ skip: if (hw->port_info) ice_sched_clear_port(hw->port_info); - ice_shutdown_all_ctrlq(hw); + ice_shutdown_all_ctrlq(hw, false); set_bit(ICE_PREPARED_FOR_RESET, pf->state); } @@ -4158,13 +4158,17 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) /* set for the next time the netdev is started */ if (!netif_running(vsi->netdev)) { - ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + if (err) + goto rebuild_err; dev_dbg(ice_pf_to_dev(pf), "Link is down, queue count change happens when link is brought up\n"); goto done; } ice_vsi_close(vsi); - ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + err = ice_vsi_rebuild(vsi, ICE_VSI_FLAG_NO_INIT); + if (err) + goto rebuild_err; ice_for_each_traffic_class(i) { if (vsi->tc_cfg.ena_tc & BIT(i)) @@ -4175,6 +4179,11 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx, bool locked) } ice_pf_dcb_recfg(pf, locked); ice_vsi_open(vsi); + goto done; + +rebuild_err: + dev_err(ice_pf_to_dev(pf), "Error during VSI rebuild: %d. Unload and reload the driver.\n", + err); done: clear_bit(ICE_CFG_BUSY, pf->state); return err; @@ -5490,7 +5499,7 @@ static void ice_prepare_for_shutdown(struct ice_pf *pf) if (pf->vsi[v]) pf->vsi[v]->vsi_num = 0; - ice_shutdown_all_ctrlq(hw); + ice_shutdown_all_ctrlq(hw, true); } /** @@ -7750,7 +7759,7 @@ err_vsi_rebuild: err_sched_init_port: ice_sched_cleanup_all(hw); err_init_ctrlq: - ice_shutdown_all_ctrlq(hw); + ice_shutdown_all_ctrlq(hw, false); set_bit(ICE_RESET_FAILED, pf->state); clear_recovery: /* set this bit in PF state to control service task scheduling */ diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.c b/drivers/net/ethernet/intel/ice/ice_sriov.c index 067712f4923f..55ef33208456 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.c +++ b/drivers/net/ethernet/intel/ice/ice_sriov.c @@ -1416,21 +1416,23 @@ out_put_vf: } /** - * ice_set_vf_mac - * @netdev: network interface device structure + * __ice_set_vf_mac - program VF MAC address + * @pf: PF to be configure * @vf_id: VF identifier * @mac: MAC address * * program VF MAC address + * Return: zero on success or an error code on failure */ -int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) +int __ice_set_vf_mac(struct ice_pf *pf, u16 vf_id, const u8 *mac) { - struct ice_pf *pf = ice_netdev_to_pf(netdev); + struct device *dev; struct ice_vf *vf; int ret; + dev = ice_pf_to_dev(pf); if (is_multicast_ether_addr(mac)) { - netdev_err(netdev, "%pM not a valid unicast address\n", mac); + dev_err(dev, "%pM not a valid unicast address\n", mac); return -EINVAL; } @@ -1459,13 +1461,13 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) if (is_zero_ether_addr(mac)) { /* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */ vf->pf_set_mac = false; - netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n", - vf->vf_id); + dev_info(dev, "Removing MAC on VF %d. VF driver will be reinitialized\n", + vf->vf_id); } else { /* PF will add MAC rule for the VF */ vf->pf_set_mac = true; - netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n", - mac, vf_id); + dev_info(dev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n", + mac, vf_id); } ice_reset_vf(vf, ICE_VF_RESET_NOTIFY); @@ -1477,6 +1479,20 @@ out_put_vf: } /** + * ice_set_vf_mac - .ndo_set_vf_mac handler + * @netdev: network interface device structure + * @vf_id: VF identifier + * @mac: MAC address + * + * program VF MAC address + * Return: zero on success or an error code on failure + */ +int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac) +{ + return __ice_set_vf_mac(ice_netdev_to_pf(netdev), vf_id, mac); +} + +/** * ice_set_vf_trust * @netdev: network interface device structure * @vf_id: VF identifier diff --git a/drivers/net/ethernet/intel/ice/ice_sriov.h b/drivers/net/ethernet/intel/ice/ice_sriov.h index 8f22313474d6..96549ca5c52c 100644 --- a/drivers/net/ethernet/intel/ice/ice_sriov.h +++ b/drivers/net/ethernet/intel/ice/ice_sriov.h @@ -28,6 +28,7 @@ #ifdef CONFIG_PCI_IOV void ice_process_vflr_event(struct ice_pf *pf); int ice_sriov_configure(struct pci_dev *pdev, int num_vfs); +int __ice_set_vf_mac(struct ice_pf *pf, u16 vf_id, const u8 *mac); int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac); int ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi); @@ -81,6 +82,13 @@ ice_sriov_configure(struct pci_dev __always_unused *pdev, } static inline int +__ice_set_vf_mac(struct ice_pf __always_unused *pf, + u16 __always_unused vf_id, const u8 __always_unused *mac) +{ + return -EOPNOTSUPP; +} + +static inline int ice_set_vf_mac(struct net_device __always_unused *netdev, int __always_unused vf_id, u8 __always_unused *mac) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h index 4a77f6fe2622..ebf84c240d6d 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/mbox.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/mbox.h @@ -139,6 +139,7 @@ M(MSIX_OFFSET, 0x005, msix_offset, msg_req, msix_offset_rsp) \ M(VF_FLR, 0x006, vf_flr, msg_req, msg_rsp) \ M(PTP_OP, 0x007, ptp_op, ptp_req, ptp_rsp) \ M(GET_HW_CAP, 0x008, get_hw_cap, msg_req, get_hw_cap_rsp) \ +M(NDC_SYNC_OP, 0x009, ndc_sync_op, ndc_sync_op, msg_rsp) \ M(LMTST_TBL_SETUP, 0x00a, lmtst_tbl_setup, lmtst_tbl_setup_req, \ msg_rsp) \ M(SET_VF_PERM, 0x00b, set_vf_perm, set_vf_perm, msg_rsp) \ @@ -1716,6 +1717,13 @@ struct lmtst_tbl_setup_req { u64 rsvd[4]; }; +struct ndc_sync_op { + struct mbox_msghdr hdr; + u8 nix_lf_tx_sync; + u8 nix_lf_rx_sync; + u8 npa_lf_sync; +}; + /* CPT mailbox error codes * Range 901 - 1000. */ diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c index ff78251f92d4..5a4f774aed64 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.c @@ -2014,6 +2014,13 @@ int rvu_mbox_handler_vf_flr(struct rvu *rvu, struct msg_req *req, return 0; } +int rvu_ndc_sync(struct rvu *rvu, int lfblkaddr, int lfidx, u64 lfoffset) +{ + /* Sync cached info for this LF in NDC to LLC/DRAM */ + rvu_write64(rvu, lfblkaddr, lfoffset, BIT_ULL(12) | lfidx); + return rvu_poll_reg(rvu, lfblkaddr, lfoffset, BIT_ULL(12), true); +} + int rvu_mbox_handler_get_hw_cap(struct rvu *rvu, struct msg_req *req, struct get_hw_cap_rsp *rsp) { @@ -2068,6 +2075,65 @@ int rvu_mbox_handler_set_vf_perm(struct rvu *rvu, struct set_vf_perm *req, return 0; } +int rvu_mbox_handler_ndc_sync_op(struct rvu *rvu, + struct ndc_sync_op *req, + struct msg_rsp *rsp) +{ + struct rvu_hwinfo *hw = rvu->hw; + u16 pcifunc = req->hdr.pcifunc; + int err, lfidx, lfblkaddr; + + if (req->npa_lf_sync) { + /* Get NPA LF data */ + lfblkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NPA, pcifunc); + if (lfblkaddr < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + lfidx = rvu_get_lf(rvu, &hw->block[lfblkaddr], pcifunc, 0); + if (lfidx < 0) + return NPA_AF_ERR_AF_LF_INVALID; + + /* Sync NPA NDC */ + err = rvu_ndc_sync(rvu, lfblkaddr, + lfidx, NPA_AF_NDC_SYNC); + if (err) + dev_err(rvu->dev, + "NDC-NPA sync failed for LF %u\n", lfidx); + } + + if (!req->nix_lf_tx_sync && !req->nix_lf_rx_sync) + return 0; + + /* Get NIX LF data */ + lfblkaddr = rvu_get_blkaddr(rvu, BLKTYPE_NIX, pcifunc); + if (lfblkaddr < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + lfidx = rvu_get_lf(rvu, &hw->block[lfblkaddr], pcifunc, 0); + if (lfidx < 0) + return NIX_AF_ERR_AF_LF_INVALID; + + if (req->nix_lf_tx_sync) { + /* Sync NIX TX NDC */ + err = rvu_ndc_sync(rvu, lfblkaddr, + lfidx, NIX_AF_NDC_TX_SYNC); + if (err) + dev_err(rvu->dev, + "NDC-NIX-TX sync fail for LF %u\n", lfidx); + } + + if (req->nix_lf_rx_sync) { + /* Sync NIX RX NDC */ + err = rvu_ndc_sync(rvu, lfblkaddr, + lfidx, NIX_AF_NDC_RX_SYNC); + if (err) + dev_err(rvu->dev, + "NDC-NIX-RX sync failed for LF %u\n", lfidx); + } + + return 0; +} + static int rvu_process_mbox_msg(struct otx2_mbox *mbox, int devid, struct mbox_msghdr *req) { diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h index 3063a84a45ef..03ee93fd9e94 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu.h @@ -800,6 +800,7 @@ int rvu_lf_reset(struct rvu *rvu, struct rvu_block *block, int lf); int rvu_get_blkaddr(struct rvu *rvu, int blktype, u16 pcifunc); int rvu_poll_reg(struct rvu *rvu, u64 block, u64 offset, u64 mask, bool zero); int rvu_get_num_lbk_chans(void); +int rvu_ndc_sync(struct rvu *rvu, int lfblkid, int lfidx, u64 lfoffset); int rvu_get_blkaddr_from_slot(struct rvu *rvu, int blktype, u16 pcifunc, u16 global_slot, u16 *slot_in_block); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c index 00af8888e329..4dbbaa719cbf 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_nix.c @@ -2497,9 +2497,7 @@ static int nix_txschq_free(struct rvu *rvu, u16 pcifunc) } mutex_unlock(&rvu->rsrc_lock); - /* Sync cached info for this LF in NDC-TX to LLC/DRAM */ - rvu_write64(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12) | nixlf); - err = rvu_poll_reg(rvu, blkaddr, NIX_AF_NDC_TX_SYNC, BIT_ULL(12), true); + err = rvu_ndc_sync(rvu, blkaddr, nixlf, NIX_AF_NDC_TX_SYNC); if (err) dev_err(rvu->dev, "NDC-TX sync failed for NIXLF %d\n", nixlf); diff --git a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h index 5ec92654e7ad..d56be5fb7eb4 100644 --- a/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h +++ b/drivers/net/ethernet/marvell/octeontx2/af/rvu_reg.h @@ -121,6 +121,7 @@ #define NPA_AF_LF_RST (0x0020) #define NPA_AF_GEN_CFG (0x0030) #define NPA_AF_NDC_CFG (0x0040) +#define NPA_AF_NDC_SYNC (0x0050) #define NPA_AF_INP_CTL (0x00D0) #define NPA_AF_ACTIVE_CYCLES_PC (0x00F0) #define NPA_AF_AVG_DELAY (0x0100) @@ -239,6 +240,7 @@ #define NIX_AF_RX_CPTX_INST_ADDR (0x0310) #define NIX_AF_RX_CPTX_INST_QSEL(a) (0x0320ull | (uint64_t)(a) << 3) #define NIX_AF_RX_CPTX_CREDIT(a) (0x0360ull | (uint64_t)(a) << 3) +#define NIX_AF_NDC_RX_SYNC (0x03E0) #define NIX_AF_NDC_TX_SYNC (0x03F0) #define NIX_AF_AQ_CFG (0x0400) #define NIX_AF_AQ_BASE (0x0410) diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c index ff05ea20409a..5492dea547a1 100644 --- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c +++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c @@ -3245,6 +3245,29 @@ static int otx2_sriov_configure(struct pci_dev *pdev, int numvfs) return otx2_sriov_enable(pdev, numvfs); } +static void otx2_ndc_sync(struct otx2_nic *pf) +{ + struct mbox *mbox = &pf->mbox; + struct ndc_sync_op *req; + + mutex_lock(&mbox->lock); + + req = otx2_mbox_alloc_msg_ndc_sync_op(mbox); + if (!req) { + mutex_unlock(&mbox->lock); + return; + } + + req->nix_lf_tx_sync = 1; + req->nix_lf_rx_sync = 1; + req->npa_lf_sync = 1; + + if (!otx2_sync_mbox_msg(mbox)) + dev_err(pf->dev, "NDC sync operation failed\n"); + + mutex_unlock(&mbox->lock); +} + static void otx2_remove(struct pci_dev *pdev) { struct net_device *netdev = pci_get_drvdata(pdev); @@ -3293,6 +3316,7 @@ static void otx2_remove(struct pci_dev *pdev) otx2_mcam_flow_del(pf); otx2_shutdown_tc(pf); otx2_shutdown_qos(pf); + otx2_ndc_sync(pf); otx2_detach_resources(&pf->mbox); if (pf->hw.lmt_info) free_percpu(pf->hw.lmt_info); diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.c b/drivers/net/ethernet/mellanox/mlxsw/core_env.c index 6c06b0592760..294e758f1067 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.c @@ -513,6 +513,63 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, } EXPORT_SYMBOL(mlxsw_env_get_module_eeprom_by_page); +int +mlxsw_env_set_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + struct mlxsw_env *mlxsw_env = mlxsw_core_env(mlxsw_core); + u32 bytes_written = 0; + u16 device_addr; + int err; + + if (!mlxsw_env_linecard_is_active(mlxsw_env, slot_index)) { + NL_SET_ERR_MSG_MOD(extack, + "Cannot write to EEPROM of a module on an inactive line card"); + return -EIO; + } + + err = mlxsw_env_validate_module_type(mlxsw_core, slot_index, module); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "EEPROM is not equipped on port module type"); + return err; + } + + device_addr = page->offset; + + while (bytes_written < page->length) { + char mcia_pl[MLXSW_REG_MCIA_LEN]; + char eeprom_tmp[128] = {}; + u8 size; + + size = min_t(u8, page->length - bytes_written, + mlxsw_env->max_eeprom_len); + + mlxsw_reg_mcia_pack(mcia_pl, slot_index, module, page->page, + device_addr + bytes_written, size, + page->i2c_address); + mlxsw_reg_mcia_bank_number_set(mcia_pl, page->bank); + memcpy(eeprom_tmp, page->data + bytes_written, size); + mlxsw_reg_mcia_eeprom_memcpy_to(mcia_pl, eeprom_tmp); + + err = mlxsw_reg_write(mlxsw_core, MLXSW_REG(mcia), mcia_pl); + if (err) { + NL_SET_ERR_MSG_MOD(extack, "Failed to access module's EEPROM"); + return err; + } + + err = mlxsw_env_mcia_status_process(mcia_pl, extack); + if (err) + return err; + + bytes_written += size; + } + + return 0; +} +EXPORT_SYMBOL(mlxsw_env_set_module_eeprom_by_page); + static int mlxsw_env_module_reset(struct mlxsw_core *mlxsw_core, u8 slot_index, u8 module) { diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_env.h b/drivers/net/ethernet/mellanox/mlxsw/core_env.h index a197e3ae069c..e4ff17869400 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_env.h +++ b/drivers/net/ethernet/mellanox/mlxsw/core_env.h @@ -28,6 +28,12 @@ mlxsw_env_get_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, const struct ethtool_module_eeprom *page, struct netlink_ext_ack *extack); +int +mlxsw_env_set_module_eeprom_by_page(struct mlxsw_core *mlxsw_core, + u8 slot_index, u8 module, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); + int mlxsw_env_reset_module(struct net_device *netdev, struct mlxsw_core *mlxsw_core, u8 slot_index, u8 module, u32 *flags); diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 431accdc6213..828c65036a4c 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -140,6 +140,20 @@ mlxsw_m_get_module_eeprom_by_page(struct net_device *netdev, page, extack); } +static int +mlxsw_m_set_module_eeprom_by_page(struct net_device *netdev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); + struct mlxsw_core *core = mlxsw_m_port->mlxsw_m->core; + + return mlxsw_env_set_module_eeprom_by_page(core, + mlxsw_m_port->slot_index, + mlxsw_m_port->module, + page, extack); +} + static int mlxsw_m_reset(struct net_device *netdev, u32 *flags) { struct mlxsw_m_port *mlxsw_m_port = netdev_priv(netdev); @@ -181,6 +195,7 @@ static const struct ethtool_ops mlxsw_m_port_ethtool_ops = { .get_module_info = mlxsw_m_get_module_info, .get_module_eeprom = mlxsw_m_get_module_eeprom, .get_module_eeprom_by_page = mlxsw_m_get_module_eeprom_by_page, + .set_module_eeprom_by_page = mlxsw_m_set_module_eeprom_by_page, .reset = mlxsw_m_reset, .get_module_power_mode = mlxsw_m_get_module_power_mode, .set_module_power_mode = mlxsw_m_set_module_power_mode, diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c index a755b0a901d3..c79da1411d33 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_ethtool.c @@ -1068,6 +1068,20 @@ mlxsw_sp_get_module_eeprom_by_page(struct net_device *dev, } static int +mlxsw_sp_set_module_eeprom_by_page(struct net_device *dev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack) +{ + struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev); + struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp; + u8 slot_index = mlxsw_sp_port->mapping.slot_index; + u8 module = mlxsw_sp_port->mapping.module; + + return mlxsw_env_set_module_eeprom_by_page(mlxsw_sp->core, slot_index, + module, page, extack); +} + +static int mlxsw_sp_get_ts_info(struct net_device *netdev, struct ethtool_ts_info *info) { struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(netdev); @@ -1256,6 +1270,7 @@ const struct ethtool_ops mlxsw_sp_port_ethtool_ops = { .get_module_info = mlxsw_sp_get_module_info, .get_module_eeprom = mlxsw_sp_get_module_eeprom, .get_module_eeprom_by_page = mlxsw_sp_get_module_eeprom_by_page, + .set_module_eeprom_by_page = mlxsw_sp_set_module_eeprom_by_page, .get_ts_info = mlxsw_sp_get_ts_info, .get_eth_phy_stats = mlxsw_sp_get_eth_phy_stats, .get_eth_mac_stats = mlxsw_sp_get_eth_mac_stats, diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c index 9246ea2118ff..714d2e804694 100644 --- a/drivers/net/ethernet/realtek/r8169_main.c +++ b/drivers/net/ethernet/realtek/r8169_main.c @@ -1608,7 +1608,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts) if (!tp->dash_enabled) { rtl_set_d3_pll_down(tp, !wolopts); - tp->dev->wol_enabled = wolopts ? 1 : 0; + tp->dev->ethtool->wol_enabled = wolopts ? 1 : 0; } } @@ -5478,7 +5478,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent) rtl_set_d3_pll_down(tp, true); } else { rtl_set_d3_pll_down(tp, false); - dev->wol_enabled = 1; + dev->ethtool->wol_enabled = 1; } jumbo_max = rtl_jumbo_max(tp); diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c index 8fa6c0e9195b..7d69302ffa0a 100644 --- a/drivers/net/ethernet/sfc/ef10.c +++ b/drivers/net/ethernet/sfc/ef10.c @@ -1396,7 +1396,7 @@ static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx) efx_mcdi_filter_table_reset_mc_allocations(efx); nic_data->must_restore_piobufs = true; efx_ef10_forget_old_piobufs(efx); - efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* Driver-created vswitches and vports must be re-created */ nic_data->must_probe_vswitching = true; diff --git a/drivers/net/ethernet/sfc/ef100_ethtool.c b/drivers/net/ethernet/sfc/ef100_ethtool.c index cf55202b3a7b..896ffca4aee2 100644 --- a/drivers/net/ethernet/sfc/ef100_ethtool.c +++ b/drivers/net/ethernet/sfc/ef100_ethtool.c @@ -59,8 +59,12 @@ const struct ethtool_ops ef100_ethtool_ops = { .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_key_size = efx_ethtool_get_rxfh_key_size, + .rxfh_priv_size = sizeof(struct efx_rss_context_priv), .get_rxfh = efx_ethtool_get_rxfh, .set_rxfh = efx_ethtool_set_rxfh, + .create_rxfh_context = efx_ethtool_create_rxfh_context, + .modify_rxfh_context = efx_ethtool_modify_rxfh_context, + .remove_rxfh_context = efx_ethtool_remove_rxfh_context, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, diff --git a/drivers/net/ethernet/sfc/efx.c b/drivers/net/ethernet/sfc/efx.c index e9d9de8e648a..6f1a01ded7d4 100644 --- a/drivers/net/ethernet/sfc/efx.c +++ b/drivers/net/ethernet/sfc/efx.c @@ -299,7 +299,7 @@ static int efx_probe_nic(struct efx_nic *efx) if (efx->n_channels > 1) netdev_rss_key_fill(efx->rss_context.rx_hash_key, sizeof(efx->rss_context.rx_hash_key)); - efx_set_default_rx_indir_table(efx, &efx->rss_context); + efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table); /* Initialise the interrupt moderation settings */ efx->irq_mod_step_us = DIV_ROUND_UP(efx->timer_quantum_ns, 1000); diff --git a/drivers/net/ethernet/sfc/efx.h b/drivers/net/ethernet/sfc/efx.h index 48d3623735ba..7a6cab883d66 100644 --- a/drivers/net/ethernet/sfc/efx.h +++ b/drivers/net/ethernet/sfc/efx.h @@ -158,7 +158,7 @@ static inline s32 efx_filter_get_rx_ids(struct efx_nic *efx, } /* RSS contexts */ -static inline bool efx_rss_active(struct efx_rss_context *ctx) +static inline bool efx_rss_active(struct efx_rss_context_priv *ctx) { return ctx->context_id != EFX_MCDI_RSS_CONTEXT_INVALID; } diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index 4ebd5ae23eca..13cf647051af 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -714,7 +714,7 @@ void efx_reset_down(struct efx_nic *efx, enum reset_type method) mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); efx->type->fini(efx); } @@ -777,7 +777,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) if (efx->type->rx_restore_rss_contexts) efx->type->rx_restore_rss_contexts(efx); - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); efx->type->filter_table_restore(efx); up_write(&efx->filter_sem); @@ -793,7 +793,7 @@ int efx_reset_up(struct efx_nic *efx, enum reset_type method, bool ok) fail: efx->port_initialized = false; - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); up_write(&efx->filter_sem); mutex_unlock(&efx->mac_lock); @@ -1000,9 +1000,7 @@ int efx_init_struct(struct efx_nic *efx, struct pci_dev *pci_dev) efx->type->rx_hash_offset - efx->type->rx_prefix_size; efx->rx_packet_ts_offset = efx->type->rx_ts_offset - efx->type->rx_prefix_size; - INIT_LIST_HEAD(&efx->rss_context.list); - efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - mutex_init(&efx->rss_lock); + efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; efx->vport_id = EVB_PORT_ID_ASSIGNED; spin_lock_init(&efx->stats_lock); efx->vi_stride = EFX_DEFAULT_VI_STRIDE; diff --git a/drivers/net/ethernet/sfc/ethtool.c b/drivers/net/ethernet/sfc/ethtool.c index 37c69c8d90b1..0f5c68b8bab7 100644 --- a/drivers/net/ethernet/sfc/ethtool.c +++ b/drivers/net/ethernet/sfc/ethtool.c @@ -268,8 +268,12 @@ const struct ethtool_ops efx_ethtool_ops = { .set_rxnfc = efx_ethtool_set_rxnfc, .get_rxfh_indir_size = efx_ethtool_get_rxfh_indir_size, .get_rxfh_key_size = efx_ethtool_get_rxfh_key_size, + .rxfh_priv_size = sizeof(struct efx_rss_context_priv), .get_rxfh = efx_ethtool_get_rxfh, .set_rxfh = efx_ethtool_set_rxfh, + .create_rxfh_context = efx_ethtool_create_rxfh_context, + .modify_rxfh_context = efx_ethtool_modify_rxfh_context, + .remove_rxfh_context = efx_ethtool_remove_rxfh_context, .get_ts_info = efx_ethtool_get_ts_info, .get_module_info = efx_ethtool_get_module_info, .get_module_eeprom = efx_ethtool_get_module_eeprom, diff --git a/drivers/net/ethernet/sfc/ethtool_common.c b/drivers/net/ethernet/sfc/ethtool_common.c index 7d5e5db4eac5..6ded44b86052 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.c +++ b/drivers/net/ethernet/sfc/ethtool_common.c @@ -820,10 +820,10 @@ int efx_ethtool_get_rxnfc(struct net_device *net_dev, return 0; case ETHTOOL_GRXFH: { - struct efx_rss_context *ctx = &efx->rss_context; + struct efx_rss_context_priv *ctx = &efx->rss_context.priv; __u64 data; - mutex_lock(&efx->rss_lock); + mutex_lock(&net_dev->ethtool->rss_lock); if (info->flow_type & FLOW_RSS && info->rss_context) { ctx = efx_find_rss_context_entry(efx, info->rss_context); if (!ctx) { @@ -864,7 +864,7 @@ int efx_ethtool_get_rxnfc(struct net_device *net_dev, out_setdata_unlock: info->data = data; out_unlock: - mutex_unlock(&efx->rss_lock); + mutex_unlock(&net_dev->ethtool->rss_lock); return rc; } @@ -1163,46 +1163,14 @@ u32 efx_ethtool_get_rxfh_key_size(struct net_device *net_dev) return efx->type->rx_hash_key_size; } -static int efx_ethtool_get_rxfh_context(struct net_device *net_dev, - struct ethtool_rxfh_param *rxfh) -{ - struct efx_nic *efx = efx_netdev_priv(net_dev); - struct efx_rss_context *ctx; - int rc = 0; - - if (!efx->type->rx_pull_rss_context_config) - return -EOPNOTSUPP; - - mutex_lock(&efx->rss_lock); - ctx = efx_find_rss_context_entry(efx, rxfh->rss_context); - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } - rc = efx->type->rx_pull_rss_context_config(efx, ctx); - if (rc) - goto out_unlock; - - rxfh->hfunc = ETH_RSS_HASH_TOP; - if (rxfh->indir) - memcpy(rxfh->indir, ctx->rx_indir_table, - sizeof(ctx->rx_indir_table)); - if (rxfh->key) - memcpy(rxfh->key, ctx->rx_hash_key, - efx->type->rx_hash_key_size); -out_unlock: - mutex_unlock(&efx->rss_lock); - return rc; -} - int efx_ethtool_get_rxfh(struct net_device *net_dev, struct ethtool_rxfh_param *rxfh) { struct efx_nic *efx = efx_netdev_priv(net_dev); int rc; - if (rxfh->rss_context) - return efx_ethtool_get_rxfh_context(net_dev, rxfh); + if (rxfh->rss_context) /* core should never call us for these */ + return -EINVAL; rc = efx->type->rx_pull_rss_config(efx); if (rc) @@ -1218,68 +1186,85 @@ int efx_ethtool_get_rxfh(struct net_device *net_dev, return 0; } -static int efx_ethtool_set_rxfh_context(struct net_device *net_dev, - struct ethtool_rxfh_param *rxfh, - struct netlink_ext_ack *extack) +int efx_ethtool_modify_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) { struct efx_nic *efx = efx_netdev_priv(net_dev); - u32 *rss_context = &rxfh->rss_context; - struct efx_rss_context *ctx; - u32 *indir = rxfh->indir; - bool allocated = false; - u8 *key = rxfh->key; - int rc; + struct efx_rss_context_priv *priv; + const u32 *indir = rxfh->indir; + const u8 *key = rxfh->key; - if (!efx->type->rx_push_rss_context_config) + if (!efx->type->rx_push_rss_context_config) { + NL_SET_ERR_MSG_MOD(extack, + "NIC type does not support custom contexts"); return -EOPNOTSUPP; - - mutex_lock(&efx->rss_lock); - - if (*rss_context == ETH_RXFH_CONTEXT_ALLOC) { - if (rxfh->rss_delete) { - /* alloc + delete == Nothing to do */ - rc = -EINVAL; - goto out_unlock; - } - ctx = efx_alloc_rss_context_entry(efx); - if (!ctx) { - rc = -ENOMEM; - goto out_unlock; - } - ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - /* Initialise indir table and key to defaults */ - efx_set_default_rx_indir_table(efx, ctx); - netdev_rss_key_fill(ctx->rx_hash_key, sizeof(ctx->rx_hash_key)); - allocated = true; - } else { - ctx = efx_find_rss_context_entry(efx, *rss_context); - if (!ctx) { - rc = -ENOENT; - goto out_unlock; - } } - - if (rxfh->rss_delete) { - /* delete this context */ - rc = efx->type->rx_push_rss_context_config(efx, ctx, NULL, NULL); - if (!rc) - efx_free_rss_context_entry(ctx); - goto out_unlock; + /* Hash function is Toeplitz, cannot be changed */ + if (rxfh->hfunc != ETH_RSS_HASH_NO_CHANGE && + rxfh->hfunc != ETH_RSS_HASH_TOP) { + NL_SET_ERR_MSG_MOD(extack, "Only Toeplitz hash is supported"); + return -EOPNOTSUPP; } + priv = ethtool_rxfh_context_priv(ctx); + if (!key) - key = ctx->rx_hash_key; + key = ethtool_rxfh_context_key(ctx); if (!indir) - indir = ctx->rx_indir_table; + indir = ethtool_rxfh_context_indir(ctx); - rc = efx->type->rx_push_rss_context_config(efx, ctx, indir, key); - if (rc && allocated) - efx_free_rss_context_entry(ctx); - else - *rss_context = ctx->user_id; -out_unlock: - mutex_unlock(&efx->rss_lock); - return rc; + return efx->type->rx_push_rss_context_config(efx, priv, indir, key, + false); +} + +int efx_ethtool_create_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack) +{ + struct efx_nic *efx = efx_netdev_priv(net_dev); + struct efx_rss_context_priv *priv; + + priv = ethtool_rxfh_context_priv(ctx); + + priv->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + priv->rx_hash_udp_4tuple = false; + /* Generate default indir table and/or key if not specified. + * We use ctx as a place to store these; this is fine because + * we're doing a create, so if we fail then the ctx will just + * be deleted. + */ + if (!rxfh->indir) + efx_set_default_rx_indir_table(efx, ethtool_rxfh_context_indir(ctx)); + if (!rxfh->key) + netdev_rss_key_fill(ethtool_rxfh_context_key(ctx), + ctx->key_size); + if (rxfh->hfunc == ETH_RSS_HASH_NO_CHANGE) + ctx->hfunc = ETH_RSS_HASH_TOP; + if (rxfh->input_xfrm == RXH_XFRM_NO_CHANGE) + ctx->input_xfrm = 0; + return efx_ethtool_modify_rxfh_context(net_dev, ctx, rxfh, extack); +} + +int efx_ethtool_remove_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack) +{ + struct efx_nic *efx = efx_netdev_priv(net_dev); + struct efx_rss_context_priv *priv; + + if (!efx->type->rx_push_rss_context_config) { + NL_SET_ERR_MSG_MOD(extack, + "NIC type does not support custom contexts"); + return -EOPNOTSUPP; + } + + priv = ethtool_rxfh_context_priv(ctx); + return efx->type->rx_push_rss_context_config(efx, priv, NULL, NULL, + true); } int efx_ethtool_set_rxfh(struct net_device *net_dev, @@ -1295,8 +1280,9 @@ int efx_ethtool_set_rxfh(struct net_device *net_dev, rxfh->hfunc != ETH_RSS_HASH_TOP) return -EOPNOTSUPP; - if (rxfh->rss_context) - return efx_ethtool_set_rxfh_context(net_dev, rxfh, extack); + /* Custom contexts should use new API */ + if (WARN_ON_ONCE(rxfh->rss_context)) + return -EIO; if (!indir && !key) return 0; diff --git a/drivers/net/ethernet/sfc/ethtool_common.h b/drivers/net/ethernet/sfc/ethtool_common.h index a680e5980213..fc52e891637d 100644 --- a/drivers/net/ethernet/sfc/ethtool_common.h +++ b/drivers/net/ethernet/sfc/ethtool_common.h @@ -49,6 +49,18 @@ int efx_ethtool_get_rxfh(struct net_device *net_dev, int efx_ethtool_set_rxfh(struct net_device *net_dev, struct ethtool_rxfh_param *rxfh, struct netlink_ext_ack *extack); +int efx_ethtool_create_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); +int efx_ethtool_modify_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); +int efx_ethtool_remove_rxfh_context(struct net_device *net_dev, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack); int efx_ethtool_reset(struct net_device *net_dev, u32 *flags); int efx_ethtool_get_module_eeprom(struct net_device *net_dev, struct ethtool_eeprom *ee, diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c index 4ff6586116ee..6ef96292909a 100644 --- a/drivers/net/ethernet/sfc/mcdi_filters.c +++ b/drivers/net/ethernet/sfc/mcdi_filters.c @@ -194,7 +194,7 @@ efx_mcdi_filter_push_prep_set_match_fields(struct efx_nic *efx, static void efx_mcdi_filter_push_prep(struct efx_nic *efx, const struct efx_filter_spec *spec, efx_dword_t *inbuf, u64 handle, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, bool replacing) { u32 flags = spec->flags; @@ -245,7 +245,7 @@ static void efx_mcdi_filter_push_prep(struct efx_nic *efx, static int efx_mcdi_filter_push(struct efx_nic *efx, const struct efx_filter_spec *spec, u64 *handle, - struct efx_rss_context *ctx, bool replacing) + struct efx_rss_context_priv *ctx, bool replacing) { MCDI_DECLARE_BUF(inbuf, MC_CMD_FILTER_OP_EXT_IN_LEN); MCDI_DECLARE_BUF(outbuf, MC_CMD_FILTER_OP_EXT_OUT_LEN); @@ -345,9 +345,9 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, bool replace_equal) { DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); + struct efx_rss_context_priv *ctx = NULL; struct efx_mcdi_filter_table *table; struct efx_filter_spec *saved_spec; - struct efx_rss_context *ctx = NULL; unsigned int match_pri, hash; unsigned int priv_flags; bool rss_locked = false; @@ -380,12 +380,12 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, bitmap_zero(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT); if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); rss_locked = true; if (spec->rss_context) ctx = efx_find_rss_context_entry(efx, spec->rss_context); else - ctx = &efx->rss_context; + ctx = &efx->rss_context.priv; if (!ctx) { rc = -ENOENT; goto out_unlock; @@ -548,7 +548,7 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx, out_unlock: if (rss_locked) - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); up_write(&table->lock); return rc; } @@ -611,13 +611,13 @@ static int efx_mcdi_filter_remove_internal(struct efx_nic *efx, new_spec.priority = EFX_FILTER_PRI_AUTO; new_spec.flags = (EFX_FILTER_FLAG_RX | - (efx_rss_active(&efx->rss_context) ? + (efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0)); new_spec.dmaq_id = 0; new_spec.rss_context = 0; rc = efx_mcdi_filter_push(efx, &new_spec, &table->entry[filter_idx].handle, - &efx->rss_context, + &efx->rss_context.priv, true); if (rc == 0) @@ -764,7 +764,7 @@ static int efx_mcdi_filter_insert_addr_list(struct efx_nic *efx, ids = vlan->uc; } - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; + filter_flags = efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0; /* Insert/renew filters */ for (i = 0; i < addr_count; i++) { @@ -833,7 +833,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx, int rc; u16 *id; - filter_flags = efx_rss_active(&efx->rss_context) ? EFX_FILTER_FLAG_RX_RSS : 0; + filter_flags = efx_rss_active(&efx->rss_context.priv) ? EFX_FILTER_FLAG_RX_RSS : 0; efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO, filter_flags, 0); @@ -1375,8 +1375,8 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx) struct efx_mcdi_filter_table *table = efx->filter_state; unsigned int invalid_filters = 0, failed = 0; struct efx_mcdi_filter_vlan *vlan; + struct efx_rss_context_priv *ctx; struct efx_filter_spec *spec; - struct efx_rss_context *ctx; unsigned int filter_idx; u32 mcdi_flags; int match_pri; @@ -1388,7 +1388,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx) return; down_write(&table->lock); - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); for (filter_idx = 0; filter_idx < EFX_MCDI_FILTER_TBL_ROWS; filter_idx++) { spec = efx_mcdi_filter_entry_spec(table, filter_idx); @@ -1407,7 +1407,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx) if (spec->rss_context) ctx = efx_find_rss_context_entry(efx, spec->rss_context); else - ctx = &efx->rss_context; + ctx = &efx->rss_context.priv; if (spec->flags & EFX_FILTER_FLAG_RX_RSS) { if (!ctx) { netif_warn(efx, drv, efx->net_dev, @@ -1444,7 +1444,7 @@ not_restored: } } - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); up_write(&table->lock); /* @@ -1861,7 +1861,8 @@ out_unlock: RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_UDP_IPV6_RSS_MODE_LBN |\ RSS_MODE_HASH_ADDRS << MC_CMD_RSS_CONTEXT_GET_FLAGS_OUT_OTHER_IPV6_RSS_MODE_LBN) -int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags) +static int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, + u32 *flags) { /* * Firmware had a bug (sfc bug 61952) where it would not actually @@ -1909,8 +1910,8 @@ int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, u32 *flags) * Defaults are 4-tuple for TCP and 2-tuple for UDP and other-IP, so we * just need to set the UDP ports flags (for both IP versions). */ -void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, - struct efx_rss_context *ctx) +static void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, + struct efx_rss_context_priv *ctx) { MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_SET_FLAGS_IN_LEN); u32 flags; @@ -1931,7 +1932,7 @@ void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, } static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, unsigned *context_size) { MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN); @@ -2032,25 +2033,26 @@ void efx_mcdi_rx_free_indir_table(struct efx_nic *efx) { int rc; - if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) { - rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id); + if (efx->rss_context.priv.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) { + rc = efx_mcdi_filter_free_rss_context(efx, efx->rss_context.priv.context_id); WARN_ON(rc != 0); } - efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + efx->rss_context.priv.context_id = EFX_MCDI_RSS_CONTEXT_INVALID; } static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx, unsigned *context_size) { struct efx_mcdi_filter_table *table = efx->filter_state; - int rc = efx_mcdi_filter_alloc_rss_context(efx, false, &efx->rss_context, - context_size); + int rc = efx_mcdi_filter_alloc_rss_context(efx, false, + &efx->rss_context.priv, + context_size); if (rc != 0) return rc; table->rx_rss_context_exclusive = false; - efx_set_default_rx_indir_table(efx, &efx->rss_context); + efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table); return 0; } @@ -2058,26 +2060,27 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx, const u32 *rx_indir_table, const u8 *key) { + u32 old_rx_rss_context = efx->rss_context.priv.context_id; struct efx_mcdi_filter_table *table = efx->filter_state; - u32 old_rx_rss_context = efx->rss_context.context_id; int rc; - if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID || + if (efx->rss_context.priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID || !table->rx_rss_context_exclusive) { - rc = efx_mcdi_filter_alloc_rss_context(efx, true, &efx->rss_context, - NULL); + rc = efx_mcdi_filter_alloc_rss_context(efx, true, + &efx->rss_context.priv, + NULL); if (rc == -EOPNOTSUPP) return rc; else if (rc != 0) goto fail1; } - rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.context_id, - rx_indir_table, key); + rc = efx_mcdi_filter_populate_rss_table(efx, efx->rss_context.priv.context_id, + rx_indir_table, key); if (rc != 0) goto fail2; - if (efx->rss_context.context_id != old_rx_rss_context && + if (efx->rss_context.priv.context_id != old_rx_rss_context && old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID) WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0); table->rx_rss_context_exclusive = true; @@ -2091,9 +2094,9 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx, return 0; fail2: - if (old_rx_rss_context != efx->rss_context.context_id) { - WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.context_id) != 0); - efx->rss_context.context_id = old_rx_rss_context; + if (old_rx_rss_context != efx->rss_context.priv.context_id) { + WARN_ON(efx_mcdi_filter_free_rss_context(efx, efx->rss_context.priv.context_id) != 0); + efx->rss_context.priv.context_id = old_rx_rss_context; } fail1: netif_err(efx, hw, efx->net_dev, "%s: failed rc=%d\n", __func__, rc); @@ -2101,33 +2104,28 @@ fail1: } int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, const u32 *rx_indir_table, - const u8 *key) + const u8 *key, bool delete) { int rc; - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) { + if (delete) + /* already wasn't in HW, nothing to do */ + return 0; rc = efx_mcdi_filter_alloc_rss_context(efx, true, ctx, NULL); if (rc) return rc; } - if (!rx_indir_table) /* Delete this context */ + if (delete) /* Delete this context */ return efx_mcdi_filter_free_rss_context(efx, ctx->context_id); - rc = efx_mcdi_filter_populate_rss_table(efx, ctx->context_id, - rx_indir_table, key); - if (rc) - return rc; - - memcpy(ctx->rx_indir_table, rx_indir_table, - sizeof(efx->rss_context.rx_indir_table)); - memcpy(ctx->rx_hash_key, key, efx->type->rx_hash_key_size); - - return 0; + return efx_mcdi_filter_populate_rss_table(efx, ctx->context_id, + rx_indir_table, key); } int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, @@ -2139,16 +2137,16 @@ int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, size_t outlen; int rc, i; - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); BUILD_BUG_ON(MC_CMD_RSS_CONTEXT_GET_TABLE_IN_LEN != MC_CMD_RSS_CONTEXT_GET_KEY_IN_LEN); - if (ctx->context_id == EFX_MCDI_RSS_CONTEXT_INVALID) + if (ctx->priv.context_id == EFX_MCDI_RSS_CONTEXT_INVALID) return -ENOENT; MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_TABLE_IN_RSS_CONTEXT_ID, - ctx->context_id); + ctx->priv.context_id); BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_indir_table) != MC_CMD_RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE_LEN); rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_TABLE, inbuf, sizeof(inbuf), @@ -2164,7 +2162,7 @@ int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, RSS_CONTEXT_GET_TABLE_OUT_INDIRECTION_TABLE)[i]; MCDI_SET_DWORD(inbuf, RSS_CONTEXT_GET_KEY_IN_RSS_CONTEXT_ID, - ctx->context_id); + ctx->priv.context_id); BUILD_BUG_ON(ARRAY_SIZE(ctx->rx_hash_key) != MC_CMD_RSS_CONTEXT_SET_KEY_IN_TOEPLITZ_KEY_LEN); rc = efx_mcdi_rpc(efx, MC_CMD_RSS_CONTEXT_GET_KEY, inbuf, sizeof(inbuf), @@ -2186,35 +2184,42 @@ int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx) { int rc; - mutex_lock(&efx->rss_lock); + mutex_lock(&efx->net_dev->ethtool->rss_lock); rc = efx_mcdi_rx_pull_rss_context_config(efx, &efx->rss_context); - mutex_unlock(&efx->rss_lock); + mutex_unlock(&efx->net_dev->ethtool->rss_lock); return rc; } void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx) { struct efx_mcdi_filter_table *table = efx->filter_state; - struct efx_rss_context *ctx; + struct ethtool_rxfh_context *ctx; + unsigned long context; int rc; - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); if (!table->must_restore_rss_contexts) return; - list_for_each_entry(ctx, &efx->rss_context.list, list) { + xa_for_each(&efx->net_dev->ethtool->rss_ctx, context, ctx) { + struct efx_rss_context_priv *priv; + u32 *indir; + u8 *key; + + priv = ethtool_rxfh_context_priv(ctx); /* previous NIC RSS context is gone */ - ctx->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; + priv->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; /* so try to allocate a new one */ - rc = efx_mcdi_rx_push_rss_context_config(efx, ctx, - ctx->rx_indir_table, - ctx->rx_hash_key); + indir = ethtool_rxfh_context_indir(ctx); + key = ethtool_rxfh_context_key(ctx); + rc = efx_mcdi_rx_push_rss_context_config(efx, priv, indir, key, + false); if (rc) netif_warn(efx, probe, efx->net_dev, - "failed to restore RSS context %u, rc=%d" + "failed to restore RSS context %lu, rc=%d" "; RSS filters may fail to be applied\n", - ctx->user_id, rc); + context, rc); } table->must_restore_rss_contexts = false; } @@ -2276,7 +2281,7 @@ int efx_mcdi_vf_rx_push_rss_config(struct efx_nic *efx, bool user, { if (user) return -EOPNOTSUPP; - if (efx->rss_context.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) + if (efx->rss_context.priv.context_id != EFX_MCDI_RSS_CONTEXT_INVALID) return 0; return efx_mcdi_filter_rx_push_shared_rss_config(efx, NULL); } @@ -2295,7 +2300,7 @@ int efx_mcdi_push_default_indir_table(struct efx_nic *efx, efx_mcdi_rx_free_indir_table(efx); if (rss_spread > 1) { - efx_set_default_rx_indir_table(efx, &efx->rss_context); + efx_set_default_rx_indir_table(efx, efx->rss_context.rx_indir_table); rc = efx->type->rx_push_rss_config(efx, false, efx->rss_context.rx_indir_table, NULL); } diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h index c0d6558b9fd2..11b9f87ed9e1 100644 --- a/drivers/net/ethernet/sfc/mcdi_filters.h +++ b/drivers/net/ethernet/sfc/mcdi_filters.h @@ -145,9 +145,9 @@ void efx_mcdi_filter_del_vlan(struct efx_nic *efx, u16 vid); void efx_mcdi_rx_free_indir_table(struct efx_nic *efx); int efx_mcdi_rx_push_rss_context_config(struct efx_nic *efx, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, const u32 *rx_indir_table, - const u8 *key); + const u8 *key, bool delete); int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user, const u32 *rx_indir_table, const u8 *key); @@ -161,10 +161,6 @@ int efx_mcdi_push_default_indir_table(struct efx_nic *efx, int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx); int efx_mcdi_rx_pull_rss_context_config(struct efx_nic *efx, struct efx_rss_context *ctx); -int efx_mcdi_get_rss_context_flags(struct efx_nic *efx, u32 context, - u32 *flags); -void efx_mcdi_set_rss_context_flags(struct efx_nic *efx, - struct efx_rss_context *ctx); void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx); static inline void efx_mcdi_update_rx_scatter(struct efx_nic *efx) diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h index f2dd7feb0e0c..b85c51cbe7f9 100644 --- a/drivers/net/ethernet/sfc/net_driver.h +++ b/drivers/net/ethernet/sfc/net_driver.h @@ -737,21 +737,24 @@ struct vfdi_status; /* The reserved RSS context value */ #define EFX_MCDI_RSS_CONTEXT_INVALID 0xffffffff /** - * struct efx_rss_context - A user-defined RSS context for filtering - * @list: node of linked list on which this struct is stored + * struct efx_rss_context_priv - driver private data for an RSS context * @context_id: the RSS_CONTEXT_ID returned by MC firmware, or * %EFX_MCDI_RSS_CONTEXT_INVALID if this context is not present on the NIC. - * For Siena, 0 if RSS is active, else %EFX_MCDI_RSS_CONTEXT_INVALID. - * @user_id: the rss_context ID exposed to userspace over ethtool. * @rx_hash_udp_4tuple: UDP 4-tuple hashing enabled + */ +struct efx_rss_context_priv { + u32 context_id; + bool rx_hash_udp_4tuple; +}; + +/** + * struct efx_rss_context - an RSS context + * @priv: hardware-specific state * @rx_hash_key: Toeplitz hash key for this RSS context * @indir_table: Indirection table for this RSS context */ struct efx_rss_context { - struct list_head list; - u32 context_id; - u32 user_id; - bool rx_hash_udp_4tuple; + struct efx_rss_context_priv priv; u8 rx_hash_key[40]; u32 rx_indir_table[128]; }; @@ -883,9 +886,7 @@ struct efx_mae; * @rx_packet_ts_offset: Offset of timestamp from start of packet data * (valid only if channel->sync_timestamps_enabled; always negative) * @rx_scatter: Scatter mode enabled for receives - * @rss_context: Main RSS context. Its @list member is the head of the list of - * RSS contexts created by user requests - * @rss_lock: Protects custom RSS context software state in @rss_context.list + * @rss_context: Main RSS context. * @vport_id: The function's vport ID, only relevant for PFs * @int_error_count: Number of internal errors seen recently * @int_error_expire: Time at which error count will be expired @@ -1052,7 +1053,6 @@ struct efx_nic { int rx_packet_ts_offset; bool rx_scatter; struct efx_rss_context rss_context; - struct mutex rss_lock; u32 vport_id; unsigned int_error_count; @@ -1416,9 +1416,9 @@ struct efx_nic_type { const u32 *rx_indir_table, const u8 *key); int (*rx_pull_rss_config)(struct efx_nic *efx); int (*rx_push_rss_context_config)(struct efx_nic *efx, - struct efx_rss_context *ctx, + struct efx_rss_context_priv *ctx, const u32 *rx_indir_table, - const u8 *key); + const u8 *key, bool delete); int (*rx_pull_rss_context_config)(struct efx_nic *efx, struct efx_rss_context *ctx); void (*rx_restore_rss_contexts)(struct efx_nic *efx); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index dcd901eccfc8..0b7dc75c40f9 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -557,69 +557,25 @@ efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, napi_gro_frags(napi); } -/* RSS contexts. We're using linked lists and crappy O(n) algorithms, because - * (a) this is an infrequent control-plane operation and (b) n is small (max 64) - */ -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx) +struct efx_rss_context_priv *efx_find_rss_context_entry(struct efx_nic *efx, + u32 id) { - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx, *new; - u32 id = 1; /* Don't use zero, that refers to the master RSS context */ - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); + struct ethtool_rxfh_context *ctx; - /* Search for first gap in the numbering */ - list_for_each_entry(ctx, head, list) { - if (ctx->user_id != id) - break; - id++; - /* Check for wrap. If this happens, we have nearly 2^32 - * allocated RSS contexts, which seems unlikely. - */ - if (WARN_ON_ONCE(!id)) - return NULL; - } + WARN_ON(!mutex_is_locked(&efx->net_dev->ethtool->rss_lock)); - /* Create the new entry */ - new = kmalloc(sizeof(*new), GFP_KERNEL); - if (!new) + ctx = xa_load(&efx->net_dev->ethtool->rss_ctx, id); + if (!ctx) return NULL; - new->context_id = EFX_MCDI_RSS_CONTEXT_INVALID; - new->rx_hash_udp_4tuple = false; - - /* Insert the new entry into the gap */ - new->user_id = id; - list_add_tail(&new->list, &ctx->list); - return new; -} - -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id) -{ - struct list_head *head = &efx->rss_context.list; - struct efx_rss_context *ctx; - - WARN_ON(!mutex_is_locked(&efx->rss_lock)); - - list_for_each_entry(ctx, head, list) - if (ctx->user_id == id) - return ctx; - return NULL; -} - -void efx_free_rss_context_entry(struct efx_rss_context *ctx) -{ - list_del(&ctx->list); - kfree(ctx); + return ethtool_rxfh_context_priv(ctx); } -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx) +void efx_set_default_rx_indir_table(struct efx_nic *efx, u32 *indir) { size_t i; - for (i = 0; i < ARRAY_SIZE(ctx->rx_indir_table); i++) - ctx->rx_indir_table[i] = - ethtool_rxfh_indir_default(i, efx->rss_spread); + for (i = 0; i < ARRAY_SIZE(efx->rss_context.rx_indir_table); i++) + indir[i] = ethtool_rxfh_indir_default(i, efx->rss_spread); } /** diff --git a/drivers/net/ethernet/sfc/rx_common.h b/drivers/net/ethernet/sfc/rx_common.h index fbd2769307f9..75fa84192362 100644 --- a/drivers/net/ethernet/sfc/rx_common.h +++ b/drivers/net/ethernet/sfc/rx_common.h @@ -84,11 +84,9 @@ void efx_rx_packet_gro(struct efx_channel *channel, struct efx_rx_buffer *rx_buf, unsigned int n_frags, u8 *eh, __wsum csum); -struct efx_rss_context *efx_alloc_rss_context_entry(struct efx_nic *efx); -struct efx_rss_context *efx_find_rss_context_entry(struct efx_nic *efx, u32 id); -void efx_free_rss_context_entry(struct efx_rss_context *ctx); -void efx_set_default_rx_indir_table(struct efx_nic *efx, - struct efx_rss_context *ctx); +struct efx_rss_context_priv *efx_find_rss_context_entry(struct efx_nic *efx, + u32 id); +void efx_set_default_rx_indir_table(struct efx_nic *efx, u32 *indir); bool efx_filter_is_mc_recipient(const struct efx_filter_spec *spec); bool efx_filter_spec_equal(const struct efx_filter_spec *left, diff --git a/drivers/net/ethernet/tehuti/tn40.c b/drivers/net/ethernet/tehuti/tn40.c index 11db9fde11fe..565b72537efa 100644 --- a/drivers/net/ethernet/tehuti/tn40.c +++ b/drivers/net/ethernet/tehuti/tn40.c @@ -1571,6 +1571,19 @@ static const struct net_device_ops tn40_netdev_ops = { .ndo_vlan_rx_kill_vid = tn40_vlan_rx_kill_vid, }; +static int tn40_ethtool_get_link_ksettings(struct net_device *ndev, + struct ethtool_link_ksettings *cmd) +{ + struct tn40_priv *priv = netdev_priv(ndev); + + return phylink_ethtool_ksettings_get(priv->phylink, cmd); +} + +static const struct ethtool_ops tn40_ethtool_ops = { + .get_link = ethtool_op_get_link, + .get_link_ksettings = tn40_ethtool_get_link_ksettings, +}; + static int tn40_priv_init(struct tn40_priv *priv) { int ret; @@ -1599,6 +1612,7 @@ static struct net_device *tn40_netdev_alloc(struct pci_dev *pdev) if (!ndev) return NULL; ndev->netdev_ops = &tn40_netdev_ops; + ndev->ethtool_ops = &tn40_ethtool_ops; ndev->tx_queue_len = TN40_NDEV_TXQ_LEN; ndev->mem_start = pci_resource_start(pdev, 0); ndev->mem_end = pci_resource_end(pdev, 0); diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c index 46a5a3e95202..e868f7ef4920 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_ethtool.c @@ -37,9 +37,9 @@ static int ngbe_set_wol(struct net_device *netdev, wx->wol = 0; if (wol->wolopts & WAKE_MAGIC) wx->wol = WX_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, WX_PSR_WKUP_CTL, wx->wol); - device_set_wakeup_enable(&pdev->dev, netdev->wol_enabled); + device_set_wakeup_enable(&pdev->dev, netdev->ethtool->wol_enabled); return 0; } diff --git a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c index e894e01d030d..a8119de60deb 100644 --- a/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c +++ b/drivers/net/ethernet/wangxun/ngbe/ngbe_main.c @@ -650,7 +650,7 @@ static int ngbe_probe(struct pci_dev *pdev, if (wx->wol_hw_supported) wx->wol = NGBE_PSR_WKUP_CTL_MAG; - netdev->wol_enabled = !!(wx->wol); + netdev->ethtool->wol_enabled = !!(wx->wol); wr32(wx, NGBE_PSR_WKUP_CTL, wx->wol); device_set_wakeup_enable(&pdev->dev, wx->wol); diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c index c4236564c1cd..785182fa5fe0 100644 --- a/drivers/net/phy/phy.c +++ b/drivers/net/phy/phy.c @@ -1309,7 +1309,7 @@ static irqreturn_t phy_interrupt(int irq, void *phy_dat) if (netdev) { struct device *parent = netdev->dev.parent; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) pm_system_wakeup(); else if (device_may_wakeup(&netdev->dev)) pm_wakeup_dev_event(&netdev->dev, 0, true); diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c index 6c6ec9475709..473cbc1d497b 100644 --- a/drivers/net/phy/phy_device.c +++ b/drivers/net/phy/phy_device.c @@ -296,7 +296,7 @@ static bool mdio_bus_phy_may_suspend(struct phy_device *phydev) if (!netdev) goto out; - if (netdev->wol_enabled) + if (netdev->ethtool->wol_enabled) return false; /* As long as not all affected network drivers support the @@ -1984,7 +1984,8 @@ int phy_suspend(struct phy_device *phydev) return 0; phy_ethtool_get_wol(phydev, &wol); - phydev->wol_enabled = wol.wolopts || (netdev && netdev->wol_enabled); + phydev->wol_enabled = wol.wolopts || + (netdev && netdev->ethtool->wol_enabled); /* If the device has WOL enabled, we cannot suspend the PHY */ if (phydev->wol_enabled && !(phydrv->flags & PHY_ALWAYS_CALL_SUSPEND)) return -EBUSY; diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c index 6c24c48dcf0f..51c526d227fa 100644 --- a/drivers/net/phy/phylink.c +++ b/drivers/net/phy/phylink.c @@ -2282,7 +2282,7 @@ void phylink_suspend(struct phylink *pl, bool mac_wol) { ASSERT_RTNL(); - if (mac_wol && (!pl->netdev || pl->netdev->wol_enabled)) { + if (mac_wol && (!pl->netdev || pl->netdev->ethtool->wol_enabled)) { /* Wake-on-Lan enabled, MAC handling */ mutex_lock(&pl->state_mutex); diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c index 2174893c974f..bed839237fb5 100644 --- a/drivers/net/phy/realtek.c +++ b/drivers/net/phy/realtek.c @@ -32,6 +32,15 @@ #define RTL8211F_PHYCR2 0x19 #define RTL8211F_INSR 0x1d +#define RTL8211F_LEDCR 0x10 +#define RTL8211F_LEDCR_MODE BIT(15) +#define RTL8211F_LEDCR_ACT_TXRX BIT(4) +#define RTL8211F_LEDCR_LINK_1000 BIT(3) +#define RTL8211F_LEDCR_LINK_100 BIT(1) +#define RTL8211F_LEDCR_LINK_10 BIT(0) +#define RTL8211F_LEDCR_MASK GENMASK(4, 0) +#define RTL8211F_LEDCR_SHIFT 5 + #define RTL8211F_TX_DELAY BIT(8) #define RTL8211F_RX_DELAY BIT(3) @@ -87,6 +96,8 @@ #define RTL_8221B_VN_CG 0x001cc84a #define RTL_8251B 0x001cc862 +#define RTL8211F_LED_COUNT 3 + MODULE_DESCRIPTION("Realtek PHY driver"); MODULE_AUTHOR("Johnson Leung"); MODULE_LICENSE("GPL"); @@ -476,6 +487,98 @@ static int rtl821x_resume(struct phy_device *phydev) return 0; } +static int rtl8211f_led_hw_is_supported(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + const unsigned long mask = BIT(TRIGGER_NETDEV_LINK_10) | + BIT(TRIGGER_NETDEV_LINK_100) | + BIT(TRIGGER_NETDEV_LINK_1000) | + BIT(TRIGGER_NETDEV_RX) | + BIT(TRIGGER_NETDEV_TX); + + /* The RTL8211F PHY supports these LED settings on up to three LEDs: + * - Link: Configurable subset of 10/100/1000 link rates + * - Active: Blink on activity, RX or TX is not differentiated + * The Active option has two modes, A and B: + * - A: Link and Active indication at configurable, but matching, + * subset of 10/100/1000 link rates + * - B: Link indication at configurable subset of 10/100/1000 link + * rates and Active indication always at all three 10+100+1000 + * link rates. + * This code currently uses mode B only. + */ + + if (index >= RTL8211F_LED_COUNT) + return -EINVAL; + + /* Filter out any other unsupported triggers. */ + if (rules & ~mask) + return -EOPNOTSUPP; + + /* RX and TX are not differentiated, either both are set or not set. */ + if (!(rules & BIT(TRIGGER_NETDEV_RX)) ^ !(rules & BIT(TRIGGER_NETDEV_TX))) + return -EOPNOTSUPP; + + return 0; +} + +static int rtl8211f_led_hw_control_get(struct phy_device *phydev, u8 index, + unsigned long *rules) +{ + int val; + + val = phy_read_paged(phydev, 0xd04, RTL8211F_LEDCR); + if (val < 0) + return val; + + val >>= RTL8211F_LEDCR_SHIFT * index; + val &= RTL8211F_LEDCR_MASK; + + if (val & RTL8211F_LEDCR_LINK_10) + set_bit(TRIGGER_NETDEV_LINK_10, rules); + + if (val & RTL8211F_LEDCR_LINK_100) + set_bit(TRIGGER_NETDEV_LINK_100, rules); + + if (val & RTL8211F_LEDCR_LINK_1000) + set_bit(TRIGGER_NETDEV_LINK_1000, rules); + + if (val & RTL8211F_LEDCR_ACT_TXRX) { + set_bit(TRIGGER_NETDEV_RX, rules); + set_bit(TRIGGER_NETDEV_TX, rules); + } + + return 0; +} + +static int rtl8211f_led_hw_control_set(struct phy_device *phydev, u8 index, + unsigned long rules) +{ + const u16 mask = RTL8211F_LEDCR_MASK << (RTL8211F_LEDCR_SHIFT * index); + u16 reg = RTL8211F_LEDCR_MODE; /* Mode B */ + + if (index >= RTL8211F_LED_COUNT) + return -EINVAL; + + if (test_bit(TRIGGER_NETDEV_LINK_10, &rules)) + reg |= RTL8211F_LEDCR_LINK_10; + + if (test_bit(TRIGGER_NETDEV_LINK_100, &rules)) + reg |= RTL8211F_LEDCR_LINK_100; + + if (test_bit(TRIGGER_NETDEV_LINK_1000, &rules)) + reg |= RTL8211F_LEDCR_LINK_1000; + + if (test_bit(TRIGGER_NETDEV_RX, &rules) || + test_bit(TRIGGER_NETDEV_TX, &rules)) { + reg |= RTL8211F_LEDCR_ACT_TXRX; + } + + reg <<= RTL8211F_LEDCR_SHIFT * index; + + return phy_modify_paged(phydev, 0xd04, RTL8211F_LEDCR, mask, reg); +} + static int rtl8211e_config_init(struct phy_device *phydev) { int ret = 0, oldpage; @@ -1192,6 +1295,9 @@ static struct phy_driver realtek_drvs[] = { .read_page = rtl821x_read_page, .write_page = rtl821x_write_page, .flags = PHY_ALWAYS_CALL_SUSPEND, + .led_hw_is_supported = rtl8211f_led_hw_is_supported, + .led_hw_control_get = rtl8211f_led_hw_control_get, + .led_hw_control_set = rtl8211f_led_hw_control_set, }, { PHY_ID_MATCH_EXACT(RTL_8211FVD_PHYID), .name = "RTL8211F-VD Gigabit Ethernet", diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 959196af7f5a..f74bb0cf8ed1 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -159,6 +159,57 @@ static inline u32 ethtool_rxfh_indir_default(u32 index, u32 n_rx_rings) return index % n_rx_rings; } +/** + * struct ethtool_rxfh_context - a custom RSS context configuration + * @indir_size: Number of u32 entries in indirection table + * @key_size: Size of hash key, in bytes + * @priv_size: Size of driver private data, in bytes + * @hfunc: RSS hash function identifier. One of the %ETH_RSS_HASH_* + * @input_xfrm: Defines how the input data is transformed. Valid values are one + * of %RXH_XFRM_*. + * @indir_configured: indir has been specified (at create time or subsequently) + * @key_configured: hkey has been specified (at create time or subsequently) + */ +struct ethtool_rxfh_context { + u32 indir_size; + u32 key_size; + u16 priv_size; + u8 hfunc; + u8 input_xfrm; + u8 indir_configured:1; + u8 key_configured:1; + /* private: driver private data, indirection table, and hash key are + * stored sequentially in @data area. Use below helpers to access. + */ + u8 data[] __aligned(sizeof(void *)); +}; + +static inline void *ethtool_rxfh_context_priv(struct ethtool_rxfh_context *ctx) +{ + return ctx->data; +} + +static inline u32 *ethtool_rxfh_context_indir(struct ethtool_rxfh_context *ctx) +{ + return (u32 *)(ctx->data + ALIGN(ctx->priv_size, sizeof(u32))); +} + +static inline u8 *ethtool_rxfh_context_key(struct ethtool_rxfh_context *ctx) +{ + return (u8 *)(ethtool_rxfh_context_indir(ctx) + ctx->indir_size); +} + +static inline size_t ethtool_rxfh_context_size(u32 indir_size, u32 key_size, + u16 priv_size) +{ + size_t indir_bytes = array_size(indir_size, sizeof(u32)); + size_t flex_len; + + flex_len = size_add(size_add(indir_bytes, key_size), + ALIGN(priv_size, sizeof(u32))); + return struct_size_t(struct ethtool_rxfh_context, data, flex_len); +} + /* declare a link mode bitmap */ #define __ETHTOOL_DECLARE_LINK_MODE_MASK(name) \ DECLARE_BITMAP(name, __ETHTOOL_LINK_MODE_MASK_NBITS) @@ -506,17 +557,16 @@ struct ethtool_ts_stats { #define ETH_MODULE_MAX_I2C_ADDRESS 0x7f /** - * struct ethtool_module_eeprom - EEPROM dump from specified page - * @offset: Offset within the specified EEPROM page to begin read, in bytes. - * @length: Number of bytes to read. - * @page: Page number to read from. - * @bank: Page bank number to read from, if applicable by EEPROM spec. + * struct ethtool_module_eeprom - plug-in module EEPROM read / write parameters + * @offset: When @offset is 0-127, it is used as an address to the Lower Memory + * (@page must be 0). Otherwise, it is used as an address to the + * Upper Memory. + * @length: Number of bytes to read / write. + * @page: Page number. + * @bank: Bank number, if supported by EEPROM spec. * @i2c_address: I2C address of a page. Value less than 0x7f expected. Most * EEPROMs use 0x50 or 0x51. * @data: Pointer to buffer with EEPROM data of @length size. - * - * This can be used to manage pages during EEPROM dump in ethtool and pass - * required information to the driver. */ struct ethtool_module_eeprom { u32 offset; @@ -671,6 +721,12 @@ struct ethtool_rxfh_param { * contexts. * @cap_rss_sym_xor_supported: indicates if the driver supports symmetric-xor * RSS. + * @rxfh_priv_size: size of the driver private data area the core should + * allocate for an RSS context (in &struct ethtool_rxfh_context). + * @rxfh_max_context_id: maximum (exclusive) supported RSS context ID. If this + * is zero then the core may choose any (nonzero) ID, otherwise the core + * will only use IDs strictly less than this value, as the @rss_context + * argument to @create_rxfh_context and friends. * @supported_coalesce_params: supported types of interrupt coalescing. * @supported_ring_params: supported ring params. * @get_drvinfo: Report driver/device information. Modern drivers no @@ -767,6 +823,32 @@ struct ethtool_rxfh_param { * will remain unchanged. * Returns a negative error code or zero. An error code must be returned * if at least one unsupported change was requested. + * @create_rxfh_context: Create a new RSS context with the specified RX flow + * hash indirection table, hash key, and hash function. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that the indir table, hkey and hfunc are not yet populated as + * of this call. The driver does not need to update these; the core + * will do so if this op succeeds. + * However, if @rxfh.indir is set to %NULL, the driver must update the + * indir table in @ctx with the (default or inherited) table actually in + * use; similarly, if @rxfh.key is %NULL, @rxfh.hfunc is + * %ETH_RSS_HASH_NO_CHANGE, or @rxfh.input_xfrm is %RXH_XFRM_NO_CHANGE, + * the driver should update the corresponding information in @ctx. + * If the driver provides this method, it must also provide + * @modify_rxfh_context and @remove_rxfh_context. + * Returns a negative error code or zero. + * @modify_rxfh_context: Reconfigure the specified RSS context. Allows setting + * the contents of the RX flow hash indirection table, hash key, and/or + * hash function associated with the given context. + * Parameters which are set to %NULL or zero will remain unchanged. + * The &struct ethtool_rxfh_context for this context is passed in @ctx; + * note that it will still contain the *old* settings. The driver does + * not need to update these; the core will do so if this op succeeds. + * Returns a negative error code or zero. An error code must be returned + * if at least one unsupported change was requested. + * @remove_rxfh_context: Remove the specified RSS context. + * The &struct ethtool_rxfh_context for this context is passed in @ctx. + * Returns a negative error code or zero. * @get_channels: Get number of channels. * @set_channels: Set number of channels. Returns a negative error code or * zero. @@ -824,6 +906,8 @@ struct ethtool_rxfh_param { * @get_module_eeprom_by_page: Get a region of plug-in module EEPROM data from * specified page. Returns a negative error code or the amount of bytes * read. + * @set_module_eeprom_by_page: Write to a region of plug-in module EEPROM, + * from kernel space only. Returns a negative error code or zero. * @get_eth_phy_stats: Query some of the IEEE 802.3 PHY statistics. * @get_eth_mac_stats: Query some of the IEEE 802.3 MAC statistics. * @get_eth_ctrl_stats: Query some of the IEEE 802.3 MAC Ctrl statistics. @@ -854,6 +938,8 @@ struct ethtool_ops { u32 cap_link_lanes_supported:1; u32 cap_rss_ctx_supported:1; u32 cap_rss_sym_xor_supported:1; + u16 rxfh_priv_size; + u32 rxfh_max_context_id; u32 supported_coalesce_params; u32 supported_ring_params; void (*get_drvinfo)(struct net_device *, struct ethtool_drvinfo *); @@ -916,6 +1002,18 @@ struct ethtool_ops { int (*get_rxfh)(struct net_device *, struct ethtool_rxfh_param *); int (*set_rxfh)(struct net_device *, struct ethtool_rxfh_param *, struct netlink_ext_ack *extack); + int (*create_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); + int (*modify_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + const struct ethtool_rxfh_param *rxfh, + struct netlink_ext_ack *extack); + int (*remove_rxfh_context)(struct net_device *, + struct ethtool_rxfh_context *ctx, + u32 rss_context, + struct netlink_ext_ack *extack); void (*get_channels)(struct net_device *, struct ethtool_channels *); int (*set_channels)(struct net_device *, struct ethtool_channels *); int (*get_dump_flag)(struct net_device *, struct ethtool_dump *); @@ -958,6 +1056,9 @@ struct ethtool_ops { int (*get_module_eeprom_by_page)(struct net_device *dev, const struct ethtool_module_eeprom *page, struct netlink_ext_ack *extack); + int (*set_module_eeprom_by_page)(struct net_device *dev, + const struct ethtool_module_eeprom *page, + struct netlink_ext_ack *extack); void (*get_eth_phy_stats)(struct net_device *dev, struct ethtool_eth_phy_stats *phy_stats); void (*get_eth_mac_stats)(struct net_device *dev, @@ -1000,6 +1101,19 @@ int ethtool_virtdev_set_link_ksettings(struct net_device *dev, const struct ethtool_link_ksettings *cmd, u32 *dev_speed, u8 *dev_duplex); +/** + * struct ethtool_netdev_state - per-netdevice state for ethtool features + * @rss_ctx: XArray of custom RSS contexts + * @rss_lock: Protects entries in @rss_ctx. May be taken from + * within RTNL. + * @wol_enabled: Wake-on-LAN is enabled + */ +struct ethtool_netdev_state { + struct xarray rss_ctx; + struct mutex rss_lock; + unsigned wol_enabled:1; +}; + struct phy_device; struct phy_tdr_config; struct phy_plca_cfg; diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc18acd3c58b..3c719f0d5f5a 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -80,6 +80,7 @@ struct xdp_buff; struct xdp_frame; struct xdp_metadata_ops; struct xdp_md; +struct ethtool_netdev_state; typedef u32 xdp_features_t; @@ -1986,10 +1987,10 @@ enum netdev_reg_state { * switch driver and used to set the phys state of the * switch port. * - * @wol_enabled: Wake-on-LAN is enabled - * * @threaded: napi threaded mode is enabled * + * @module_fw_flash_in_progress: Module firmware flashing is in progress. + * * @net_notifier_list: List of per-net netdev notifier block * that follow this device when it is moved * to another network namespace. @@ -1999,6 +2000,7 @@ enum netdev_reg_state { * @udp_tunnel_nic_info: static structure describing the UDP tunnel * offload capabilities of the device * @udp_tunnel_nic: UDP tunnel offload state + * @ethtool: ethtool related state * @xdp_state: stores info on attached XDP BPF programs * * @nested_level: Used as a parameter of spin_lock_nested() of @@ -2373,8 +2375,8 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; bool proto_down; bool threaded; - unsigned wol_enabled:1; + unsigned module_fw_flash_in_progress:1; struct list_head net_notifier_list; #if IS_ENABLED(CONFIG_MACSEC) @@ -2384,6 +2386,8 @@ struct net_device { const struct udp_tunnel_nic_info *udp_tunnel_nic_info; struct udp_tunnel_nic *udp_tunnel_nic; + struct ethtool_netdev_state *ethtool; + /* protected by rtnl_lock */ struct bpf_xdp_entity xdp_state[__MAX_XDP_MODE]; diff --git a/include/linux/sfp.h b/include/linux/sfp.h index a45da7eef9a2..b14be59550e3 100644 --- a/include/linux/sfp.h +++ b/include/linux/sfp.h @@ -284,6 +284,12 @@ enum { SFF8024_ID_QSFP_8438 = 0x0c, SFF8024_ID_QSFP_8436_8636 = 0x0d, SFF8024_ID_QSFP28_8636 = 0x11, + SFF8024_ID_QSFP_DD = 0x18, + SFF8024_ID_OSFP = 0x19, + SFF8024_ID_DSFP = 0x1B, + SFF8024_ID_QSFP_PLUS_CMIS = 0x1E, + SFF8024_ID_SFP_DD_CMIS = 0x1F, + SFF8024_ID_SFP_PLUS_CMIS = 0x20, SFF8024_ENCODING_UNSPEC = 0x00, SFF8024_ENCODING_8B10B = 0x01, diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 188d41da1a40..1bfdd16890fa 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1176,7 +1176,7 @@ static inline bool nft_chain_is_bound(struct nft_chain *chain) int nft_chain_add(struct nft_table *table, struct nft_chain *chain); void nft_chain_del(struct nft_chain *chain); -void nf_tables_chain_destroy(struct nft_ctx *ctx); +void nf_tables_chain_destroy(struct nft_chain *chain); struct nft_stats { u64 bytes; @@ -1613,41 +1613,67 @@ static inline int nft_set_elem_is_dead(const struct nft_set_ext *ext) } /** - * struct nft_trans - nf_tables object update in transaction + * struct nft_trans - nf_tables object update in transaction * - * @list: used internally - * @binding_list: list of objects with possible bindings - * @msg_type: message type - * @put_net: ctx->net needs to be put - * @ctx: transaction context - * @data: internal information related to the transaction + * @list: used internally + * @net: struct net + * @table: struct nft_table the object resides in + * @msg_type: message type + * @seq: netlink sequence number + * @flags: modifiers to new request + * @report: notify via unicast netlink message + * @put_net: net needs to be put + * + * This is the information common to all objects in the transaction, + * this must always be the first member of derived sub-types. */ struct nft_trans { struct list_head list; - struct list_head binding_list; + struct net *net; + struct nft_table *table; int msg_type; - bool put_net; - struct nft_ctx ctx; - char data[]; + u32 seq; + u16 flags; + u8 report:1; + u8 put_net:1; +}; + +/** + * struct nft_trans_binding - nf_tables object with binding support in transaction + * @nft_trans: base structure, MUST be first member + * @binding_list: list of objects with possible bindings + * + * This is the base type used by objects that can be bound to a chain. + */ +struct nft_trans_binding { + struct nft_trans nft_trans; + struct list_head binding_list; }; struct nft_trans_rule { + struct nft_trans nft_trans; struct nft_rule *rule; + struct nft_chain *chain; struct nft_flow_rule *flow; u32 rule_id; bool bound; }; -#define nft_trans_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->rule) -#define nft_trans_flow_rule(trans) \ - (((struct nft_trans_rule *)trans->data)->flow) -#define nft_trans_rule_id(trans) \ - (((struct nft_trans_rule *)trans->data)->rule_id) -#define nft_trans_rule_bound(trans) \ - (((struct nft_trans_rule *)trans->data)->bound) +#define nft_trans_container_rule(trans) \ + container_of(trans, struct nft_trans_rule, nft_trans) +#define nft_trans_rule(trans) \ + nft_trans_container_rule(trans)->rule +#define nft_trans_flow_rule(trans) \ + nft_trans_container_rule(trans)->flow +#define nft_trans_rule_id(trans) \ + nft_trans_container_rule(trans)->rule_id +#define nft_trans_rule_bound(trans) \ + nft_trans_container_rule(trans)->bound +#define nft_trans_rule_chain(trans) \ + nft_trans_container_rule(trans)->chain struct nft_trans_set { + struct nft_trans_binding nft_trans_binding; struct nft_set *set; u32 set_id; u32 gc_int; @@ -1657,100 +1683,117 @@ struct nft_trans_set { u32 size; }; -#define nft_trans_set(trans) \ - (((struct nft_trans_set *)trans->data)->set) -#define nft_trans_set_id(trans) \ - (((struct nft_trans_set *)trans->data)->set_id) -#define nft_trans_set_bound(trans) \ - (((struct nft_trans_set *)trans->data)->bound) -#define nft_trans_set_update(trans) \ - (((struct nft_trans_set *)trans->data)->update) -#define nft_trans_set_timeout(trans) \ - (((struct nft_trans_set *)trans->data)->timeout) -#define nft_trans_set_gc_int(trans) \ - (((struct nft_trans_set *)trans->data)->gc_int) -#define nft_trans_set_size(trans) \ - (((struct nft_trans_set *)trans->data)->size) +#define nft_trans_container_set(t) \ + container_of(t, struct nft_trans_set, nft_trans_binding.nft_trans) +#define nft_trans_set(trans) \ + nft_trans_container_set(trans)->set +#define nft_trans_set_id(trans) \ + nft_trans_container_set(trans)->set_id +#define nft_trans_set_bound(trans) \ + nft_trans_container_set(trans)->bound +#define nft_trans_set_update(trans) \ + nft_trans_container_set(trans)->update +#define nft_trans_set_timeout(trans) \ + nft_trans_container_set(trans)->timeout +#define nft_trans_set_gc_int(trans) \ + nft_trans_container_set(trans)->gc_int +#define nft_trans_set_size(trans) \ + nft_trans_container_set(trans)->size struct nft_trans_chain { + struct nft_trans_binding nft_trans_binding; struct nft_chain *chain; - bool update; char *name; struct nft_stats __percpu *stats; u8 policy; + bool update; bool bound; u32 chain_id; struct nft_base_chain *basechain; struct list_head hook_list; }; -#define nft_trans_chain(trans) \ - (((struct nft_trans_chain *)trans->data)->chain) -#define nft_trans_chain_update(trans) \ - (((struct nft_trans_chain *)trans->data)->update) -#define nft_trans_chain_name(trans) \ - (((struct nft_trans_chain *)trans->data)->name) -#define nft_trans_chain_stats(trans) \ - (((struct nft_trans_chain *)trans->data)->stats) -#define nft_trans_chain_policy(trans) \ - (((struct nft_trans_chain *)trans->data)->policy) -#define nft_trans_chain_bound(trans) \ - (((struct nft_trans_chain *)trans->data)->bound) -#define nft_trans_chain_id(trans) \ - (((struct nft_trans_chain *)trans->data)->chain_id) -#define nft_trans_basechain(trans) \ - (((struct nft_trans_chain *)trans->data)->basechain) -#define nft_trans_chain_hooks(trans) \ - (((struct nft_trans_chain *)trans->data)->hook_list) +#define nft_trans_container_chain(t) \ + container_of(t, struct nft_trans_chain, nft_trans_binding.nft_trans) +#define nft_trans_chain(trans) \ + nft_trans_container_chain(trans)->chain +#define nft_trans_chain_update(trans) \ + nft_trans_container_chain(trans)->update +#define nft_trans_chain_name(trans) \ + nft_trans_container_chain(trans)->name +#define nft_trans_chain_stats(trans) \ + nft_trans_container_chain(trans)->stats +#define nft_trans_chain_policy(trans) \ + nft_trans_container_chain(trans)->policy +#define nft_trans_chain_bound(trans) \ + nft_trans_container_chain(trans)->bound +#define nft_trans_chain_id(trans) \ + nft_trans_container_chain(trans)->chain_id +#define nft_trans_basechain(trans) \ + nft_trans_container_chain(trans)->basechain +#define nft_trans_chain_hooks(trans) \ + nft_trans_container_chain(trans)->hook_list struct nft_trans_table { + struct nft_trans nft_trans; bool update; }; -#define nft_trans_table_update(trans) \ - (((struct nft_trans_table *)trans->data)->update) +#define nft_trans_container_table(trans) \ + container_of(trans, struct nft_trans_table, nft_trans) +#define nft_trans_table_update(trans) \ + nft_trans_container_table(trans)->update struct nft_trans_elem { + struct nft_trans nft_trans; struct nft_set *set; struct nft_elem_priv *elem_priv; bool bound; }; -#define nft_trans_elem_set(trans) \ - (((struct nft_trans_elem *)trans->data)->set) -#define nft_trans_elem_priv(trans) \ - (((struct nft_trans_elem *)trans->data)->elem_priv) -#define nft_trans_elem_set_bound(trans) \ - (((struct nft_trans_elem *)trans->data)->bound) +#define nft_trans_container_elem(t) \ + container_of(t, struct nft_trans_elem, nft_trans) +#define nft_trans_elem_set(trans) \ + nft_trans_container_elem(trans)->set +#define nft_trans_elem_priv(trans) \ + nft_trans_container_elem(trans)->elem_priv +#define nft_trans_elem_set_bound(trans) \ + nft_trans_container_elem(trans)->bound struct nft_trans_obj { + struct nft_trans nft_trans; struct nft_object *obj; struct nft_object *newobj; bool update; }; -#define nft_trans_obj(trans) \ - (((struct nft_trans_obj *)trans->data)->obj) -#define nft_trans_obj_newobj(trans) \ - (((struct nft_trans_obj *)trans->data)->newobj) -#define nft_trans_obj_update(trans) \ - (((struct nft_trans_obj *)trans->data)->update) +#define nft_trans_container_obj(t) \ + container_of(t, struct nft_trans_obj, nft_trans) +#define nft_trans_obj(trans) \ + nft_trans_container_obj(trans)->obj +#define nft_trans_obj_newobj(trans) \ + nft_trans_container_obj(trans)->newobj +#define nft_trans_obj_update(trans) \ + nft_trans_container_obj(trans)->update struct nft_trans_flowtable { + struct nft_trans nft_trans; struct nft_flowtable *flowtable; - bool update; struct list_head hook_list; u32 flags; + bool update; }; -#define nft_trans_flowtable(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flowtable) -#define nft_trans_flowtable_update(trans) \ - (((struct nft_trans_flowtable *)trans->data)->update) -#define nft_trans_flowtable_hooks(trans) \ - (((struct nft_trans_flowtable *)trans->data)->hook_list) -#define nft_trans_flowtable_flags(trans) \ - (((struct nft_trans_flowtable *)trans->data)->flags) +#define nft_trans_container_flowtable(t) \ + container_of(t, struct nft_trans_flowtable, nft_trans) +#define nft_trans_flowtable(trans) \ + nft_trans_container_flowtable(trans)->flowtable +#define nft_trans_flowtable_update(trans) \ + nft_trans_container_flowtable(trans)->update +#define nft_trans_flowtable_hooks(trans) \ + nft_trans_container_flowtable(trans)->hook_list +#define nft_trans_flowtable_flags(trans) \ + nft_trans_container_flowtable(trans)->flags #define NFT_TRANS_GC_BATCHCOUNT 256 @@ -1764,6 +1807,33 @@ struct nft_trans_gc { struct rcu_head rcu; }; +static inline void nft_ctx_update(struct nft_ctx *ctx, + const struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWRULE: + case NFT_MSG_DELRULE: + case NFT_MSG_DESTROYRULE: + ctx->chain = nft_trans_rule_chain(trans); + break; + case NFT_MSG_NEWCHAIN: + case NFT_MSG_DELCHAIN: + case NFT_MSG_DESTROYCHAIN: + ctx->chain = nft_trans_chain(trans); + break; + default: + ctx->chain = NULL; + break; + } + + ctx->net = trans->net; + ctx->table = trans->table; + ctx->family = trans->table->family; + ctx->report = trans->report; + ctx->flags = trans->flags; + ctx->seq = trans->seq; +} + struct nft_trans_gc *nft_trans_gc_alloc(struct nft_set *set, unsigned int gc_seq, gfp_t gfp); void nft_trans_gc_destroy(struct nft_trans_gc *trans); diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h index 8733a3117902..e011384c915c 100644 --- a/include/uapi/linux/ethtool.h +++ b/include/uapi/linux/ethtool.h @@ -878,6 +878,24 @@ enum ethtool_mm_verify_status { }; /** + * enum ethtool_module_fw_flash_status - plug-in module firmware flashing status + * @ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED: The firmware flashing process has + * started. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS: The firmware flashing process + * is in progress. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED: The firmware flashing process was + * completed successfully. + * @ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR: The firmware flashing process was + * stopped due to an error. + */ +enum ethtool_module_fw_flash_status { + ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED = 1, + ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS, + ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED, + ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR, +}; + +/** * struct ethtool_gstrings - string set for data tagging * @cmd: Command number = %ETHTOOL_GSTRINGS * @string_set: String set ID; one of &enum ethtool_stringset diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h index d15856c7e001..840dabdc9d88 100644 --- a/include/uapi/linux/ethtool_netlink.h +++ b/include/uapi/linux/ethtool_netlink.h @@ -57,6 +57,7 @@ enum { ETHTOOL_MSG_PLCA_GET_STATUS, ETHTOOL_MSG_MM_GET, ETHTOOL_MSG_MM_SET, + ETHTOOL_MSG_MODULE_FW_FLASH_ACT, /* add new constants above here */ __ETHTOOL_MSG_USER_CNT, @@ -109,6 +110,7 @@ enum { ETHTOOL_MSG_PLCA_NTF, ETHTOOL_MSG_MM_GET_REPLY, ETHTOOL_MSG_MM_NTF, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF, /* add new constants above here */ __ETHTOOL_MSG_KERNEL_CNT, @@ -1018,6 +1020,23 @@ enum { ETHTOOL_A_MM_MAX = (__ETHTOOL_A_MM_CNT - 1) }; +/* MODULE_FW_FLASH */ + +enum { + ETHTOOL_A_MODULE_FW_FLASH_UNSPEC, + ETHTOOL_A_MODULE_FW_FLASH_HEADER, /* nest - _A_HEADER_* */ + ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_PASSWORD, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS, /* u32 */ + ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, /* string */ + ETHTOOL_A_MODULE_FW_FLASH_DONE, /* uint */ + ETHTOOL_A_MODULE_FW_FLASH_TOTAL, /* uint */ + + /* add new constants above here */ + __ETHTOOL_A_MODULE_FW_FLASH_CNT, + ETHTOOL_A_MODULE_FW_FLASH_MAX = (__ETHTOOL_A_MODULE_FW_FLASH_CNT - 1) +}; + /* generic netlink info */ #define ETHTOOL_GENL_NAME "ethtool" #define ETHTOOL_GENL_VERSION 1 diff --git a/include/uapi/linux/netfilter/nf_tables.h b/include/uapi/linux/netfilter/nf_tables.h index aa4094ca2444..639894ed1b97 100644 --- a/include/uapi/linux/netfilter/nf_tables.h +++ b/include/uapi/linux/netfilter/nf_tables.h @@ -1376,7 +1376,7 @@ enum nft_secmark_attributes { #define NFTA_SECMARK_MAX (__NFTA_SECMARK_MAX - 1) /* Max security context length */ -#define NFT_SECMARK_CTX_MAXLEN 256 +#define NFT_SECMARK_CTX_MAXLEN 4096 /** * enum nft_reject_types - nf_tables reject expression reject types diff --git a/include/uapi/linux/tcp_metrics.h b/include/uapi/linux/tcp_metrics.h index 7cb4a172feed..927c735a5b0e 100644 --- a/include/uapi/linux/tcp_metrics.h +++ b/include/uapi/linux/tcp_metrics.h @@ -1,8 +1,8 @@ /* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ /* tcp_metrics.h - TCP Metrics Interface */ -#ifndef _LINUX_TCP_METRICS_H -#define _LINUX_TCP_METRICS_H +#ifndef _UAPI_LINUX_TCP_METRICS_H +#define _UAPI_LINUX_TCP_METRICS_H #include <linux/types.h> @@ -27,6 +27,22 @@ enum tcp_metric_index { #define TCP_METRIC_MAX (__TCP_METRIC_MAX - 1) +/* Re-define enum tcp_metric_index, again, using the values carried + * as netlink attribute types. + */ +enum { + TCP_METRICS_A_METRICS_RTT = 1, + TCP_METRICS_A_METRICS_RTTVAR, + TCP_METRICS_A_METRICS_SSTHRESH, + TCP_METRICS_A_METRICS_CWND, + TCP_METRICS_A_METRICS_REODERING, + TCP_METRICS_A_METRICS_RTT_US, + TCP_METRICS_A_METRICS_RTTVAR_US, + + __TCP_METRICS_A_METRICS_MAX +}; +#define TCP_METRICS_A_METRICS_MAX (__TCP_METRICS_A_METRICS_MAX - 1) + enum { TCP_METRICS_ATTR_UNSPEC, TCP_METRICS_ATTR_ADDR_IPV4, /* u32 */ @@ -58,4 +74,4 @@ enum { #define TCP_METRICS_CMD_MAX (__TCP_METRICS_CMD_MAX - 1) -#endif /* _LINUX_TCP_METRICS_H */ +#endif /* _UAPI_LINUX_TCP_METRICS_H */ diff --git a/net/core/dev.c b/net/core/dev.c index 0a23d7da7fbc..385c4091aa77 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -10336,6 +10336,10 @@ int register_netdevice(struct net_device *dev) if (ret) return ret; + /* rss ctx ID 0 is reserved for the default context, start from 1 */ + xa_init_flags(&dev->ethtool->rss_ctx, XA_FLAGS_ALLOC1); + mutex_init(&dev->ethtool->rss_lock); + spin_lock_init(&dev->addr_list_lock); netdev_set_addr_lockdep_class(dev); @@ -11116,6 +11120,9 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name, dev->real_num_rx_queues = rxqs; if (netif_alloc_rx_queues(dev)) goto free_all; + dev->ethtool = kzalloc(sizeof(*dev->ethtool), GFP_KERNEL_ACCOUNT); + if (!dev->ethtool) + goto free_all; strcpy(dev->name, name); dev->name_assign_type = name_assign_type; @@ -11166,6 +11173,7 @@ void free_netdev(struct net_device *dev) return; } + kfree(dev->ethtool); netif_free_tx_queues(dev); netif_free_rx_queues(dev); @@ -11231,6 +11239,34 @@ void synchronize_net(void) } EXPORT_SYMBOL(synchronize_net); +static void netdev_rss_contexts_free(struct net_device *dev) +{ + struct ethtool_rxfh_context *ctx; + unsigned long context; + + mutex_lock(&dev->ethtool->rss_lock); + xa_for_each(&dev->ethtool->rss_ctx, context, ctx) { + struct ethtool_rxfh_param rxfh; + + rxfh.indir = ethtool_rxfh_context_indir(ctx); + rxfh.key = ethtool_rxfh_context_key(ctx); + rxfh.hfunc = ctx->hfunc; + rxfh.input_xfrm = ctx->input_xfrm; + rxfh.rss_context = context; + rxfh.rss_delete = true; + + xa_erase(&dev->ethtool->rss_ctx, context); + if (dev->ethtool_ops->create_rxfh_context) + dev->ethtool_ops->remove_rxfh_context(dev, ctx, + context, NULL); + else + dev->ethtool_ops->set_rxfh(dev, &rxfh, NULL); + kfree(ctx); + } + xa_destroy(&dev->ethtool->rss_ctx); + mutex_unlock(&dev->ethtool->rss_lock); +} + /** * unregister_netdevice_queue - remove device from the kernel * @dev: device @@ -11334,11 +11370,15 @@ void unregister_netdevice_many_notify(struct list_head *head, netdev_name_node_alt_flush(dev); netdev_name_node_free(dev->name_node); + netdev_rss_contexts_free(dev); + call_netdevice_notifiers(NETDEV_PRE_UNINIT, dev); if (dev->netdev_ops->ndo_uninit) dev->netdev_ops->ndo_uninit(dev); + mutex_destroy(&dev->ethtool->rss_lock); + if (skb) rtmsg_ifinfo_send(skb, dev, GFP_KERNEL, portid, nlh); diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile index 504f954a1b28..9a190635fe95 100644 --- a/net/ethtool/Makefile +++ b/net/ethtool/Makefile @@ -8,4 +8,4 @@ ethtool_nl-y := netlink.o bitset.o strset.o linkinfo.o linkmodes.o rss.o \ linkstate.o debug.o wol.o features.o privflags.o rings.o \ channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o \ tunnels.o fec.o eeprom.o stats.o phc_vclocks.o mm.o \ - module.o pse-pd.o plca.o mm.o + module.o cmis_fw_update.o cmis_cdb.o pse-pd.o plca.o mm.o diff --git a/net/ethtool/cmis.h b/net/ethtool/cmis.h new file mode 100644 index 000000000000..e71cc3e1b7eb --- /dev/null +++ b/net/ethtool/cmis.h @@ -0,0 +1,124 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#define ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH 120 +#define ETHTOOL_CMIS_CDB_CMD_PAGE 0x9F +#define ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR 0x50 + +/** + * struct ethtool_cmis_cdb - CDB commands parameters + * @cmis_rev: CMIS revision major. + * @read_write_len_ext: Allowable additional number of byte octets to the LPL + * in a READ or a WRITE CDB commands. + * @max_completion_time: Maximum CDB command completion time in msec. + */ +struct ethtool_cmis_cdb { + u8 cmis_rev; + u8 read_write_len_ext; + u16 max_completion_time; +}; + +enum ethtool_cmis_cdb_cmd_id { + ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS = 0x0000, + ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES = 0x0040, + ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES = 0x0041, + ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD = 0x0101, + ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL = 0x0103, + ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD = 0x0107, + ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE = 0x0109, + ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE = 0x010A, +}; + +/** + * struct ethtool_cmis_cdb_request - CDB commands request fields as decribed in + * the CMIS standard + * @id: Command ID. + * @epl_len: EPL memory length. + * @lpl_len: LPL memory length. + * @chk_code: Check code for the previous field and the payload. + * @resv1: Added to match the CMIS standard request continuity. + * @resv2: Added to match the CMIS standard request continuity. + * @payload: Payload for the CDB commands. + */ +struct ethtool_cmis_cdb_request { + __be16 id; + struct_group(body, + __be16 epl_len; + u8 lpl_len; + u8 chk_code; + u8 resv1; + u8 resv2; + u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH]; + ); +}; + +#define CDB_F_COMPLETION_VALID BIT(0) +#define CDB_F_STATUS_VALID BIT(1) +#define CDB_F_MODULE_STATE_VALID BIT(2) + +/** + * struct ethtool_cmis_cdb_cmd_args - CDB commands execution arguments + * @req: CDB command fields as described in the CMIS standard. + * @max_duration: Maximum duration time for command completion in msec. + * @read_write_len_ext: Allowable additional number of byte octets to the LPL + * in a READ or a WRITE commands. + * @msleep_pre_rpl: Waiting time before checking reply in msec. + * @rpl_exp_len: Expected reply length in bytes. + * @flags: Validation flags for CDB commands. + * @err_msg: Error message to be sent to user space. + */ +struct ethtool_cmis_cdb_cmd_args { + struct ethtool_cmis_cdb_request req; + u16 max_duration; + u8 read_write_len_ext; + u8 msleep_pre_rpl; + u8 rpl_exp_len; + u8 flags; + char *err_msg; +}; + +/** + * struct ethtool_cmis_cdb_rpl_hdr - CDB commands reply header arguments + * @rpl_len: Reply length. + * @rpl_chk_code: Reply check code. + */ +struct ethtool_cmis_cdb_rpl_hdr { + u8 rpl_len; + u8 rpl_chk_code; +}; + +/** + * struct ethtool_cmis_cdb_rpl - CDB commands reply arguments + * @hdr: CDB commands reply header arguments. + * @payload: Payload for the CDB commands reply. + */ +struct ethtool_cmis_cdb_rpl { + struct ethtool_cmis_cdb_rpl_hdr hdr; + u8 payload[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH]; +}; + +u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs); + +void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, + enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl, + u8 lpl_len, u16 max_duration, + u8 read_write_len_ext, u16 msleep_pre_rpl, + u8 rpl_exp_len, u8 flags); + +void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags); + +void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data, + u8 page, u32 offset, u32 length); +void ethtool_cmis_page_fini(struct ethtool_module_eeprom *page_data); + +struct ethtool_cmis_cdb * +ethtool_cmis_cdb_init(struct net_device *dev, + const struct ethtool_module_fw_flash_params *params, + struct ethnl_module_fw_flash_ntf_params *ntf_params); +void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb); + +int ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag, + u16 max_duration, u32 offset, + bool (*cond_success)(u8), bool (*cond_fail)(u8), u8 *state); + +int ethtool_cmis_cdb_execute_cmd(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args); diff --git a/net/ethtool/cmis_cdb.c b/net/ethtool/cmis_cdb.c new file mode 100644 index 000000000000..1bb08783b60d --- /dev/null +++ b/net/ethtool/cmis_cdb.c @@ -0,0 +1,602 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/ethtool.h> +#include <linux/jiffies.h> + +#include "common.h" +#include "module_fw.h" +#include "cmis.h" + +/* For accessing the LPL field on page 9Fh, the allowable length extension is + * min(i, 15) byte octets where i specifies the allowable additional number of + * byte octets in a READ or a WRITE. + */ +u32 ethtool_cmis_get_max_payload_size(u8 num_of_byte_octs) +{ + return 8 * (1 + min_t(u8, num_of_byte_octs, 15)); +} + +void ethtool_cmis_cdb_compose_args(struct ethtool_cmis_cdb_cmd_args *args, + enum ethtool_cmis_cdb_cmd_id cmd, u8 *pl, + u8 lpl_len, u16 max_duration, + u8 read_write_len_ext, u16 msleep_pre_rpl, + u8 rpl_exp_len, u8 flags) +{ + args->req.id = cpu_to_be16(cmd); + args->req.lpl_len = lpl_len; + if (pl) + memcpy(args->req.payload, pl, args->req.lpl_len); + + args->max_duration = max_duration; + args->read_write_len_ext = + ethtool_cmis_get_max_payload_size(read_write_len_ext); + args->msleep_pre_rpl = msleep_pre_rpl; + args->rpl_exp_len = rpl_exp_len; + args->flags = flags; + args->err_msg = NULL; +} + +void ethtool_cmis_page_init(struct ethtool_module_eeprom *page_data, + u8 page, u32 offset, u32 length) +{ + page_data->page = page; + page_data->offset = offset; + page_data->length = length; + page_data->i2c_address = ETHTOOL_CMIS_CDB_PAGE_I2C_ADDR; +} + +#define CMIS_REVISION_PAGE 0x00 +#define CMIS_REVISION_OFFSET 0x01 + +struct cmis_rev_rpl { + u8 rev; +}; + +static u8 cmis_rev_rpl_major(struct cmis_rev_rpl *rpl) +{ + return rpl->rev >> 4; +} + +static int cmis_rev_major_get(struct net_device *dev, u8 *rev_major) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {0}; + struct netlink_ext_ack extack = {}; + struct cmis_rev_rpl rpl = {}; + int err; + + ethtool_cmis_page_init(&page_data, CMIS_REVISION_PAGE, + CMIS_REVISION_OFFSET, sizeof(rpl)); + page_data.data = (u8 *)&rpl; + + err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return err; + } + + *rev_major = cmis_rev_rpl_major(&rpl); + + return 0; +} + +#define CMIS_CDB_ADVERTISEMENT_PAGE 0x01 +#define CMIS_CDB_ADVERTISEMENT_OFFSET 0xA3 + +/* Based on section 8.4.11 "CDB Messaging Support Advertisement" in CMIS + * standard revision 5.2. + */ +struct cmis_cdb_advert_rpl { + u8 inst_supported; + u8 read_write_len_ext; + u8 resv1; + u8 resv2; +}; + +static u8 cmis_cdb_advert_rpl_inst_supported(struct cmis_cdb_advert_rpl *rpl) +{ + return rpl->inst_supported >> 6; +} + +static int cmis_cdb_advertisement_get(struct ethtool_cmis_cdb *cdb, + struct net_device *dev) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {}; + struct cmis_cdb_advert_rpl rpl = {}; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(&page_data, CMIS_CDB_ADVERTISEMENT_PAGE, + CMIS_CDB_ADVERTISEMENT_OFFSET, sizeof(rpl)); + page_data.data = (u8 *)&rpl; + + err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return err; + } + + if (!cmis_cdb_advert_rpl_inst_supported(&rpl)) + return -EOPNOTSUPP; + + cdb->read_write_len_ext = rpl.read_write_len_ext; + + return 0; +} + +#define CMIS_PASSWORD_ENTRY_PAGE 0x00 +#define CMIS_PASSWORD_ENTRY_OFFSET 0x7A + +struct cmis_password_entry_pl { + __be32 password; +}; + +/* See section 9.3.1 "CMD 0000h: Query Status" in CMIS standard revision 5.2. + * struct cmis_cdb_query_status_pl and struct cmis_cdb_query_status_rpl are + * structured layouts of the flat arrays, + * struct ethtool_cmis_cdb_request::payload and + * struct ethtool_cmis_cdb_rpl::payload respectively. + */ +struct cmis_cdb_query_status_pl { + u16 response_delay; +}; + +struct cmis_cdb_query_status_rpl { + u8 length; + u8 status; +}; + +static int +cmis_cdb_validate_password(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + const struct ethtool_module_fw_flash_params *params, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct cmis_cdb_query_status_pl qs_pl = {0}; + struct ethtool_module_eeprom page_data = {}; + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_password_entry_pl pe_pl = {}; + struct cmis_cdb_query_status_rpl *rpl; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(&page_data, CMIS_PASSWORD_ENTRY_PAGE, + CMIS_PASSWORD_ENTRY_OFFSET, sizeof(pe_pl)); + page_data.data = (u8 *)&pe_pl; + + pe_pl = *((struct cmis_password_entry_pl *)page_data.data); + pe_pl.password = params->password; + err = ops->set_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + return err; + } + + ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_QUERY_STATUS, + (u8 *)&qs_pl, sizeof(qs_pl), 0, + cdb->read_write_len_ext, 1000, + sizeof(*rpl), + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Query Status command failed", + args.err_msg); + return err; + } + + rpl = (struct cmis_cdb_query_status_rpl *)args.req.payload; + if (!rpl->length || !rpl->status) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Password was not accepted", + NULL); + return -EINVAL; + } + + return 0; +} + +/* Some CDB commands asserts the CDB completion flag only from CMIS + * revision 5. Therefore, check the relevant validity flag only when + * the revision supports it. + */ +void ethtool_cmis_cdb_check_completion_flag(u8 cmis_rev, u8 *flags) +{ + *flags |= cmis_rev >= 5 ? CDB_F_COMPLETION_VALID : 0; +} + +#define CMIS_CDB_MODULE_FEATURES_RESV_DATA 34 + +/* See section 9.4.1 "CMD 0040h: Module Features" in CMIS standard revision 5.2. + * struct cmis_cdb_module_features_rpl is structured layout of the flat + * array, ethtool_cmis_cdb_rpl::payload. + */ +struct cmis_cdb_module_features_rpl { + u8 resv1[CMIS_CDB_MODULE_FEATURES_RESV_DATA]; + __be16 max_completion_time; +}; + +static u16 +cmis_cdb_module_features_completion_time(struct cmis_cdb_module_features_rpl *rpl) +{ + return be16_to_cpu(rpl->max_completion_time); +} + +static int cmis_cdb_module_features_get(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_cdb_module_features_rpl *rpl; + u8 flags = CDB_F_STATUS_VALID; + int err; + + ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags); + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_MODULE_FEATURES, + NULL, 0, 0, cdb->read_write_len_ext, + 1000, sizeof(*rpl), flags); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Module Features command failed", + args.err_msg); + return err; + } + + rpl = (struct cmis_cdb_module_features_rpl *)args.req.payload; + cdb->max_completion_time = + cmis_cdb_module_features_completion_time(rpl); + + return 0; +} + +struct ethtool_cmis_cdb * +ethtool_cmis_cdb_init(struct net_device *dev, + const struct ethtool_module_fw_flash_params *params, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb *cdb; + int err; + + cdb = kzalloc(sizeof(*cdb), GFP_KERNEL); + if (!cdb) + return ERR_PTR(-ENOMEM); + + err = cmis_rev_major_get(dev, &cdb->cmis_rev); + if (err < 0) + goto err; + + if (cdb->cmis_rev < 4) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "CMIS revision doesn't support module firmware flashing", + NULL); + err = -EOPNOTSUPP; + goto err; + } + + err = cmis_cdb_advertisement_get(cdb, dev); + if (err < 0) + goto err; + + if (params->password_valid) { + err = cmis_cdb_validate_password(cdb, dev, params, ntf_params); + if (err < 0) + goto err; + } + + err = cmis_cdb_module_features_get(cdb, dev, ntf_params); + if (err < 0) + goto err; + + return cdb; + +err: + ethtool_cmis_cdb_fini(cdb); + return ERR_PTR(err); +} + +void ethtool_cmis_cdb_fini(struct ethtool_cmis_cdb *cdb) +{ + kfree(cdb); +} + +static bool is_completed(u8 data) +{ + return !!(data & 0x40); +} + +#define CMIS_CDB_STATUS_SUCCESS 0x01 + +static bool status_success(u8 data) +{ + return data == CMIS_CDB_STATUS_SUCCESS; +} + +#define CMIS_CDB_STATUS_FAIL 0x40 + +static bool status_fail(u8 data) +{ + return data & CMIS_CDB_STATUS_FAIL; +} + +struct cmis_wait_for_cond_rpl { + u8 state; +}; + +static int +ethtool_cmis_module_poll(struct net_device *dev, + struct cmis_wait_for_cond_rpl *rpl, u32 offset, + bool (*cond_success)(u8), bool (*cond_fail)(u8)) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {0}; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(&page_data, 0, offset, sizeof(rpl)); + page_data.data = (u8 *)rpl; + + err = ops->get_module_eeprom_by_page(dev, &page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err_once(dev, "%s\n", extack._msg); + return -EBUSY; + } + + if ((*cond_success)(rpl->state)) + return 0; + + if (*cond_fail && (*cond_fail)(rpl->state)) + return -EIO; + + return -EBUSY; +} + +int ethtool_cmis_wait_for_cond(struct net_device *dev, u8 flags, u8 flag, + u16 max_duration, u32 offset, + bool (*cond_success)(u8), bool (*cond_fail)(u8), + u8 *state) +{ + struct cmis_wait_for_cond_rpl rpl = {}; + unsigned long end; + int err; + + if (!(flags & flag)) + return 0; + + if (max_duration == 0) + max_duration = U16_MAX; + + end = jiffies + msecs_to_jiffies(max_duration); + do { + err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success, + cond_fail); + if (err != -EBUSY) + goto out; + + msleep(20); + } while (time_before(jiffies, end)); + + err = ethtool_cmis_module_poll(dev, &rpl, offset, cond_success, + cond_fail); + if (err == -EBUSY) + err = -ETIMEDOUT; + +out: + *state = rpl.state; + return err; +} + +#define CMIS_CDB_COMPLETION_FLAG_OFFSET 0x08 + +static int cmis_cdb_wait_for_completion(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args) +{ + u8 flag; + int err; + + /* Some vendors demand waiting time before checking completion flag + * in some CDB commands. + */ + msleep(args->msleep_pre_rpl); + + err = ethtool_cmis_wait_for_cond(dev, args->flags, + CDB_F_COMPLETION_VALID, + args->max_duration, + CMIS_CDB_COMPLETION_FLAG_OFFSET, + is_completed, NULL, &flag); + if (err < 0) + args->err_msg = "Completion Flag did not set on time"; + + return err; +} + +#define CMIS_CDB_STATUS_OFFSET 0x25 + +static void cmis_cdb_status_fail_msg_get(u8 status, char **err_msg) +{ + switch (status) { + case 0b10000001: + *err_msg = "CDB Status is in progress: Busy capturing command"; + break; + case 0b10000010: + *err_msg = + "CDB Status is in progress: Busy checking/validating command"; + break; + case 0b10000011: + *err_msg = "CDB Status is in progress: Busy executing"; + break; + case 0b01000000: + *err_msg = "CDB status failed: no specific failure"; + break; + case 0b01000010: + *err_msg = + "CDB status failed: Parameter range error or parameter not supported"; + break; + case 0b01000101: + *err_msg = "CDB status failed: CdbChkCode error"; + break; + default: + *err_msg = "Unknown failure reason"; + } +}; + +static int cmis_cdb_wait_for_status(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args) +{ + u8 status; + int err; + + /* Some vendors demand waiting time before checking status in some + * CDB commands. + */ + msleep(args->msleep_pre_rpl); + + err = ethtool_cmis_wait_for_cond(dev, args->flags, CDB_F_STATUS_VALID, + args->max_duration, + CMIS_CDB_STATUS_OFFSET, + status_success, status_fail, &status); + if (err < 0 && !args->err_msg) + cmis_cdb_status_fail_msg_get(status, &args->err_msg); + + return err; +} + +#define CMIS_CDB_REPLY_OFFSET 0x86 + +static int cmis_cdb_process_reply(struct net_device *dev, + struct ethtool_module_eeprom *page_data, + struct ethtool_cmis_cdb_cmd_args *args) +{ + u8 rpl_hdr_len = sizeof(struct ethtool_cmis_cdb_rpl_hdr); + u8 rpl_exp_len = args->rpl_exp_len + rpl_hdr_len; + const struct ethtool_ops *ops = dev->ethtool_ops; + struct netlink_ext_ack extack = {}; + struct ethtool_cmis_cdb_rpl *rpl; + int err; + + if (!args->rpl_exp_len) + return 0; + + ethtool_cmis_page_init(page_data, ETHTOOL_CMIS_CDB_CMD_PAGE, + CMIS_CDB_REPLY_OFFSET, rpl_exp_len); + page_data->data = kmalloc(page_data->length, GFP_KERNEL); + if (!page_data->data) + return -ENOMEM; + + err = ops->get_module_eeprom_by_page(dev, page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + goto out; + } + + rpl = (struct ethtool_cmis_cdb_rpl *)page_data->data; + if ((args->rpl_exp_len > rpl->hdr.rpl_len + rpl_hdr_len) || + !rpl->hdr.rpl_chk_code) { + err = -EIO; + goto out; + } + + args->req.lpl_len = rpl->hdr.rpl_len; + memcpy(args->req.payload, rpl->payload, args->req.lpl_len); + +out: + kfree(page_data->data); + return err; +} + +static int +__ethtool_cmis_cdb_execute_cmd(struct net_device *dev, + struct ethtool_module_eeprom *page_data, + u8 page, u32 offset, u32 length, void *data) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct netlink_ext_ack extack = {}; + int err; + + ethtool_cmis_page_init(page_data, page, offset, length); + page_data->data = kmemdup(data, page_data->length, GFP_KERNEL); + if (!page_data->data) + return -ENOMEM; + + err = ops->set_module_eeprom_by_page(dev, page_data, &extack); + if (err < 0) { + if (extack._msg) + netdev_err(dev, "%s\n", extack._msg); + } + + kfree(page_data->data); + return err; +} + +static u8 cmis_cdb_calc_checksum(const void *data, size_t size) +{ + const u8 *bytes = (const u8 *)data; + u8 checksum = 0; + + for (size_t i = 0; i < size; i++) + checksum += bytes[i]; + + return ~checksum; +} + +#define CMIS_CDB_CMD_ID_OFFSET 0x80 + +int ethtool_cmis_cdb_execute_cmd(struct net_device *dev, + struct ethtool_cmis_cdb_cmd_args *args) +{ + struct ethtool_module_eeprom page_data = {}; + u32 offset; + int err; + + args->req.chk_code = + cmis_cdb_calc_checksum(&args->req, sizeof(args->req)); + + if (args->req.lpl_len > args->read_write_len_ext) { + args->err_msg = "LPL length is longer than CDB read write length extension allows"; + return -EINVAL; + } + + /* According to the CMIS standard, there are two options to trigger the + * CDB commands. The default option is triggering the command by writing + * the CMDID bytes. Therefore, the command will be split to 2 calls: + * First, with everything except the CMDID field and then the CMDID + * field. + */ + offset = CMIS_CDB_CMD_ID_OFFSET + + offsetof(struct ethtool_cmis_cdb_request, body); + err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data, + ETHTOOL_CMIS_CDB_CMD_PAGE, offset, + sizeof(args->req.body), + &args->req.body); + if (err < 0) + return err; + + offset = CMIS_CDB_CMD_ID_OFFSET + + offsetof(struct ethtool_cmis_cdb_request, id); + err = __ethtool_cmis_cdb_execute_cmd(dev, &page_data, + ETHTOOL_CMIS_CDB_CMD_PAGE, offset, + sizeof(args->req.id), + &args->req.id); + if (err < 0) + return err; + + err = cmis_cdb_wait_for_completion(dev, args); + if (err < 0) + return err; + + err = cmis_cdb_wait_for_status(dev, args); + if (err < 0) + return err; + + return cmis_cdb_process_reply(dev, &page_data, args); +} diff --git a/net/ethtool/cmis_fw_update.c b/net/ethtool/cmis_fw_update.c new file mode 100644 index 000000000000..ae4b4b28a601 --- /dev/null +++ b/net/ethtool/cmis_fw_update.c @@ -0,0 +1,399 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include <linux/ethtool.h> +#include <linux/firmware.h> + +#include "common.h" +#include "module_fw.h" +#include "cmis.h" + +struct cmis_fw_update_fw_mng_features { + u8 start_cmd_payload_size; + u16 max_duration_start; + u16 max_duration_write; + u16 max_duration_complete; +}; + +/* See section 9.4.2 "CMD 0041h: Firmware Management Features" in CMIS standard + * revision 5.2. + * struct cmis_cdb_fw_mng_features_rpl is a structured layout of the flat + * array, ethtool_cmis_cdb_rpl::payload. + */ +struct cmis_cdb_fw_mng_features_rpl { + u8 resv1; + u8 resv2; + u8 start_cmd_payload_size; + u8 resv3; + u8 read_write_len_ext; + u8 write_mechanism; + u8 resv4; + u8 resv5; + __be16 max_duration_start; + __be16 resv6; + __be16 max_duration_write; + __be16 max_duration_complete; + __be16 resv7; +}; + +#define CMIS_CDB_FW_WRITE_MECHANISM_LPL 0x01 + +static int +cmis_fw_update_fw_mng_features_get(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct cmis_fw_update_fw_mng_features *fw_mng, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_cdb_fw_mng_features_rpl *rpl; + u8 flags = CDB_F_STATUS_VALID; + int err; + + ethtool_cmis_cdb_check_completion_flag(cdb->cmis_rev, &flags); + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_FW_MANAGMENT_FEATURES, + NULL, 0, cdb->max_completion_time, + cdb->read_write_len_ext, 1000, + sizeof(*rpl), flags); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "FW Management Features command failed", + args.err_msg); + return err; + } + + rpl = (struct cmis_cdb_fw_mng_features_rpl *)args.req.payload; + if (!(rpl->write_mechanism == CMIS_CDB_FW_WRITE_MECHANISM_LPL)) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Write LPL is not supported", + NULL); + return -EOPNOTSUPP; + } + + /* Above, we used read_write_len_ext that we got from CDB + * advertisement. Update it with the value that we got from module + * features query, which is specific for Firmware Management Commands + * (IDs 0100h-01FFh). + */ + cdb->read_write_len_ext = rpl->read_write_len_ext; + fw_mng->start_cmd_payload_size = rpl->start_cmd_payload_size; + fw_mng->max_duration_start = be16_to_cpu(rpl->max_duration_start); + fw_mng->max_duration_write = be16_to_cpu(rpl->max_duration_write); + fw_mng->max_duration_complete = be16_to_cpu(rpl->max_duration_complete); + + return 0; +} + +/* See section 9.7.2 "CMD 0101h: Start Firmware Download" in CMIS standard + * revision 5.2. + * struct cmis_cdb_start_fw_download_pl is a structured layout of the + * flat array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_start_fw_download_pl { + __struct_group(cmis_cdb_start_fw_download_pl_h, head, /* no attrs */, + __be32 image_size; + __be32 resv1; + ); + u8 vendor_data[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - + sizeof(struct cmis_cdb_start_fw_download_pl_h)]; +}; + +static int +cmis_fw_update_start_download(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) +{ + u8 vendor_data_size = fw_mng->start_cmd_payload_size; + struct cmis_cdb_start_fw_download_pl pl = {}; + struct ethtool_cmis_cdb_cmd_args args = {}; + u8 lpl_len; + int err; + + pl.image_size = cpu_to_be32(fw_update->fw->size); + memcpy(pl.vendor_data, fw_update->fw->data, vendor_data_size); + + lpl_len = offsetof(struct cmis_cdb_start_fw_download_pl, + vendor_data[vendor_data_size]); + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_START_FW_DOWNLOAD, + (u8 *)&pl, lpl_len, + fw_mng->max_duration_start, + cdb->read_write_len_ext, 1000, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args); + if (err < 0) + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Start FW download command failed", + args.err_msg); + + return err; +} + +/* See section 9.7.4 "CMD 0103h: Write Firmware Block LPL" in CMIS standard + * revision 5.2. + * struct cmis_cdb_write_fw_block_lpl_pl is a structured layout of the + * flat array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_write_fw_block_lpl_pl { + __be32 block_address; + u8 fw_block[ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH - sizeof(__be32)]; +}; + +static int +cmis_fw_update_write_image(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) +{ + u8 start = fw_mng->start_cmd_payload_size; + u32 offset, max_block_size, max_lpl_len; + u32 image_size = fw_update->fw->size; + int err; + + max_lpl_len = min_t(u32, + ethtool_cmis_get_max_payload_size(cdb->read_write_len_ext), + ETHTOOL_CMIS_CDB_LPL_MAX_PL_LENGTH); + max_block_size = + max_lpl_len - sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl, + block_address); + + for (offset = start; offset < image_size; offset += max_block_size) { + struct cmis_cdb_write_fw_block_lpl_pl pl = { + .block_address = cpu_to_be32(offset - start), + }; + struct ethtool_cmis_cdb_cmd_args args = {}; + u32 block_size, lpl_len; + + ethnl_module_fw_flash_ntf_in_progress(fw_update->dev, + &fw_update->ntf_params, + offset - start, + image_size); + block_size = min_t(u32, max_block_size, image_size - offset); + memcpy(pl.fw_block, &fw_update->fw->data[offset], block_size); + lpl_len = block_size + + sizeof_field(struct cmis_cdb_write_fw_block_lpl_pl, + block_address); + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_WRITE_FW_BLOCK_LPL, + (u8 *)&pl, lpl_len, + fw_mng->max_duration_write, + cdb->read_write_len_ext, 1, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(fw_update->dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(fw_update->dev, + &fw_update->ntf_params, + "Write FW block LPL command failed", + args.err_msg); + return err; + } + } + + return 0; +} + +static int +cmis_fw_update_complete_download(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct cmis_fw_update_fw_mng_features *fw_mng, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + int err; + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_COMPLETE_FW_DOWNLOAD, + NULL, 0, fw_mng->max_duration_complete, + cdb->read_write_len_ext, 1000, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Complete FW download command failed", + args.err_msg); + + return err; +} + +static int +cmis_fw_update_download_image(struct ethtool_cmis_cdb *cdb, + struct ethtool_cmis_fw_update_params *fw_update, + struct cmis_fw_update_fw_mng_features *fw_mng) +{ + int err; + + err = cmis_fw_update_start_download(cdb, fw_update, fw_mng); + if (err < 0) + return err; + + err = cmis_fw_update_write_image(cdb, fw_update, fw_mng); + if (err < 0) + return err; + + err = cmis_fw_update_complete_download(cdb, fw_update->dev, fw_mng, + &fw_update->ntf_params); + if (err < 0) + return err; + + return 0; +} + +enum { + CMIS_MODULE_LOW_PWR = 1, + CMIS_MODULE_READY = 3, +}; + +static bool module_is_ready(u8 data) +{ + u8 state = (data >> 1) & 7; + + return state == CMIS_MODULE_READY || state == CMIS_MODULE_LOW_PWR; +} + +#define CMIS_MODULE_READY_MAX_DURATION_MSEC 1000 +#define CMIS_MODULE_STATE_OFFSET 3 + +static int +cmis_fw_update_wait_for_module_state(struct net_device *dev, u8 flags) +{ + u8 state; + + return ethtool_cmis_wait_for_cond(dev, flags, CDB_F_MODULE_STATE_VALID, + CMIS_MODULE_READY_MAX_DURATION_MSEC, + CMIS_MODULE_STATE_OFFSET, + module_is_ready, NULL, &state); +} + +/* See section 9.7.10 "CMD 0109h: Run Firmware Image" in CMIS standard + * revision 5.2. + * struct cmis_cdb_run_fw_image_pl is a structured layout of the flat + * array, ethtool_cmis_cdb_request::payload. + */ +struct cmis_cdb_run_fw_image_pl { + u8 resv1; + u8 image_to_run; + u16 delay_to_reset; +}; + +static int +cmis_fw_update_run_image(struct ethtool_cmis_cdb *cdb, struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + struct cmis_cdb_run_fw_image_pl pl = {0}; + int err; + + ethtool_cmis_cdb_compose_args(&args, ETHTOOL_CMIS_CDB_CMD_RUN_FW_IMAGE, + (u8 *)&pl, sizeof(pl), + cdb->max_completion_time, + cdb->read_write_len_ext, 1000, 0, + CDB_F_MODULE_STATE_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) { + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Run image command failed", + args.err_msg); + return err; + } + + err = cmis_fw_update_wait_for_module_state(dev, args.flags); + if (err < 0) + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Module is not ready on time after reset", + NULL); + + return err; +} + +static int +cmis_fw_update_commit_image(struct ethtool_cmis_cdb *cdb, + struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *ntf_params) +{ + struct ethtool_cmis_cdb_cmd_args args = {}; + int err; + + ethtool_cmis_cdb_compose_args(&args, + ETHTOOL_CMIS_CDB_CMD_COMMIT_FW_IMAGE, + NULL, 0, cdb->max_completion_time, + cdb->read_write_len_ext, 1000, 0, + CDB_F_COMPLETION_VALID | CDB_F_STATUS_VALID); + + err = ethtool_cmis_cdb_execute_cmd(dev, &args); + if (err < 0) + ethnl_module_fw_flash_ntf_err(dev, ntf_params, + "Commit image command failed", + args.err_msg); + + return err; +} + +static int cmis_fw_update_reset(struct net_device *dev) +{ + __u32 reset_data = ETH_RESET_PHY; + + return dev->ethtool_ops->reset(dev, &reset_data); +} + +void +ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *fw_update) +{ + struct ethnl_module_fw_flash_ntf_params *ntf_params = + &fw_update->ntf_params; + struct cmis_fw_update_fw_mng_features fw_mng = {0}; + struct net_device *dev = fw_update->dev; + struct ethtool_cmis_cdb *cdb; + int err; + + cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params); + if (IS_ERR(cdb)) + goto err_send_ntf; + + ethnl_module_fw_flash_ntf_start(dev, ntf_params); + + err = cmis_fw_update_fw_mng_features_get(cdb, dev, &fw_mng, ntf_params); + if (err < 0) + goto err_cdb_fini; + + err = cmis_fw_update_download_image(cdb, fw_update, &fw_mng); + if (err < 0) + goto err_cdb_fini; + + err = cmis_fw_update_run_image(cdb, dev, ntf_params); + if (err < 0) + goto err_cdb_fini; + + /* The CDB command "Run Firmware Image" resets the firmware, so the new + * one might have different settings. + * Free the old CDB instance, and init a new one. + */ + ethtool_cmis_cdb_fini(cdb); + + cdb = ethtool_cmis_cdb_init(dev, &fw_update->params, ntf_params); + if (IS_ERR(cdb)) + goto err_send_ntf; + + err = cmis_fw_update_commit_image(cdb, dev, ntf_params); + if (err < 0) + goto err_cdb_fini; + + err = cmis_fw_update_reset(dev); + if (err < 0) + goto err_cdb_fini; + + ethnl_module_fw_flash_ntf_complete(dev, ntf_params); + ethtool_cmis_cdb_fini(cdb); + return; + +err_cdb_fini: + ethtool_cmis_cdb_fini(cdb); +err_send_ntf: + ethnl_module_fw_flash_ntf_err(dev, ntf_params, NULL, NULL); +} diff --git a/net/ethtool/coalesce.c b/net/ethtool/coalesce.c index 759b16e3d134..3e18ca1ccc5e 100644 --- a/net/ethtool/coalesce.c +++ b/net/ethtool/coalesce.c @@ -211,9 +211,9 @@ static int coalesce_fill_reply(struct sk_buff *skb, { const struct coalesce_reply_data *data = COALESCE_REPDATA(reply_base); const struct kernel_ethtool_coalesce *kcoal = &data->kernel_coalesce; - struct dim_irq_moder *moder = req_base->dev->irq_moder; const struct ethtool_coalesce *coal = &data->coalesce; u32 supported = data->supported_params; + struct dim_irq_moder *moder; int ret = 0; if (coalesce_put_u32(skb, ETHTOOL_A_COALESCE_RX_USECS, @@ -272,9 +272,10 @@ static int coalesce_fill_reply(struct sk_buff *skb, kcoal->tx_aggr_time_usecs, supported)) return -EMSGSIZE; - if (!moder) + if (!req_base->dev || !req_base->dev->irq_moder) return 0; + moder = req_base->dev->irq_moder; rcu_read_lock(); if (moder->profile_flags & DIM_PROFILE_RX) { ret = coalesce_put_profile(skb, ETHTOOL_A_COALESCE_RX_PROFILE, diff --git a/net/ethtool/eeprom.c b/net/ethtool/eeprom.c index 6209c3a9c8f7..f36811b3ecf1 100644 --- a/net/ethtool/eeprom.c +++ b/net/ethtool/eeprom.c @@ -91,6 +91,12 @@ static int get_module_eeprom_by_page(struct net_device *dev, { const struct ethtool_ops *ops = dev->ethtool_ops; + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + if (dev->sfp_bus) return sfp_get_module_eeprom_by_page(dev->sfp_bus, page_data, extack); diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c index e645d751a5e8..d8795ed07ba3 100644 --- a/net/ethtool/ioctl.c +++ b/net/ethtool/ioctl.c @@ -658,6 +658,9 @@ static int ethtool_get_settings(struct net_device *dev, void __user *useraddr) if (!dev->ethtool_ops->get_link_ksettings) return -EOPNOTSUPP; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + memset(&link_ksettings, 0, sizeof(link_ksettings)); err = dev->ethtool_ops->get_link_ksettings(dev, &link_ksettings); if (err < 0) @@ -1199,6 +1202,7 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct ethtool_rxfh_param rxfh_dev = {}; u32 user_indir_size, user_key_size; + struct ethtool_rxfh_context *ctx; struct ethtool_rxfh rxfh; u32 indir_bytes; u8 *rss_config; @@ -1246,11 +1250,26 @@ static noinline_for_stack int ethtool_get_rxfh(struct net_device *dev, if (user_key_size) rxfh_dev.key = rss_config + indir_bytes; - rxfh_dev.rss_context = rxfh.rss_context; - - ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev); - if (ret) - goto out; + if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + if (rxfh_dev.indir) + memcpy(rxfh_dev.indir, ethtool_rxfh_context_indir(ctx), + indir_bytes); + if (rxfh_dev.key) + memcpy(rxfh_dev.key, ethtool_rxfh_context_key(ctx), + user_key_size); + rxfh_dev.hfunc = ctx->hfunc; + rxfh_dev.input_xfrm = ctx->input_xfrm; + ret = 0; + } else { + ret = dev->ethtool_ops->get_rxfh(dev, &rxfh_dev); + if (ret) + goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, hfunc), &rxfh_dev.hfunc, sizeof(rxfh.hfunc))) { @@ -1278,10 +1297,13 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; u32 dev_indir_size = 0, dev_key_size = 0, i; struct ethtool_rxfh_param rxfh_dev = {}; + struct ethtool_rxfh_context *ctx = NULL; struct netlink_ext_ack *extack = NULL; struct ethtool_rxnfc rx_rings; struct ethtool_rxfh rxfh; + bool locked = false; /* dev->ethtool->rss_lock taken */ u32 indir_bytes = 0; + bool create = false; u8 *rss_config; int ret; @@ -1309,6 +1331,7 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, if ((rxfh.input_xfrm & RXH_XFRM_SYM_XOR) && !ops->cap_rss_sym_xor_supported) return -EOPNOTSUPP; + create = rxfh.rss_context == ETH_RXFH_CONTEXT_ALLOC; /* If either indir, hash key or function is valid, proceed further. * Must request at least one change: indir size, hash key, function @@ -1374,13 +1397,77 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, } } + if (rxfh.rss_context) { + mutex_lock(&dev->ethtool->rss_lock); + locked = true; + } + if (create) { + if (rxfh_dev.rss_delete) { + ret = -EINVAL; + goto out; + } + ctx = kzalloc(ethtool_rxfh_context_size(dev_indir_size, + dev_key_size, + ops->rxfh_priv_size), + GFP_KERNEL_ACCOUNT); + if (!ctx) { + ret = -ENOMEM; + goto out; + } + ctx->indir_size = dev_indir_size; + ctx->key_size = dev_key_size; + ctx->priv_size = ops->rxfh_priv_size; + /* Initialise to an empty context */ + ctx->hfunc = ETH_RSS_HASH_NO_CHANGE; + ctx->input_xfrm = RXH_XFRM_NO_CHANGE; + if (ops->create_rxfh_context) { + u32 limit = ops->rxfh_max_context_id ?: U32_MAX; + u32 ctx_id; + + /* driver uses new API, core allocates ID */ + ret = xa_alloc(&dev->ethtool->rss_ctx, &ctx_id, ctx, + XA_LIMIT(1, limit), GFP_KERNEL_ACCOUNT); + if (ret < 0) { + kfree(ctx); + goto out; + } + WARN_ON(!ctx_id); /* can't happen */ + rxfh.rss_context = ctx_id; + } + } else if (rxfh.rss_context) { + ctx = xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context); + if (!ctx) { + ret = -ENOENT; + goto out; + } + } rxfh_dev.hfunc = rxfh.hfunc; rxfh_dev.rss_context = rxfh.rss_context; rxfh_dev.input_xfrm = rxfh.input_xfrm; - ret = ops->set_rxfh(dev, &rxfh_dev, extack); - if (ret) + if (rxfh.rss_context && ops->create_rxfh_context) { + if (create) + ret = ops->create_rxfh_context(dev, ctx, &rxfh_dev, + extack); + else if (rxfh_dev.rss_delete) + ret = ops->remove_rxfh_context(dev, ctx, + rxfh.rss_context, + extack); + else + ret = ops->modify_rxfh_context(dev, ctx, &rxfh_dev, + extack); + } else { + ret = ops->set_rxfh(dev, &rxfh_dev, extack); + } + if (ret) { + if (create) { + /* failed to create, free our new tracking entry */ + if (ops->create_rxfh_context) + xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context); + kfree(ctx); + } goto out; + } if (copy_to_user(useraddr + offsetof(struct ethtool_rxfh, rss_context), &rxfh_dev.rss_context, sizeof(rxfh_dev.rss_context))) @@ -1393,8 +1480,44 @@ static noinline_for_stack int ethtool_set_rxfh(struct net_device *dev, else if (rxfh.indir_size != ETH_RXFH_INDIR_NO_CHANGE) dev->priv_flags |= IFF_RXFH_CONFIGURED; } + /* Update rss_ctx tracking */ + if (create && !ops->create_rxfh_context) { + /* driver uses old API, it chose context ID */ + if (WARN_ON(xa_load(&dev->ethtool->rss_ctx, rxfh.rss_context))) { + /* context ID reused, our tracking is screwed */ + kfree(ctx); + goto out; + } + /* Allocate the exact ID the driver gave us */ + if (xa_is_err(xa_store(&dev->ethtool->rss_ctx, rxfh.rss_context, + ctx, GFP_KERNEL))) { + kfree(ctx); + goto out; + } + } + if (rxfh_dev.rss_delete) { + WARN_ON(xa_erase(&dev->ethtool->rss_ctx, rxfh.rss_context) != ctx); + kfree(ctx); + } else if (ctx) { + if (rxfh_dev.indir) { + for (i = 0; i < dev_indir_size; i++) + ethtool_rxfh_context_indir(ctx)[i] = rxfh_dev.indir[i]; + ctx->indir_configured = 1; + } + if (rxfh_dev.key) { + memcpy(ethtool_rxfh_context_key(ctx), rxfh_dev.key, + dev_key_size); + ctx->key_configured = 1; + } + if (rxfh_dev.hfunc != ETH_RSS_HASH_NO_CHANGE) + ctx->hfunc = rxfh_dev.hfunc; + if (rxfh_dev.input_xfrm != RXH_XFRM_NO_CHANGE) + ctx->input_xfrm = rxfh_dev.input_xfrm; + } out: + if (locked) + mutex_unlock(&dev->ethtool->rss_lock); kfree(rss_config); return ret; } @@ -1449,6 +1572,9 @@ static int ethtool_reset(struct net_device *dev, char __user *useraddr) if (!dev->ethtool_ops->reset) return -EOPNOTSUPP; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (copy_from_user(&reset, useraddr, sizeof(reset))) return -EFAULT; @@ -1503,7 +1629,7 @@ static int ethtool_set_wol(struct net_device *dev, char __user *useraddr) if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; ethtool_notify(dev, ETHTOOL_MSG_WOL_NTF, NULL); return 0; @@ -2462,6 +2588,9 @@ int ethtool_get_module_info_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (dev->sfp_bus) return sfp_get_module_info(dev->sfp_bus, modinfo); @@ -2499,6 +2628,9 @@ int ethtool_get_module_eeprom_call(struct net_device *dev, const struct ethtool_ops *ops = dev->ethtool_ops; struct phy_device *phydev = dev->phydev; + if (dev->module_fw_flash_in_progress) + return -EBUSY; + if (dev->sfp_bus) return sfp_get_module_eeprom(dev->sfp_bus, ee, data); diff --git a/net/ethtool/module.c b/net/ethtool/module.c index ceb575efc290..6b7448df08d5 100644 --- a/net/ethtool/module.c +++ b/net/ethtool/module.c @@ -1,10 +1,14 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/ethtool.h> +#include <linux/firmware.h> +#include <linux/sfp.h> +#include <net/devlink.h> #include "netlink.h" #include "common.h" #include "bitset.h" +#include "module_fw.h" struct module_req_info { struct ethnl_req_info base; @@ -33,6 +37,12 @@ static int module_get_power_mode(struct net_device *dev, if (!ops->get_module_power_mode) return 0; + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + return ops->get_module_power_mode(dev, &data->power, extack); } @@ -109,6 +119,12 @@ ethnl_set_module_validate(struct ethnl_req_info *req_info, if (!tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY]) return 0; + if (req_info->dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(info->extack, + "Module firmware flashing is in progress"); + return -EBUSY; + } + if (!ops->get_module_power_mode || !ops->set_module_power_mode) { NL_SET_ERR_MSG_ATTR(info->extack, tb[ETHTOOL_A_MODULE_POWER_MODE_POLICY], @@ -158,3 +174,381 @@ const struct ethnl_request_ops ethnl_module_request_ops = { .set = ethnl_set_module, .set_ntf_cmd = ETHTOOL_MSG_MODULE_NTF, }; + +/* MODULE_FW_FLASH_ACT */ + +const struct nla_policy +ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1] = { + [ETHTOOL_A_MODULE_FW_FLASH_HEADER] = + NLA_POLICY_NESTED(ethnl_header_policy), + [ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME] = { .type = NLA_NUL_STRING }, + [ETHTOOL_A_MODULE_FW_FLASH_PASSWORD] = { .type = NLA_U32 }, +}; + +static LIST_HEAD(module_fw_flash_work_list); +static DEFINE_SPINLOCK(module_fw_flash_work_list_lock); + +static int +module_flash_fw_work_list_add(struct ethtool_module_fw_flash *module_fw, + struct genl_info *info) +{ + struct ethtool_module_fw_flash *work; + + /* First, check if already registered. */ + spin_lock(&module_fw_flash_work_list_lock); + list_for_each_entry(work, &module_fw_flash_work_list, list) { + if (work->fw_update.ntf_params.portid == info->snd_portid && + work->fw_update.dev == module_fw->fw_update.dev) { + spin_unlock(&module_fw_flash_work_list_lock); + return -EALREADY; + } + } + + list_add_tail(&module_fw->list, &module_fw_flash_work_list); + spin_unlock(&module_fw_flash_work_list_lock); + + return 0; +} + +static void module_flash_fw_work_list_del(struct list_head *list) +{ + spin_lock(&module_fw_flash_work_list_lock); + list_del(list); + spin_unlock(&module_fw_flash_work_list_lock); +} + +static void module_flash_fw_work(struct work_struct *work) +{ + struct ethtool_module_fw_flash *module_fw; + + module_fw = container_of(work, struct ethtool_module_fw_flash, work); + + ethtool_cmis_fw_update(&module_fw->fw_update); + + module_flash_fw_work_list_del(&module_fw->list); + module_fw->fw_update.dev->module_fw_flash_in_progress = false; + netdev_put(module_fw->fw_update.dev, &module_fw->dev_tracker); + release_firmware(module_fw->fw_update.fw); + kfree(module_fw); +} + +#define MODULE_EEPROM_PHYS_ID_PAGE 0 +#define MODULE_EEPROM_PHYS_ID_I2C_ADDR 0x50 + +static int module_flash_fw_work_init(struct ethtool_module_fw_flash *module_fw, + struct net_device *dev, + struct netlink_ext_ack *extack) +{ + const struct ethtool_ops *ops = dev->ethtool_ops; + struct ethtool_module_eeprom page_data = {}; + u8 phys_id; + int err; + + /* Fetch the SFF-8024 Identifier Value. For all supported standards, it + * is located at I2C address 0x50, byte 0. See section 4.1 in SFF-8024, + * revision 4.9. + */ + page_data.page = MODULE_EEPROM_PHYS_ID_PAGE; + page_data.offset = SFP_PHYS_ID; + page_data.length = sizeof(phys_id); + page_data.i2c_address = MODULE_EEPROM_PHYS_ID_I2C_ADDR; + page_data.data = &phys_id; + + err = ops->get_module_eeprom_by_page(dev, &page_data, extack); + if (err < 0) + return err; + + switch (phys_id) { + case SFF8024_ID_QSFP_DD: + case SFF8024_ID_OSFP: + case SFF8024_ID_DSFP: + case SFF8024_ID_QSFP_PLUS_CMIS: + case SFF8024_ID_SFP_DD_CMIS: + case SFF8024_ID_SFP_PLUS_CMIS: + INIT_WORK(&module_fw->work, module_flash_fw_work); + break; + default: + NL_SET_ERR_MSG(extack, + "Module type does not support firmware flashing"); + return -EOPNOTSUPP; + } + + return 0; +} + +void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv) +{ + struct ethtool_module_fw_flash *work; + + spin_lock(&module_fw_flash_work_list_lock); + list_for_each_entry(work, &module_fw_flash_work_list, list) { + if (work->fw_update.dev == sk_priv->dev && + work->fw_update.ntf_params.portid == sk_priv->portid) { + work->fw_update.ntf_params.closed_sock = true; + break; + } + } + spin_unlock(&module_fw_flash_work_list_lock); +} + +static int +module_flash_fw_schedule(struct net_device *dev, const char *file_name, + struct ethtool_module_fw_flash_params *params, + struct sk_buff *skb, struct genl_info *info) +{ + struct ethtool_cmis_fw_update_params *fw_update; + struct ethtool_module_fw_flash *module_fw; + int err; + + module_fw = kzalloc(sizeof(*module_fw), GFP_KERNEL); + if (!module_fw) + return -ENOMEM; + + fw_update = &module_fw->fw_update; + fw_update->params = *params; + err = request_firmware_direct(&fw_update->fw, + file_name, &dev->dev); + if (err) { + NL_SET_ERR_MSG(info->extack, + "Failed to request module firmware image"); + goto err_free; + } + + err = module_flash_fw_work_init(module_fw, dev, info->extack); + if (err < 0) + goto err_release_firmware; + + dev->module_fw_flash_in_progress = true; + netdev_hold(dev, &module_fw->dev_tracker, GFP_KERNEL); + fw_update->dev = dev; + fw_update->ntf_params.portid = info->snd_portid; + fw_update->ntf_params.seq = info->snd_seq; + fw_update->ntf_params.closed_sock = false; + + err = ethnl_sock_priv_set(skb, dev, fw_update->ntf_params.portid, + ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH); + if (err < 0) + goto err_release_firmware; + + err = module_flash_fw_work_list_add(module_fw, info); + if (err < 0) + goto err_release_firmware; + + schedule_work(&module_fw->work); + + return 0; + +err_release_firmware: + release_firmware(fw_update->fw); +err_free: + kfree(module_fw); + return err; +} + +static int module_flash_fw(struct net_device *dev, struct nlattr **tb, + struct sk_buff *skb, struct genl_info *info) +{ + struct ethtool_module_fw_flash_params params = {}; + const char *file_name; + struct nlattr *attr; + + if (GENL_REQ_ATTR_CHECK(info, ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME)) + return -EINVAL; + + file_name = nla_data(tb[ETHTOOL_A_MODULE_FW_FLASH_FILE_NAME]); + + attr = tb[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD]; + if (attr) { + params.password = cpu_to_be32(nla_get_u32(attr)); + params.password_valid = true; + } + + return module_flash_fw_schedule(dev, file_name, ¶ms, skb, info); +} + +static int ethnl_module_fw_flash_validate(struct net_device *dev, + struct netlink_ext_ack *extack) +{ + struct devlink_port *devlink_port = dev->devlink_port; + const struct ethtool_ops *ops = dev->ethtool_ops; + + if (!ops->set_module_eeprom_by_page || + !ops->get_module_eeprom_by_page) { + NL_SET_ERR_MSG(extack, + "Flashing module firmware is not supported by this device"); + return -EOPNOTSUPP; + } + + if (!ops->reset) { + NL_SET_ERR_MSG(extack, + "Reset module is not supported by this device, so flashing is not permitted"); + return -EOPNOTSUPP; + } + + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, "Module firmware flashing already in progress"); + return -EBUSY; + } + + if (dev->flags & IFF_UP) { + NL_SET_ERR_MSG(extack, "Netdevice is up, so flashing is not permitted"); + return -EBUSY; + } + + if (devlink_port && devlink_port->attrs.split) { + NL_SET_ERR_MSG(extack, "Can't perform firmware flashing on a split port"); + return -EOPNOTSUPP; + } + + return 0; +} + +int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info) +{ + struct ethnl_req_info req_info = {}; + struct nlattr **tb = info->attrs; + struct net_device *dev; + int ret; + + ret = ethnl_parse_header_dev_get(&req_info, + tb[ETHTOOL_A_MODULE_FW_FLASH_HEADER], + genl_info_net(info), info->extack, + true); + if (ret < 0) + return ret; + dev = req_info.dev; + + rtnl_lock(); + ret = ethnl_ops_begin(dev); + if (ret < 0) + goto out_rtnl; + + ret = ethnl_module_fw_flash_validate(dev, info->extack); + if (ret < 0) + goto out_rtnl; + + ret = module_flash_fw(dev, tb, skb, info); + + ethnl_ops_complete(dev); + +out_rtnl: + rtnl_unlock(); + ethnl_parse_header_dev_put(&req_info); + return ret; +} + +/* MODULE_FW_FLASH_NTF */ + +static int +ethnl_module_fw_flash_ntf_put_err(struct sk_buff *skb, char *err_msg, + char *sub_err_msg) +{ + int err_msg_len, sub_err_msg_len, total_len; + struct nlattr *attr; + + if (!err_msg) + return 0; + + err_msg_len = strlen(err_msg); + total_len = err_msg_len + 2; /* For period and NUL. */ + + if (sub_err_msg) { + sub_err_msg_len = strlen(sub_err_msg); + total_len += sub_err_msg_len + 2; /* For ", ". */ + } + + attr = nla_reserve(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS_MSG, + total_len); + if (!attr) + return -ENOMEM; + + if (sub_err_msg) + sprintf(nla_data(attr), "%s, %s.", err_msg, sub_err_msg); + else + sprintf(nla_data(attr), "%s.", err_msg); + + return 0; +} + +static void +ethnl_module_fw_flash_ntf(struct net_device *dev, + enum ethtool_module_fw_flash_status status, + struct ethnl_module_fw_flash_ntf_params *ntf_params, + char *err_msg, char *sub_err_msg, + u64 done, u64 total) +{ + struct sk_buff *skb; + void *hdr; + int ret; + + if (ntf_params->closed_sock) + return; + + skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); + if (!skb) + return; + + hdr = ethnl_unicast_put(skb, ntf_params->portid, ntf_params->seq, + ETHTOOL_MSG_MODULE_FW_FLASH_NTF); + if (!hdr) + goto err_skb; + + ret = ethnl_fill_reply_header(skb, dev, + ETHTOOL_A_MODULE_FW_FLASH_HEADER); + if (ret < 0) + goto err_skb; + + if (nla_put_u32(skb, ETHTOOL_A_MODULE_FW_FLASH_STATUS, status)) + goto err_skb; + + ret = ethnl_module_fw_flash_ntf_put_err(skb, err_msg, sub_err_msg); + if (ret < 0) + goto err_skb; + + if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_DONE, done)) + goto err_skb; + + if (nla_put_uint(skb, ETHTOOL_A_MODULE_FW_FLASH_TOTAL, total)) + goto err_skb; + + genlmsg_end(skb, hdr); + genlmsg_unicast(dev_net(dev), skb, ntf_params->portid); + return; + +err_skb: + nlmsg_free(skb); +} + +void ethnl_module_fw_flash_ntf_err(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + char *err_msg, char *sub_err_msg) +{ + ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_ERROR, + params, err_msg, sub_err_msg, 0, 0); +} + +void +ethnl_module_fw_flash_ntf_start(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params) +{ + ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_STARTED, + params, NULL, NULL, 0, 0); +} + +void +ethnl_module_fw_flash_ntf_complete(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params) +{ + ethnl_module_fw_flash_ntf(dev, ETHTOOL_MODULE_FW_FLASH_STATUS_COMPLETED, + params, NULL, NULL, 0, 0); +} + +void +ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + u64 done, u64 total) +{ + ethnl_module_fw_flash_ntf(dev, + ETHTOOL_MODULE_FW_FLASH_STATUS_IN_PROGRESS, + params, NULL, NULL, done, total); +} diff --git a/net/ethtool/module_fw.h b/net/ethtool/module_fw.h new file mode 100644 index 000000000000..634543a12d0c --- /dev/null +++ b/net/ethtool/module_fw.h @@ -0,0 +1,75 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ + +#include <uapi/linux/ethtool.h> +#include "netlink.h" + +/** + * struct ethnl_module_fw_flash_ntf_params - module firmware flashing + * notifications parameters + * @portid: Netlink portid of sender. + * @seq: Sequence number of sender. + * @closed_sock: Indicates whether the socket was closed from user space. + */ +struct ethnl_module_fw_flash_ntf_params { + u32 portid; + u32 seq; + bool closed_sock; +}; + +/** + * struct ethtool_module_fw_flash_params - module firmware flashing parameters + * @password: Module password. Only valid when @pass_valid is set. + * @password_valid: Whether the module password is valid or not. + */ +struct ethtool_module_fw_flash_params { + __be32 password; + u8 password_valid:1; +}; + +/** + * struct ethtool_cmis_fw_update_params - CMIS firmware update specific + * parameters + * @dev: Pointer to the net_device to be flashed. + * @params: Module firmware flashing parameters. + * @ntf_params: Module firmware flashing notification parameters. + * @fw: Firmware to flash. + */ +struct ethtool_cmis_fw_update_params { + struct net_device *dev; + struct ethtool_module_fw_flash_params params; + struct ethnl_module_fw_flash_ntf_params ntf_params; + const struct firmware *fw; +}; + +/** + * struct ethtool_module_fw_flash - module firmware flashing + * @list: List node for &module_fw_flash_work_list. + * @dev_tracker: Refcount tracker for @dev. + * @work: The flashing firmware work. + * @fw_update: CMIS firmware update specific parameters. + */ +struct ethtool_module_fw_flash { + struct list_head list; + netdevice_tracker dev_tracker; + struct work_struct work; + struct ethtool_cmis_fw_update_params fw_update; +}; + +void ethnl_module_fw_flash_sock_destroy(struct ethnl_sock_priv *sk_priv); + +void +ethnl_module_fw_flash_ntf_err(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + char *err_msg, char *sub_err_msg); +void +ethnl_module_fw_flash_ntf_start(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params); +void +ethnl_module_fw_flash_ntf_complete(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params); +void +ethnl_module_fw_flash_ntf_in_progress(struct net_device *dev, + struct ethnl_module_fw_flash_ntf_params *params, + u64 done, u64 total); + +void ethtool_cmis_fw_update(struct ethtool_cmis_fw_update_params *params); diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c index bd04f28d5cf4..81fe2e5b95f6 100644 --- a/net/ethtool/netlink.c +++ b/net/ethtool/netlink.c @@ -4,6 +4,7 @@ #include <linux/ethtool_netlink.h> #include <linux/pm_runtime.h> #include "netlink.h" +#include "module_fw.h" static struct genl_family ethtool_genl_family; @@ -30,6 +31,35 @@ const struct nla_policy ethnl_header_policy_stats[] = { ETHTOOL_FLAGS_STATS), }; +int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, + enum ethnl_sock_type type) +{ + struct ethnl_sock_priv *sk_priv; + + sk_priv = genl_sk_priv_get(ðtool_genl_family, NETLINK_CB(skb).sk); + if (IS_ERR(sk_priv)) + return PTR_ERR(sk_priv); + + sk_priv->dev = dev; + sk_priv->portid = portid; + sk_priv->type = type; + + return 0; +} + +static void ethnl_sock_priv_destroy(void *priv) +{ + struct ethnl_sock_priv *sk_priv = priv; + + switch (sk_priv->type) { + case ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH: + ethnl_module_fw_flash_sock_destroy(sk_priv); + break; + default: + break; + } +} + int ethnl_ops_begin(struct net_device *dev) { int ret; @@ -239,6 +269,11 @@ void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd) cmd); } +void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd) +{ + return genlmsg_put(skb, portid, seq, ðtool_genl_family, 0, cmd); +} + int ethnl_multicast(struct sk_buff *skb, struct net_device *dev) { return genlmsg_multicast_netns(ðtool_genl_family, dev_net(dev), skb, @@ -760,10 +795,22 @@ static void ethnl_notify_features(struct netdev_notifier_info *info) static int ethnl_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { + struct netdev_notifier_info *info = ptr; + struct netlink_ext_ack *extack; + struct net_device *dev; + + dev = netdev_notifier_info_to_dev(info); + extack = netdev_notifier_info_to_extack(info); + switch (event) { case NETDEV_FEAT_CHANGE: ethnl_notify_features(ptr); break; + case NETDEV_PRE_UP: + if (dev->module_fw_flash_in_progress) { + NL_SET_ERR_MSG(extack, "Can't set port up while flashing module firmware"); + return NOTIFY_BAD; + } } return NOTIFY_DONE; @@ -1125,6 +1172,13 @@ static const struct genl_ops ethtool_genl_ops[] = { .policy = ethnl_mm_set_policy, .maxattr = ARRAY_SIZE(ethnl_mm_set_policy) - 1, }, + { + .cmd = ETHTOOL_MSG_MODULE_FW_FLASH_ACT, + .flags = GENL_UNS_ADMIN_PERM, + .doit = ethnl_act_module_fw_flash, + .policy = ethnl_module_fw_flash_act_policy, + .maxattr = ARRAY_SIZE(ethnl_module_fw_flash_act_policy) - 1, + }, }; static const struct genl_multicast_group ethtool_nl_mcgrps[] = { @@ -1141,6 +1195,8 @@ static struct genl_family ethtool_genl_family __ro_after_init = { .resv_start_op = ETHTOOL_MSG_MODULE_GET + 1, .mcgrps = ethtool_nl_mcgrps, .n_mcgrps = ARRAY_SIZE(ethtool_nl_mcgrps), + .sock_priv_size = sizeof(struct ethnl_sock_priv), + .sock_priv_destroy = ethnl_sock_priv_destroy, }; /* module setup */ diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h index 9a333a8d04c1..46ec273a87c5 100644 --- a/net/ethtool/netlink.h +++ b/net/ethtool/netlink.h @@ -21,6 +21,7 @@ struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd, void **ehdrp); void *ethnl_dump_put(struct sk_buff *skb, struct netlink_callback *cb, u8 cmd); void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd); +void *ethnl_unicast_put(struct sk_buff *skb, u32 portid, u32 seq, u8 cmd); int ethnl_multicast(struct sk_buff *skb, struct net_device *dev); /** @@ -283,6 +284,19 @@ struct ethnl_reply_data { int ethnl_ops_begin(struct net_device *dev); void ethnl_ops_complete(struct net_device *dev); +enum ethnl_sock_type { + ETHTOOL_SOCK_TYPE_MODULE_FW_FLASH, +}; + +struct ethnl_sock_priv { + struct net_device *dev; + u32 portid; + enum ethnl_sock_type type; +}; + +int ethnl_sock_priv_set(struct sk_buff *skb, struct net_device *dev, u32 portid, + enum ethnl_sock_type type); + /** * struct ethnl_request_ops - unified handling of GET and SET requests * @request_cmd: command id for request (GET) @@ -441,6 +455,7 @@ extern const struct nla_policy ethnl_plca_set_cfg_policy[ETHTOOL_A_PLCA_MAX + 1] extern const struct nla_policy ethnl_plca_get_status_policy[ETHTOOL_A_PLCA_HEADER + 1]; extern const struct nla_policy ethnl_mm_get_policy[ETHTOOL_A_MM_HEADER + 1]; extern const struct nla_policy ethnl_mm_set_policy[ETHTOOL_A_MM_MAX + 1]; +extern const struct nla_policy ethnl_module_fw_flash_act_policy[ETHTOOL_A_MODULE_FW_FLASH_PASSWORD + 1]; int ethnl_set_features(struct sk_buff *skb, struct genl_info *info); int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info); @@ -448,6 +463,7 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_doit(struct sk_buff *skb, struct genl_info *info); int ethnl_tunnel_info_start(struct netlink_callback *cb); int ethnl_tunnel_info_dumpit(struct sk_buff *skb, struct netlink_callback *cb); +int ethnl_act_module_fw_flash(struct sk_buff *skb, struct genl_info *info); extern const char stats_std_names[__ETHTOOL_STATS_CNT][ETH_GSTRING_LEN]; extern const char stats_eth_phy_names[__ETHTOOL_A_STATS_ETH_PHY_CNT][ETH_GSTRING_LEN]; diff --git a/net/ethtool/wol.c b/net/ethtool/wol.c index 0ed56c9ac1bc..a39d8000d808 100644 --- a/net/ethtool/wol.c +++ b/net/ethtool/wol.c @@ -137,7 +137,7 @@ ethnl_set_wol(struct ethnl_req_info *req_info, struct genl_info *info) ret = dev->ethtool_ops->set_wol(dev, &wol); if (ret) return ret; - dev->wol_enabled = !!wol.wolopts; + dev->ethtool->wol_enabled = !!wol.wolopts; return 1; } diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d08bf16d476d..ed97df6af14d 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -938,8 +938,7 @@ static int udp_send_skb(struct sk_buff *skb, struct flowi4 *fl4, kfree_skb(skb); return -EINVAL; } - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) { + if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } diff --git a/net/ipv4/udp_offload.c b/net/ipv4/udp_offload.c index 59448a2dbf2c..aa2e0a28ca61 100644 --- a/net/ipv4/udp_offload.c +++ b/net/ipv4/udp_offload.c @@ -357,6 +357,14 @@ struct sk_buff *__udp_gso_segment(struct sk_buff *gso_skb, else uh->check = gso_make_checksum(seg, ~check) ? : CSUM_MANGLED_0; + /* On the TX path, CHECKSUM_NONE and CHECKSUM_UNNECESSARY have the same + * meaning. However, check for bad offloads in the GSO stack expects the + * latter, if the checksum was calculated in software. To vouch for the + * segment skbs we actually need to set it on the gso_skb. + */ + if (gso_skb->ip_summed == CHECKSUM_NONE) + gso_skb->ip_summed = CHECKSUM_UNNECESSARY; + /* update refcount for the packet */ if (copy_dtor) { int delta = sum_truesize - gso_skb->truesize; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b56f0b9f4307..b5456394cc67 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1257,8 +1257,7 @@ static int udp_v6_send_skb(struct sk_buff *skb, struct flowi6 *fl6, kfree_skb(skb); return -EINVAL; } - if (skb->ip_summed != CHECKSUM_PARTIAL || is_udplite || - dst_xfrm(skb_dst(skb))) { + if (is_udplite || dst_xfrm(skb_dst(skb))) { kfree_skb(skb); return -EIO; } diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 1e689c714127..83e452916403 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -126,7 +126,7 @@ sctp_snat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, if (sctph->source != cp->vport || payload_csum || skb->ip_summed == CHECKSUM_PARTIAL) { sctph->source = cp->vport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else { skb->ip_summed = CHECKSUM_UNNECESSARY; @@ -175,7 +175,7 @@ sctp_dnat_handler(struct sk_buff *skb, struct ip_vs_protocol *pp, (skb->ip_summed == CHECKSUM_PARTIAL && !(skb_dst(skb)->dev->features & NETIF_F_SCTP_CRC))) { sctph->dest = cp->dport; - if (!skb_is_gso(skb) || !skb_is_gso_sctp(skb)) + if (!skb_is_gso(skb)) sctp_nat_csum(skb, sctph, sctphoff); } else if (skb->ip_summed != CHECKSUM_PARTIAL) { skb->ip_summed = CHECKSUM_UNNECESSARY; diff --git a/net/netfilter/nf_conncount.c b/net/netfilter/nf_conncount.c index 8715617b02fe..34ba14e59e95 100644 --- a/net/netfilter/nf_conncount.c +++ b/net/netfilter/nf_conncount.c @@ -321,7 +321,6 @@ insert_tree(struct net *net, struct nf_conncount_rb *rbconn; struct nf_conncount_tuple *conn; unsigned int count = 0, gc_count = 0; - u8 keylen = data->keylen; bool do_gc = true; spin_lock_bh(&nf_conncount_locks[hash]); @@ -333,7 +332,7 @@ restart: rbconn = rb_entry(*rbnode, struct nf_conncount_rb, node); parent = *rbnode; - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { rbnode = &((*rbnode)->rb_left); } else if (diff > 0) { @@ -378,7 +377,7 @@ restart: conn->tuple = *tuple; conn->zone = *zone; - memcpy(rbconn->key, key, sizeof(u32) * keylen); + memcpy(rbconn->key, key, sizeof(u32) * data->keylen); nf_conncount_list_init(&rbconn->list); list_add(&conn->node, &rbconn->list.head); @@ -403,7 +402,6 @@ count_tree(struct net *net, struct rb_node *parent; struct nf_conncount_rb *rbconn; unsigned int hash; - u8 keylen = data->keylen; hash = jhash2(key, data->keylen, conncount_rnd) % CONNCOUNT_SLOTS; root = &data->root[hash]; @@ -414,7 +412,7 @@ count_tree(struct net *net, rbconn = rb_entry(parent, struct nf_conncount_rb, node); - diff = key_diff(key, rbconn->key, keylen); + diff = key_diff(key, rbconn->key, data->keylen); if (diff < 0) { parent = rcu_dereference_raw(parent->rb_left); } else if (diff > 0) { diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index e8dcf41d360d..70d0bad029fd 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -153,14 +153,18 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx, { struct nft_trans *trans; - trans = kzalloc(sizeof(struct nft_trans) + size, gfp); + trans = kzalloc(size, gfp); if (trans == NULL) return NULL; INIT_LIST_HEAD(&trans->list); - INIT_LIST_HEAD(&trans->binding_list); trans->msg_type = msg_type; - trans->ctx = *ctx; + + trans->net = ctx->net; + trans->table = ctx->table; + trans->seq = ctx->seq; + trans->flags = ctx->flags; + trans->report = ctx->report; return trans; } @@ -171,10 +175,26 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx, return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL); } +static struct nft_trans_binding *nft_trans_get_binding(struct nft_trans *trans) +{ + switch (trans->msg_type) { + case NFT_MSG_NEWCHAIN: + case NFT_MSG_NEWSET: + return container_of(trans, struct nft_trans_binding, nft_trans); + } + + return NULL; +} + static void nft_trans_list_del(struct nft_trans *trans) { + struct nft_trans_binding *trans_binding; + list_del(&trans->list); - list_del(&trans->binding_list); + + trans_binding = nft_trans_get_binding(trans); + if (trans_binding) + list_del(&trans_binding->binding_list); } static void nft_trans_destroy(struct nft_trans *trans) @@ -236,7 +256,7 @@ static void __nft_chain_trans_bind(const struct nft_ctx *ctx, nft_trans_chain_bound(trans) = bind; break; case NFT_MSG_NEWRULE: - if (trans->ctx.chain == chain) + if (nft_trans_rule_chain(trans) == chain) nft_trans_rule_bound(trans) = bind; break; } @@ -372,21 +392,26 @@ static void nf_tables_unregister_hook(struct net *net, static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *trans) { struct nftables_pernet *nft_net = nft_pernet(net); + struct nft_trans_binding *binding; + + list_add_tail(&trans->list, &nft_net->commit_list); + + binding = nft_trans_get_binding(trans); + if (!binding) + return; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && nft_set_is_anonymous(nft_trans_set(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; case NFT_MSG_NEWCHAIN: if (!nft_trans_chain_update(trans) && nft_chain_binding(nft_trans_chain(trans))) - list_add_tail(&trans->binding_list, &nft_net->binding_list); + list_add_tail(&binding->binding_list, &nft_net->binding_list); break; } - - list_add_tail(&trans->list, &nft_net->commit_list); } static int nft_trans_table_add(struct nft_ctx *ctx, int msg_type) @@ -416,11 +441,28 @@ static int nft_deltable(struct nft_ctx *ctx) return err; } -static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +static struct nft_trans * +nft_trans_alloc_chain(const struct nft_ctx *ctx, int msg_type) { + struct nft_trans_chain *trans_chain; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_chain)); + if (!trans) + return NULL; + + trans_chain = nft_trans_container_chain(trans); + INIT_LIST_HEAD(&trans_chain->nft_trans_binding.binding_list); + trans_chain->chain = ctx->chain; + + return trans; +} + +static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) +{ + struct nft_trans *trans; + + trans = nft_trans_alloc_chain(ctx, msg_type); if (trans == NULL) return ERR_PTR(-ENOMEM); @@ -432,7 +474,6 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type) ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID])); } } - nft_trans_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -505,6 +546,7 @@ static struct nft_trans *nft_trans_rule_add(struct nft_ctx *ctx, int msg_type, ntohl(nla_get_be32(ctx->nla[NFTA_RULE_ID])); } nft_trans_rule(trans) = rule; + nft_trans_rule_chain(trans) = ctx->chain; nft_trans_commit_list_add_tail(ctx->net, trans); return trans; @@ -560,12 +602,16 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type, struct nft_set *set, const struct nft_set_desc *desc) { + struct nft_trans_set *trans_set; struct nft_trans *trans; trans = nft_trans_alloc(ctx, msg_type, sizeof(struct nft_trans_set)); if (trans == NULL) return -ENOMEM; + trans_set = nft_trans_container_set(trans); + INIT_LIST_HEAD(&trans_set->nft_trans_binding.binding_list); + if (msg_type == NFT_MSG_NEWSET && ctx->nla[NFTA_SET_ID] && !desc) { nft_trans_set_id(trans) = ntohl(nla_get_be32(ctx->nla[NFTA_SET_ID])); @@ -1217,11 +1263,11 @@ static bool nft_table_pending_update(const struct nft_ctx *ctx) return true; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.table == ctx->table && + if (trans->table == ctx->table && ((trans->msg_type == NFT_MSG_NEWCHAIN && nft_trans_chain_update(trans)) || (trans->msg_type == NFT_MSG_DELCHAIN && - nft_is_base_chain(trans->ctx.chain)))) + nft_is_base_chain(nft_trans_chain(trans))))) return true; } @@ -1615,15 +1661,15 @@ static int nf_tables_deltable(struct sk_buff *skb, const struct nfnl_info *info, return nft_flush_table(&ctx); } -static void nf_tables_table_destroy(struct nft_ctx *ctx) +static void nf_tables_table_destroy(struct nft_table *table) { - if (WARN_ON(ctx->table->use > 0)) + if (WARN_ON(table->use > 0)) return; - rhltable_destroy(&ctx->table->chains_ht); - kfree(ctx->table->name); - kfree(ctx->table->udata); - kfree(ctx->table); + rhltable_destroy(&table->chains_ht); + kfree(table->name); + kfree(table->udata); + kfree(table); } void nft_register_chain_type(const struct nft_chain_type *ctype) @@ -2049,18 +2095,19 @@ static struct nft_stats __percpu *nft_stats_alloc(const struct nlattr *attr) return newstats; } -static void nft_chain_stats_replace(struct nft_trans *trans) +static void nft_chain_stats_replace(struct nft_trans_chain *trans) { - struct nft_base_chain *chain = nft_base_chain(trans->ctx.chain); + const struct nft_trans *t = &trans->nft_trans_binding.nft_trans; + struct nft_base_chain *chain = nft_base_chain(trans->chain); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) return; - nft_trans_chain_stats(trans) = - rcu_replace_pointer(chain->stats, nft_trans_chain_stats(trans), - lockdep_commit_lock_is_held(trans->ctx.net)); + trans->stats = + rcu_replace_pointer(chain->stats, trans->stats, + lockdep_commit_lock_is_held(t->net)); - if (!nft_trans_chain_stats(trans)) + if (!trans->stats) static_branch_inc(&nft_counters_enabled); } @@ -2078,9 +2125,9 @@ static void nf_tables_chain_free_chain_rules(struct nft_chain *chain) kvfree(chain->blob_next); } -void nf_tables_chain_destroy(struct nft_ctx *ctx) +void nf_tables_chain_destroy(struct nft_chain *chain) { - struct nft_chain *chain = ctx->chain; + const struct nft_table *table = chain->table; struct nft_hook *hook, *next; if (WARN_ON(chain->use > 0)) @@ -2092,7 +2139,7 @@ void nf_tables_chain_destroy(struct nft_ctx *ctx) if (nft_is_base_chain(chain)) { struct nft_base_chain *basechain = nft_base_chain(chain); - if (nft_base_chain_netdev(ctx->family, basechain->ops.hooknum)) { + if (nft_base_chain_netdev(table->family, basechain->ops.hooknum)) { list_for_each_entry_safe(hook, next, &basechain->hook_list, list) { list_del_rcu(&hook->list); @@ -2581,7 +2628,7 @@ err_chain_add: err_trans: nft_use_dec_restore(&table->use); err_destroy_chain: - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(chain); return err; } @@ -2698,8 +2745,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, } err = -ENOMEM; - trans = nft_trans_alloc(ctx, NFT_MSG_NEWCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_NEWCHAIN); if (trans == NULL) goto err_trans; @@ -2725,7 +2771,7 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy, err = -EEXIST; list_for_each_entry(tmp, &nft_net->commit_list, list) { if (tmp->msg_type == NFT_MSG_NEWCHAIN && - tmp->ctx.table == table && + tmp->table == table && nft_trans_chain_update(tmp) && nft_trans_chain_name(tmp) && strcmp(name, nft_trans_chain_name(tmp)) == 0) { @@ -2774,13 +2820,11 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net, struct nft_trans *trans; list_for_each_entry(trans, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWCHAIN && - chain->table == table && + nft_trans_chain(trans)->table == table && id == nft_trans_chain_id(trans) && - nft_active_genmask(chain, genmask)) - return chain; + nft_active_genmask(nft_trans_chain(trans), genmask)) + return nft_trans_chain(trans); } return ERR_PTR(-ENOENT); } @@ -2915,8 +2959,7 @@ static int nft_delchain_hook(struct nft_ctx *ctx, list_move(&hook->list, &chain_del_list); } - trans = nft_trans_alloc(ctx, NFT_MSG_DELCHAIN, - sizeof(struct nft_trans_chain)); + trans = nft_trans_alloc_chain(ctx, NFT_MSG_DELCHAIN); if (!trans) { err = -ENOMEM; goto err_chain_del_hook; @@ -4188,7 +4231,7 @@ static struct nft_rule *nft_rule_lookup_byid(const struct net *net, list_for_each_entry(trans, &nft_net->commit_list, list) { if (trans->msg_type == NFT_MSG_NEWRULE && - trans->ctx.chain == chain && + nft_trans_rule_chain(trans) == chain && id == nft_trans_rule_id(trans)) return nft_trans_rule(trans); } @@ -9417,51 +9460,53 @@ static int nf_tables_validate(struct net *net) * * We defer the drop policy until the transaction has been finalized. */ -static void nft_chain_commit_drop_policy(struct nft_trans *trans) +static void nft_chain_commit_drop_policy(struct nft_trans_chain *trans) { struct nft_base_chain *basechain; - if (nft_trans_chain_policy(trans) != NF_DROP) + if (trans->policy != NF_DROP) return; - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); basechain->policy = NF_DROP; } -static void nft_chain_commit_update(struct nft_trans *trans) +static void nft_chain_commit_update(struct nft_trans_chain *trans) { + struct nft_table *table = trans->nft_trans_binding.nft_trans.table; struct nft_base_chain *basechain; - if (nft_trans_chain_name(trans)) { - rhltable_remove(&trans->ctx.table->chains_ht, - &trans->ctx.chain->rhlhead, + if (trans->name) { + rhltable_remove(&table->chains_ht, + &trans->chain->rhlhead, nft_chain_ht_params); - swap(trans->ctx.chain->name, nft_trans_chain_name(trans)); - rhltable_insert_key(&trans->ctx.table->chains_ht, - trans->ctx.chain->name, - &trans->ctx.chain->rhlhead, + swap(trans->chain->name, trans->name); + rhltable_insert_key(&table->chains_ht, + trans->chain->name, + &trans->chain->rhlhead, nft_chain_ht_params); } - if (!nft_is_base_chain(trans->ctx.chain)) + if (!nft_is_base_chain(trans->chain)) return; nft_chain_stats_replace(trans); - basechain = nft_base_chain(trans->ctx.chain); + basechain = nft_base_chain(trans->chain); - switch (nft_trans_chain_policy(trans)) { + switch (trans->policy) { case NF_DROP: case NF_ACCEPT: - basechain->policy = nft_trans_chain_policy(trans); + basechain->policy = trans->policy; break; } } -static void nft_obj_commit_update(struct nft_trans *trans) +static void nft_obj_commit_update(const struct nft_ctx *ctx, + struct nft_trans *trans) { struct nft_object *newobj; struct nft_object *obj; @@ -9473,15 +9518,21 @@ static void nft_obj_commit_update(struct nft_trans *trans) return; obj->ops->update(obj, newobj); - nft_obj_destroy(&trans->ctx, newobj); + nft_obj_destroy(ctx, newobj); } static void nft_commit_release(struct nft_trans *trans) { + struct nft_ctx ctx = { + .net = trans->net, + }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: free_percpu(nft_trans_chain_stats(trans)); @@ -9492,25 +9543,25 @@ static void nft_commit_release(struct nft_trans *trans) if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - nf_tables_set_elem_destroy(&trans->ctx, + nf_tables_set_elem_destroy(&ctx, nft_trans_elem_set(trans), nft_trans_elem_priv(trans)); break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: @@ -9522,7 +9573,7 @@ static void nft_commit_release(struct nft_trans *trans) } if (trans->put_net) - put_net(trans->ctx.net); + put_net(trans->net); kfree(trans); } @@ -9641,10 +9692,10 @@ static void nf_tables_commit_chain_prepare_cancel(struct net *net) struct nft_trans *trans, *next; list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - struct nft_chain *chain = trans->ctx.chain; - if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { + struct nft_chain *chain = nft_trans_rule_chain(trans); + kvfree(chain->blob_next); chain->blob_next = NULL; } @@ -10002,7 +10053,7 @@ static void nf_tables_commit_release(struct net *net) trans = list_last_entry(&nft_net->commit_list, struct nft_trans, list); - get_net(trans->ctx.net); + get_net(trans->net); WARN_ON_ONCE(trans->put_net); trans->put_net = true; @@ -10146,12 +10197,15 @@ static void nft_gc_seq_end(struct nftables_pernet *nft_net, unsigned int gc_seq) static int nf_tables_commit(struct net *net, struct sk_buff *skb) { struct nftables_pernet *nft_net = nft_pernet(net); + const struct nlmsghdr *nlh = nlmsg_hdr(skb); + struct nft_trans_binding *trans_binding; struct nft_trans *trans, *next; unsigned int base_seq, gc_seq; LIST_HEAD(set_update_list); struct nft_trans_elem *te; struct nft_chain *chain; struct nft_table *table; + struct nft_ctx ctx; LIST_HEAD(adl); int err; @@ -10160,7 +10214,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return 0; } - list_for_each_entry(trans, &nft_net->binding_list, binding_list) { + nft_ctx_init(&ctx, net, skb, nlh, NFPROTO_UNSPEC, NULL, NULL, NULL); + + list_for_each_entry(trans_binding, &nft_net->binding_list, binding_list) { + trans = &trans_binding->nft_trans; switch (trans->msg_type) { case NFT_MSG_NEWSET: if (!nft_trans_set_update(trans) && @@ -10178,6 +10235,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) return -EINVAL; } break; + default: + WARN_ONCE(1, "Unhandled bind type %d", trans->msg_type); + break; } } @@ -10193,9 +10253,10 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) /* 1. Allocate space for next generation rules_gen_X[] */ list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->table; int ret; - ret = nf_tables_commit_audit_alloc(&adl, trans->ctx.table); + ret = nf_tables_commit_audit_alloc(&adl, table); if (ret) { nf_tables_commit_chain_prepare_cancel(net); nf_tables_commit_audit_free(&adl); @@ -10203,7 +10264,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) } if (trans->msg_type == NFT_MSG_NEWRULE || trans->msg_type == NFT_MSG_DELRULE) { - chain = trans->ctx.chain; + chain = nft_trans_rule_chain(trans); ret = nf_tables_commit_chain_prepare(net, chain); if (ret < 0) { @@ -10236,70 +10297,71 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) net->nft.gencursor = nft_gencursor_next(net); list_for_each_entry_safe(trans, next, &nft_net->commit_list, list) { - nf_tables_commit_audit_collect(&adl, trans->ctx.table, - trans->msg_type); + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); + + nf_tables_commit_audit_collect(&adl, table, trans->msg_type); switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & NFT_TABLE_F_DORMANT) - nf_tables_table_disable(net, trans->ctx.table); + if (table->flags & NFT_TABLE_F_DORMANT) + nf_tables_table_disable(net, table); - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; } else { - nft_clear(net, trans->ctx.table); + nft_clear(net, table); } - nf_tables_table_notify(&trans->ctx, NFT_MSG_NEWTABLE); + nf_tables_table_notify(&ctx, NFT_MSG_NEWTABLE); nft_trans_destroy(trans); break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - list_del_rcu(&trans->ctx.table->list); - nf_tables_table_notify(&trans->ctx, trans->msg_type); + list_del_rcu(&table->list); + nf_tables_table_notify(&ctx, trans->msg_type); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - nft_chain_commit_update(trans); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, + nft_chain_commit_update(nft_trans_container_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, &nft_trans_chain_hooks(trans)); list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); /* trans destroyed after rcu grace period */ } else { - nft_chain_commit_drop_policy(trans); - nft_clear(net, trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_NEWCHAIN, NULL); + nft_chain_commit_drop_policy(nft_trans_container_chain(trans)); + nft_clear(net, nft_trans_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_NEWCHAIN, NULL); nft_trans_destroy(trans); } break; case NFT_MSG_DELCHAIN: case NFT_MSG_DESTROYCHAIN: if (nft_trans_chain_update(trans)) { - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, &nft_trans_chain_hooks(trans)); - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); } } else { - nft_chain_del(trans->ctx.chain); - nf_tables_chain_notify(&trans->ctx, NFT_MSG_DELCHAIN, + nft_chain_del(nft_trans_chain(trans)); + nf_tables_chain_notify(&ctx, NFT_MSG_DELCHAIN, NULL); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain); + nf_tables_unregister_hook(ctx.net, ctx.table, + nft_trans_chain(trans)); } break; case NFT_MSG_NEWRULE: - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nft_clear(net, nft_trans_rule(trans)); + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), NFT_MSG_NEWRULE); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); @@ -10307,14 +10369,12 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: list_del_rcu(&nft_trans_rule(trans)->list); - nf_tables_rule_notify(&trans->ctx, - nft_trans_rule(trans), + nf_tables_rule_notify(&ctx, nft_trans_rule(trans), trans->msg_type); - nft_rule_expr_deactivate(&trans->ctx, - nft_trans_rule(trans), + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_COMMIT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_NEWSET: @@ -10333,9 +10393,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) */ if (nft_set_is_anonymous(nft_trans_set(trans)) && !list_empty(&nft_trans_set(trans)->bindings)) - nft_use_dec(&trans->ctx.table->use); + nft_use_dec(&table->use); } - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), NFT_MSG_NEWSET, GFP_KERNEL); nft_trans_destroy(trans); break; @@ -10343,14 +10403,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DESTROYSET: nft_trans_set(trans)->dead = 1; list_del_rcu(&nft_trans_set(trans)->list); - nf_tables_set_notify(&trans->ctx, nft_trans_set(trans), + nf_tables_set_notify(&ctx, nft_trans_set(trans), trans->msg_type, GFP_KERNEL); break; case NFT_MSG_NEWSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_activate(net, te->set, te->elem_priv); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, NFT_MSG_NEWSETELEM); if (te->set->ops->commit && @@ -10362,9 +10422,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); - nf_tables_setelem_notify(&trans->ctx, te->set, + nf_tables_setelem_notify(&ctx, te->set, te->elem_priv, trans->msg_type); nft_setelem_remove(net, te->set, te->elem_priv); @@ -10380,13 +10440,13 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_commit_update(trans); - nf_tables_obj_notify(&trans->ctx, + nft_obj_commit_update(&ctx, trans); + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); } else { nft_clear(net, nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), NFT_MSG_NEWOBJ); nft_trans_destroy(trans); @@ -10395,14 +10455,14 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: nft_obj_del(nft_trans_obj(trans)); - nf_tables_obj_notify(&trans->ctx, nft_trans_obj(trans), + nf_tables_obj_notify(&ctx, nft_trans_obj(trans), trans->msg_type); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) { nft_trans_flowtable(trans)->data.flags = nft_trans_flowtable_flags(trans); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), NFT_MSG_NEWFLOWTABLE); @@ -10410,7 +10470,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable(trans)->hook_list); } else { nft_clear(net, nft_trans_flowtable(trans)); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, NFT_MSG_NEWFLOWTABLE); @@ -10420,7 +10480,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) case NFT_MSG_DELFLOWTABLE: case NFT_MSG_DESTROYFLOWTABLE: if (nft_trans_flowtable_update(trans)) { - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), &nft_trans_flowtable_hooks(trans), trans->msg_type); @@ -10428,7 +10488,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) &nft_trans_flowtable_hooks(trans)); } else { list_del_rcu(&nft_trans_flowtable(trans)->list); - nf_tables_flowtable_notify(&trans->ctx, + nf_tables_flowtable_notify(&ctx, nft_trans_flowtable(trans), NULL, trans->msg_type); @@ -10470,28 +10530,32 @@ static void nf_tables_module_autoload(struct net *net) static void nf_tables_abort_release(struct nft_trans *trans) { + struct nft_ctx ctx = { }; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: - nf_tables_table_destroy(&trans->ctx); + nf_tables_table_destroy(trans->table); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) nft_hooks_destroy(&nft_trans_chain_hooks(trans)); else - nf_tables_chain_destroy(&trans->ctx); + nf_tables_chain_destroy(nft_trans_chain(trans)); break; case NFT_MSG_NEWRULE: - nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); + nf_tables_rule_destroy(&ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - nft_set_destroy(&trans->ctx, nft_trans_set(trans)); + nft_set_destroy(&ctx, nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), nft_trans_elem_priv(trans), true); break; case NFT_MSG_NEWOBJ: - nft_obj_destroy(&trans->ctx, nft_trans_obj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj(trans)); break; case NFT_MSG_NEWFLOWTABLE: if (nft_trans_flowtable_update(trans)) @@ -10523,6 +10587,9 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) struct nft_trans *trans, *next; LIST_HEAD(set_update_list); struct nft_trans_elem *te; + struct nft_ctx ctx = { + .net = net, + }; int err = 0; if (action == NFNL_ABORT_VALIDATE && @@ -10531,37 +10598,41 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_for_each_entry_safe_reverse(trans, next, &nft_net->commit_list, list) { + struct nft_table *table = trans->table; + + nft_ctx_update(&ctx, trans); + switch (trans->msg_type) { case NFT_MSG_NEWTABLE: if (nft_trans_table_update(trans)) { - if (!(trans->ctx.table->flags & __NFT_TABLE_F_UPDATE)) { + if (!(table->flags & __NFT_TABLE_F_UPDATE)) { nft_trans_destroy(trans); break; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_DORMANT) { - nf_tables_table_disable(net, trans->ctx.table); - trans->ctx.table->flags |= NFT_TABLE_F_DORMANT; - } else if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_AWAKEN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_DORMANT; + if (table->flags & __NFT_TABLE_F_WAS_DORMANT) { + nf_tables_table_disable(net, table); + table->flags |= NFT_TABLE_F_DORMANT; + } else if (table->flags & __NFT_TABLE_F_WAS_AWAKEN) { + table->flags &= ~NFT_TABLE_F_DORMANT; } - if (trans->ctx.table->flags & __NFT_TABLE_F_WAS_ORPHAN) { - trans->ctx.table->flags &= ~NFT_TABLE_F_OWNER; - trans->ctx.table->nlpid = 0; + if (table->flags & __NFT_TABLE_F_WAS_ORPHAN) { + table->flags &= ~NFT_TABLE_F_OWNER; + table->nlpid = 0; } - trans->ctx.table->flags &= ~__NFT_TABLE_F_UPDATE; + table->flags &= ~__NFT_TABLE_F_UPDATE; nft_trans_destroy(trans); } else { - list_del_rcu(&trans->ctx.table->list); + list_del_rcu(&table->list); } break; case NFT_MSG_DELTABLE: case NFT_MSG_DESTROYTABLE: - nft_clear(trans->ctx.net, trans->ctx.table); + nft_clear(trans->net, table); nft_trans_destroy(trans); break; case NFT_MSG_NEWCHAIN: if (nft_trans_chain_update(trans)) { - if (!(trans->ctx.table->flags & NFT_TABLE_F_DORMANT)) { + if (!(table->flags & NFT_TABLE_F_DORMANT)) { nft_netdev_unregister_hooks(net, &nft_trans_chain_hooks(trans), true); @@ -10574,11 +10645,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); - nft_chain_del(trans->ctx.chain); - nf_tables_unregister_hook(trans->ctx.net, - trans->ctx.table, - trans->ctx.chain); + nft_use_dec_restore(&table->use); + nft_chain_del(nft_trans_chain(trans)); + nf_tables_unregister_hook(trans->net, table, + nft_trans_chain(trans)); } break; case NFT_MSG_DELCHAIN: @@ -10587,8 +10657,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_chain_hooks(trans), &nft_trans_basechain(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, trans->ctx.chain); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_chain(trans)); } nft_trans_destroy(trans); break; @@ -10597,20 +10667,20 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.chain->use); + nft_use_dec_restore(&nft_trans_rule_chain(trans)->use); list_del_rcu(&nft_trans_rule(trans)->list); - nft_rule_expr_deactivate(&trans->ctx, + nft_rule_expr_deactivate(&ctx, nft_trans_rule(trans), NFT_TRANS_ABORT); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); break; case NFT_MSG_DELRULE: case NFT_MSG_DESTROYRULE: - nft_use_inc_restore(&trans->ctx.chain->use); - nft_clear(trans->ctx.net, nft_trans_rule(trans)); - nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans)); - if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) + nft_use_inc_restore(&nft_trans_rule_chain(trans)->use); + nft_clear(trans->net, nft_trans_rule(trans)); + nft_rule_expr_activate(&ctx, nft_trans_rule(trans)); + if (nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) nft_flow_rule_destroy(nft_trans_flow_rule(trans)); nft_trans_destroy(trans); @@ -10620,7 +10690,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); if (nft_trans_set_bound(trans)) { nft_trans_destroy(trans); break; @@ -10630,10 +10700,10 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSET: case NFT_MSG_DESTROYSET: - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_set(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_set(trans)); if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT)) - nft_map_activate(&trans->ctx, nft_trans_set(trans)); + nft_map_activate(&ctx, nft_trans_set(trans)); nft_trans_destroy(trans); break; @@ -10642,7 +10712,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_trans_destroy(trans); break; } - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); nft_setelem_remove(net, te->set, te->elem_priv); if (!nft_setelem_is_catchall(te->set, te->elem_priv)) atomic_dec(&te->set->nelems); @@ -10655,7 +10725,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_DELSETELEM: case NFT_MSG_DESTROYSETELEM: - te = (struct nft_trans_elem *)trans->data; + te = nft_trans_container_elem(trans); if (!nft_setelem_active_next(net, te->set, te->elem_priv)) { nft_setelem_data_activate(net, te->set, te->elem_priv); @@ -10673,17 +10743,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) break; case NFT_MSG_NEWOBJ: if (nft_trans_obj_update(trans)) { - nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans)); + nft_obj_destroy(&ctx, nft_trans_obj_newobj(trans)); nft_trans_destroy(trans); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); nft_obj_del(nft_trans_obj(trans)); } break; case NFT_MSG_DELOBJ: case NFT_MSG_DESTROYOBJ: - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_obj(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_obj(trans)); nft_trans_destroy(trans); break; case NFT_MSG_NEWFLOWTABLE: @@ -10691,7 +10761,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable_hooks(trans)); } else { - nft_use_dec_restore(&trans->ctx.table->use); + nft_use_dec_restore(&table->use); list_del_rcu(&nft_trans_flowtable(trans)->list); nft_unregister_flowtable_net_hooks(net, &nft_trans_flowtable(trans)->hook_list); @@ -10703,8 +10773,8 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action) list_splice(&nft_trans_flowtable_hooks(trans), &nft_trans_flowtable(trans)->hook_list); } else { - nft_use_inc_restore(&trans->ctx.table->use); - nft_clear(trans->ctx.net, nft_trans_flowtable(trans)); + nft_use_inc_restore(&table->use); + nft_clear(trans->net, nft_trans_flowtable(trans)); } nft_trans_destroy(trans); break; @@ -11365,7 +11435,7 @@ int __nft_release_basechain(struct nft_ctx *ctx) } nft_chain_del(ctx->chain); nft_use_dec(&ctx->table->use); - nf_tables_chain_destroy(ctx); + nf_tables_chain_destroy(ctx->chain); return 0; } @@ -11440,12 +11510,11 @@ static void __nft_release_table(struct net *net, struct nft_table *table) nft_obj_destroy(&ctx, obj); } list_for_each_entry_safe(chain, nc, &table->chains, list) { - ctx.chain = chain; nft_chain_del(chain); nft_use_dec(&table->use); - nf_tables_chain_destroy(&ctx); + nf_tables_chain_destroy(chain); } - nf_tables_table_destroy(&ctx); + nf_tables_table_destroy(table); } static void __nft_release_tables(struct net *net) @@ -11588,6 +11657,14 @@ static int __init nf_tables_module_init(void) { int err; + BUILD_BUG_ON(offsetof(struct nft_trans_table, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_chain, nft_trans_binding.nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_rule, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_set, nft_trans_binding.nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_elem, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_obj, nft_trans) != 0); + BUILD_BUG_ON(offsetof(struct nft_trans_flowtable, nft_trans) != 0); + err = register_pernet_subsys(&nf_tables_net_ops); if (err < 0) return err; diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c index 12ab78fa5d84..64675f1c7f29 100644 --- a/net/netfilter/nf_tables_offload.c +++ b/net/netfilter/nf_tables_offload.c @@ -513,38 +513,38 @@ static void nft_flow_rule_offload_abort(struct net *net, int err = 0; list_for_each_entry_continue_reverse(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_UNBIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_chain(trans->ctx.chain, NULL, + err = nft_flow_offload_chain(nft_trans_chain(trans), NULL, FLOW_BLOCK_BIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); @@ -564,46 +564,46 @@ int nft_flow_rule_offload_commit(struct net *net) u8 policy; list_for_each_entry(trans, &nft_net->commit_list, list) { - if (trans->ctx.family != NFPROTO_NETDEV) + if (trans->table->family != NFPROTO_NETDEV) continue; switch (trans->msg_type) { case NFT_MSG_NEWCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD) || + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD) || nft_trans_chain_update(trans)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_BIND); break; case NFT_MSG_DELCHAIN: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; policy = nft_trans_chain_policy(trans); - err = nft_flow_offload_chain(trans->ctx.chain, &policy, + err = nft_flow_offload_chain(nft_trans_chain(trans), &policy, FLOW_BLOCK_UNBIND); break; case NFT_MSG_NEWRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - if (trans->ctx.flags & NLM_F_REPLACE || - !(trans->ctx.flags & NLM_F_APPEND)) { + if (trans->flags & NLM_F_REPLACE || + !(trans->flags & NLM_F_APPEND)) { err = -EOPNOTSUPP; break; } - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), nft_trans_flow_rule(trans), FLOW_CLS_REPLACE); break; case NFT_MSG_DELRULE: - if (!(trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)) + if (!(nft_trans_rule_chain(trans)->flags & NFT_CHAIN_HW_OFFLOAD)) continue; - err = nft_flow_offload_rule(trans->ctx.chain, + err = nft_flow_offload_rule(nft_trans_rule_chain(trans), nft_trans_rule(trans), NULL, FLOW_CLS_DESTROY); break; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index f466af4f8531..eab4f476b47f 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -366,8 +366,7 @@ static int cttimeout_default_set(struct sk_buff *skb, __u8 l4num; int ret; - if (!cda[CTA_TIMEOUT_L3PROTO] || - !cda[CTA_TIMEOUT_L4PROTO] || + if (!cda[CTA_TIMEOUT_L4PROTO] || !cda[CTA_TIMEOUT_DATA]) return -EINVAL; diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c index 6475c7abc1fe..ac2422c215e5 100644 --- a/net/netfilter/nft_immediate.c +++ b/net/netfilter/nft_immediate.c @@ -221,7 +221,7 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx, list_del(&rule->list); nf_tables_rule_destroy(&chain_ctx, rule); } - nf_tables_chain_destroy(&chain_ctx); + nf_tables_chain_destroy(chain); break; default: break; diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index ef93e0d3bee0..588a5e6ad899 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -59,9 +59,9 @@ MODULE_PARM_DESC(ip_list_gid, "default owning group of /proc/net/xt_recent/* fil /* retained for backwards compatibility */ static unsigned int ip_pkt_list_tot __read_mostly; module_param(ip_pkt_list_tot, uint, 0400); -MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 255)"); +MODULE_PARM_DESC(ip_pkt_list_tot, "number of packets per IP address to remember (max. 65535)"); -#define XT_RECENT_MAX_NSTAMPS 256 +#define XT_RECENT_MAX_NSTAMPS 65536 struct recent_entry { struct list_head list; @@ -69,7 +69,7 @@ struct recent_entry { union nf_inet_addr addr; u_int16_t family; u_int8_t ttl; - u_int8_t index; + u_int16_t index; u_int16_t nstamps; unsigned long stamps[]; }; @@ -80,7 +80,7 @@ struct recent_table { union nf_inet_addr mask; unsigned int refcnt; unsigned int entries; - u8 nstamps_max_mask; + u_int16_t nstamps_max_mask; struct list_head lru_list; struct list_head iphash[]; }; diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index f4e8eb79c1b8..0712b5e82eb7 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -16,7 +16,8 @@ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H,dpll.h) -CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) +CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \ + $(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) @@ -25,3 +26,4 @@ CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h) +CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h) diff --git a/tools/testing/selftests/drivers/net/hw/rss_ctx.py b/tools/testing/selftests/drivers/net/hw/rss_ctx.py index 68c7d40214eb..3a804e41f7cb 100755 --- a/tools/testing/selftests/drivers/net/hw/rss_ctx.py +++ b/tools/testing/selftests/drivers/net/hw/rss_ctx.py @@ -8,7 +8,7 @@ from lib.py import NetDrvEpEnv from lib.py import NetdevFamily from lib.py import KsftSkipEx from lib.py import rand_port -from lib.py import ethtool, ip, GenerateTraffic, CmdExitFailure +from lib.py import ethtool, ip, defer, GenerateTraffic, CmdExitFailure def _rss_key_str(key): @@ -127,64 +127,56 @@ def test_rss_context(cfg, ctx_cnt=1, create_with_cfg=None): # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) - if qcnt >= 2 + 2 * ctx_cnt: - qcnt = None - else: + if qcnt < 2 + 2 * ctx_cnt: try: ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") except: raise KsftSkipEx("Not enough queues for the test") - ntuple = [] - ctx_id = [] ports = [] - try: - # Use queues 0 and 1 for normal traffic - ethtool(f"-X {cfg.ifname} equal 2") - for i in range(ctx_cnt): - want_cfg = f"start {2 + i * 2} equal 2" - create_cfg = want_cfg if create_with_cfg else "" - - try: - ctx_id.append(ethtool_create(cfg, "-X", f"context new {create_cfg}")) - except CmdExitFailure: - # try to carry on and skip at the end - if i == 0: - raise - ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") - ctx_cnt = i - break - - if not create_with_cfg: - ethtool(f"-X {cfg.ifname} context {ctx_id[i]} {want_cfg}") - - # Sanity check the context we just created - data = get_rss(cfg, ctx_id[i]) - ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) - ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) - - ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id[i]}" - ntuple.append(ethtool_create(cfg, "-N", flow)) + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") - for i in range(ctx_cnt): - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + for i in range(ctx_cnt): + want_cfg = f"start {2 + i * 2} equal 2" + create_cfg = want_cfg if create_with_cfg else "" - ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts)) - ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts)) - ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts)) - finally: - for nid in ntuple: - ethtool(f"-N {cfg.ifname} delete {nid}") - for cid in ctx_id: - ethtool(f"-X {cfg.ifname} context {cid} delete") - ethtool(f"-X {cfg.ifname} default") - if qcnt: - ethtool(f"-L {cfg.ifname} combined {qcnt}") + try: + ctx_id = ethtool_create(cfg, "-X", f"context new {create_cfg}") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") + except CmdExitFailure: + # try to carry on and skip at the end + if i == 0: + raise + ksft_pr(f"Failed to create context {i + 1}, trying to test what we got") + ctx_cnt = i + break + + if not create_with_cfg: + ethtool(f"-X {cfg.ifname} context {ctx_id} {want_cfg}") + + # Sanity check the context we just created + data = get_rss(cfg, ctx_id) + ksft_eq(min(data['rss-indirection-table']), 2 + i * 2, "Unexpected context cfg: " + str(data)) + ksft_eq(max(data['rss-indirection-table']), 2 + i * 2 + 1, "Unexpected context cfg: " + str(data)) + + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple}") + + for i in range(ctx_cnt): + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts)) + ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts)) + ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts)) if requested_ctx_cnt != ctx_cnt: raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") @@ -216,24 +208,23 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): # Try to allocate more queues when necessary qcnt = len(_get_rx_cnts(cfg)) - if qcnt >= 2 + 2 * ctx_cnt: - qcnt = None - else: + if qcnt < 2 + 2 * ctx_cnt: try: ksft_pr(f"Increasing queue count {qcnt} -> {2 + 2 * ctx_cnt}") ethtool(f"-L {cfg.ifname} combined {2 + 2 * ctx_cnt}") + defer(ethtool, f"-L {cfg.ifname} combined {qcnt}") except: raise KsftSkipEx("Not enough queues for the test") ntuple = [] - ctx_id = [] + ctx = [] ports = [] def remove_ctx(idx): - ethtool(f"-N {cfg.ifname} delete {ntuple[idx]}") + ntuple[idx].exec() ntuple[idx] = None - ethtool(f"-X {cfg.ifname} context {ctx_id[idx]} delete") - ctx_id[idx] = None + ctx[idx].exec() + ctx[idx] = None def check_traffic(): for i in range(ctx_cnt): @@ -241,7 +232,7 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): GenerateTraffic(cfg, port=ports[i]).wait_pkts_and_stop(20000) cnts = _get_rx_cnts(cfg, prev=cnts) - if ctx_id[i] is None: + if ctx[i]: ksft_lt(sum(cnts[ :2]), 10000, "traffic on main context:" + str(cnts)) ksft_ge(sum(cnts[2+i*2:4+i*2]), 20000, f"traffic on context {i}: " + str(cnts)) ksft_eq(sum(cnts[2:2+i*2] + cnts[4+i*2:]), 0, "traffic on other contexts: " + str(cnts)) @@ -249,41 +240,32 @@ def test_rss_context_out_of_order(cfg, ctx_cnt=4): ksft_ge(sum(cnts[ :2]), 20000, "traffic on main context:" + str(cnts)) ksft_eq(sum(cnts[2: ]), 0, "traffic on other contexts: " + str(cnts)) - try: - # Use queues 0 and 1 for normal traffic - ethtool(f"-X {cfg.ifname} equal 2") - - for i in range(ctx_cnt): - ctx_id.append(ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2")) + # Use queues 0 and 1 for normal traffic + ethtool(f"-X {cfg.ifname} equal 2") + defer(ethtool, f"-X {cfg.ifname} default") - ports.append(rand_port()) - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id[i]}" - ntuple.append(ethtool_create(cfg, "-N", flow)) + for i in range(ctx_cnt): + ctx_id = ethtool_create(cfg, "-X", f"context new start {2 + i * 2} equal 2") + ctx.append(defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete")) - check_traffic() + ports.append(rand_port()) + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {ports[i]} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple.append(defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}")) - # Remove middle context - remove_ctx(ctx_cnt // 2) - check_traffic() + check_traffic() - # Remove first context - remove_ctx(0) - check_traffic() + # Remove middle context + remove_ctx(ctx_cnt // 2) + check_traffic() - # Remove last context - remove_ctx(-1) - check_traffic() + # Remove first context + remove_ctx(0) + check_traffic() - finally: - for nid in ntuple: - if nid is not None: - ethtool(f"-N {cfg.ifname} delete {nid}") - for cid in ctx_id: - if cid is not None: - ethtool(f"-X {cfg.ifname} context {cid} delete") - ethtool(f"-X {cfg.ifname} default") - if qcnt: - ethtool(f"-L {cfg.ifname} combined {qcnt}") + # Remove last context + remove_ctx(-1) + check_traffic() if requested_ctx_cnt != ctx_cnt: raise KsftSkipEx(f"Tested only {ctx_cnt} contexts, wanted {requested_ctx_cnt}") @@ -298,69 +280,58 @@ def test_rss_context_overlap(cfg, other_ctx=0): require_ntuple(cfg) queue_cnt = len(_get_rx_cnts(cfg)) - if queue_cnt >= 4: - queue_cnt = None - else: + if queue_cnt < 4: try: ksft_pr(f"Increasing queue count {queue_cnt} -> 4") ethtool(f"-L {cfg.ifname} combined 4") + defer(ethtool, f"-L {cfg.ifname} combined {queue_cnt}") except: raise KsftSkipEx("Not enough queues for the test") - ctx_id = None - ntuple = None if other_ctx == 0: ethtool(f"-X {cfg.ifname} equal 4") + defer(ethtool, f"-X {cfg.ifname} default") else: other_ctx = ethtool_create(cfg, "-X", "context new") ethtool(f"-X {cfg.ifname} context {other_ctx} equal 4") + defer(ethtool, f"-X {cfg.ifname} context {other_ctx} delete") - try: - ctx_id = ethtool_create(cfg, "-X", "context new") - ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + ctx_id = ethtool_create(cfg, "-X", "context new") + ethtool(f"-X {cfg.ifname} context {ctx_id} start 2 equal 2") + defer(ethtool, f"-X {cfg.ifname} context {ctx_id} delete") - port = rand_port() - if other_ctx: - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" - ntuple = ethtool_create(cfg, "-N", flow) + port = rand_port() + if other_ctx: + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {other_ctx}" + ntuple_id = ethtool_create(cfg, "-N", flow) + ntuple = defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") - # Test the main context - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + # Test the main context + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) - ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) - ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) - ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) - if other_ctx == 0: - ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) - - # Now create a rule for context 1 and make sure traffic goes to a subset - if other_ctx: - ethtool(f"-N {cfg.ifname} delete {ntuple}") - ntuple = None - flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" - ntuple = ethtool_create(cfg, "-N", flow) + ksft_ge(sum(cnts[ :4]), 20000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[ :2]), 7000, "traffic on main context (1/2): " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 7000, "traffic on main context (2/2): " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) - cnts = _get_rx_cnts(cfg) - GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) - cnts = _get_rx_cnts(cfg, prev=cnts) + # Now create a rule for context 1 and make sure traffic goes to a subset + if other_ctx: + ntuple.exec() + flow = f"flow-type tcp{cfg.addr_ipver} dst-port {port} context {ctx_id}" + ntuple_id = ethtool_create(cfg, "-N", flow) + defer(ethtool, f"-N {cfg.ifname} delete {ntuple_id}") - ksft_lt(sum(cnts[ :2]), 7000, "traffic on main context: " + str(cnts)) - ksft_ge(sum(cnts[2:4]), 20000, "traffic on extra context: " + str(cnts)) - if other_ctx == 0: - ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) - finally: - if ntuple is not None: - ethtool(f"-N {cfg.ifname} delete {ntuple}") - if ctx_id: - ethtool(f"-X {cfg.ifname} context {ctx_id} delete") - if other_ctx == 0: - ethtool(f"-X {cfg.ifname} default") - else: - ethtool(f"-X {cfg.ifname} context {other_ctx} delete") - if queue_cnt: - ethtool(f"-L {cfg.ifname} combined {queue_cnt}") + cnts = _get_rx_cnts(cfg) + GenerateTraffic(cfg, port=port).wait_pkts_and_stop(20000) + cnts = _get_rx_cnts(cfg, prev=cnts) + + ksft_lt(sum(cnts[ :2]), 7000, "traffic on main context: " + str(cnts)) + ksft_ge(sum(cnts[2:4]), 20000, "traffic on extra context: " + str(cnts)) + if other_ctx == 0: + ksft_eq(sum(cnts[4: ]), 0, "traffic on other queues: " + str(cnts)) def test_rss_context_overlap2(cfg): diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh index 76f1ab4898d9..e1ad623146d7 100755 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre.sh @@ -15,6 +15,13 @@ source $lib_dir/mirror_lib.sh source $lib_dir/mirror_gre_lib.sh source $lib_dir/mirror_gre_topo_lib.sh +ALL_TESTS=" + test_keyful + test_soft + test_tos_fixed + test_ttl_inherit +" + setup_keyful() { tunnel_create gt6-key ip6gretap 2001:db8:3::1 2001:db8:3::2 \ @@ -118,15 +125,15 @@ test_span_gre_ttl_inherit() RET=0 ip link set dev $tundev type $type ttl inherit - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type ttl 100 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: no offload on TTL of inherit ($tcflags)" + log_test "$what: no offload on TTL of inherit" } test_span_gre_tos_fixed() @@ -138,61 +145,49 @@ test_span_gre_tos_fixed() RET=0 ip link set dev $tundev type $type tos 0x10 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type tos inherit - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: no offload on a fixed TOS ($tcflags)" + log_test "$what: no offload on a fixed TOS" } test_span_failable() { - local should_fail=$1; shift local tundev=$1; shift local what=$1; shift RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - if ((should_fail)); then - fail_test_span_gre_dir $tundev ingress - else - quick_test_span_gre_dir $tundev ingress - fi + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: should_fail=$should_fail ($tcflags)" + log_test "fail $what" } -test_failable() +test_keyful() { - local should_fail=$1; shift - - test_span_failable $should_fail gt6-key "mirror to keyful gretap" - test_span_failable $should_fail gt6-soft "mirror to gretap w/ soft underlay" + test_span_failable gt6-key "mirror to keyful gretap" } -test_sw() +test_soft() { - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - test_failable 0 - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + test_span_failable gt6-soft "mirror to gretap w/ soft underlay" } -test_hw() +test_tos_fixed() { - test_failable 1 - test_span_gre_tos_fixed gt4 gretap "mirror to gretap" test_span_gre_tos_fixed gt6 ip6gretap "mirror to ip6gretap" +} + +test_ttl_inherit() +{ test_span_gre_ttl_inherit gt4 gretap "mirror to gretap" test_span_gre_ttl_inherit gt6 ip6gretap "mirror to ip6gretap" } @@ -202,16 +197,6 @@ trap cleanup EXIT setup_prepare setup_wait -if ! tc_offload_check; then - check_err 1 "Could not test offloaded functionality" - log_test "mlxsw-specific tests for mirror to gretap" - exit -fi - -tcflags="skip_hw" -test_sw - -tcflags="skip_sw" -test_hw +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh index e5589e2fca85..d43093310e23 100644 --- a/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh +++ b/tools/testing/selftests/drivers/net/mlxsw/mirror_gre_scale.sh @@ -79,7 +79,7 @@ mirror_gre_tunnels_create() cat >> $MIRROR_GRE_BATCH_FILE <<-EOF filter add dev $swp1 ingress pref 1000 \ protocol ipv6 \ - flower $tcflags dst_ip $match_dip \ + flower skip_sw dst_ip $match_dip \ action mirred egress mirror dev $tun EOF done @@ -107,7 +107,7 @@ mirror_gre_tunnels_destroy() done } -__mirror_gre_test() +mirror_gre_test() { local count=$1; shift local should_fail=$1; shift @@ -131,20 +131,6 @@ __mirror_gre_test() done } -mirror_gre_test() -{ - local count=$1; shift - local should_fail=$1; shift - - if ! tc_offload_check $TC_FLOWER_NUM_NETIFS; then - check_err 1 "Could not test offloaded functionality" - return - fi - - tcflags="skip_sw" - __mirror_gre_test $count $should_fail -} - mirror_gre_setup_prepare() { h1=${NETIFS[p1]} diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 9086d2015296..ff96bb7535ff 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -1225,22 +1225,6 @@ trap_uninstall() tc filter del dev $dev $direction pref 1 flower } -slow_path_trap_install() -{ - # For slow-path testing, we need to install a trap to get to - # slow path the packets that would otherwise be switched in HW. - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_install "$@" - fi -} - -slow_path_trap_uninstall() -{ - if [ "${tcflags/skip_hw}" != "$tcflags" ]; then - trap_uninstall "$@" - fi -} - __icmp_capture_add_del() { local add_del=$1; shift @@ -1257,22 +1241,34 @@ __icmp_capture_add_del() icmp_capture_install() { - __icmp_capture_add_del add 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 "" "$tundev" "$filter" } icmp_capture_uninstall() { - __icmp_capture_add_del del 100 "" "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 "" "$tundev" "$filter" } icmp6_capture_install() { - __icmp_capture_add_del add 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del add 100 v6 "$tundev" "$filter" } icmp6_capture_uninstall() { - __icmp_capture_add_del del 100 v6 "$@" + local tundev=$1; shift + local filter=$1; shift + + __icmp_capture_add_del del 100 v6 "$tundev" "$filter" } __vlan_capture_add_del() @@ -1290,12 +1286,18 @@ __vlan_capture_add_del() vlan_capture_install() { - __vlan_capture_add_del add 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del add 100 "$dev" "$filter" } vlan_capture_uninstall() { - __vlan_capture_add_del del 100 "$@" + local dev=$1; shift + local filter=$1; shift + + __vlan_capture_add_del del 100 "$dev" "$filter" } __dscp_capture_add_del() @@ -1655,34 +1657,61 @@ __start_traffic() local sip=$1; shift local dip=$1; shift local dmac=$1; shift + local -a mz_args=("$@") $MZ $h_in -p $pktsize -A $sip -B $dip -c 0 \ - -a own -b $dmac -t "$proto" -q "$@" & + -a own -b $dmac -t "$proto" -q "${mz_args[@]}" & sleep 1 } start_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize udp "$@" + __start_traffic $pktsize udp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic_pktsize() { local pktsize=$1; shift + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") - __start_traffic $pktsize tcp "$@" + __start_traffic $pktsize tcp "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_traffic() { - start_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } start_tcp_traffic() { - start_tcp_traffic_pktsize 8000 "$@" + local h_in=$1; shift + local sip=$1; shift + local dip=$1; shift + local dmac=$1; shift + local -a mz_args=("$@") + + start_tcp_traffic_pktsize 8000 "$h_in" "$sip" "$dip" "$dmac" \ + "${mz_args[@]}" } stop_traffic() diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh index 0266443601bc..921c733ee04f 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh @@ -74,7 +74,7 @@ test_span_gre_mac() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" icmp_capture_install h3-${tundev} "src_mac $src_mac dst_mac $dst_mac" mirror_test v$h1 192.0.2.1 192.0.2.2 h3-${tundev} 100 10 @@ -82,29 +82,29 @@ test_span_gre_mac() icmp_capture_uninstall h3-${tundev} mirror_uninstall $swp1 $direction - log_test "$direction $what: envelope MAC ($tcflags)" + log_test "$direction $what: envelope MAC" } test_two_spans() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" - mirror_install $swp1 egress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + mirror_install $swp1 ingress gt4 "matchall" + mirror_install $swp1 egress gt6 "matchall" + quick_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - fail_test_span_gre_dir gt4 ingress - quick_test_span_gre_dir gt6 egress + fail_test_span_gre_dir gt4 8 0 + quick_test_span_gre_dir gt6 0 8 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" mirror_uninstall $swp1 egress - quick_test_span_gre_dir gt4 ingress - fail_test_span_gre_dir gt6 egress + quick_test_span_gre_dir gt4 8 0 + fail_test_span_gre_dir gt6 0 8 mirror_uninstall $swp1 ingress - log_test "two simultaneously configured mirrors ($tcflags)" + log_test "two simultaneously configured mirrors" } test_gretap() @@ -131,30 +131,11 @@ test_ip6gretap_mac() test_span_gre_mac gt6 egress "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh index 6c257ec03756..e3cd48e18eeb 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh @@ -196,32 +196,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap w/ UL" } -test_all() -{ - RET=0 - - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh index 04fd14b0a9b7..6c7bd33332c2 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d.sh @@ -108,30 +108,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh index f35313c76fac..909ec956a5e5 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh @@ -104,30 +104,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh index 0cf4c47a46f9..40ac9dd3aff1 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q.sh @@ -104,30 +104,11 @@ test_ip6gretap() full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap" } -tests() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -tests - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - tests -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh index c53148b1dc63..fe4d7c906a70 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh @@ -227,10 +227,10 @@ test_lag_slave() RET=0 tc filter add dev $swp1 ingress pref 999 \ - proto 802.1q flower vlan_ethtype arp $tcflags \ + proto 802.1q flower vlan_ethtype arp \ action pass mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" # Test connectivity through $up_dev when $down_dev is set down. ip link set dev $down_dev down @@ -239,7 +239,7 @@ test_lag_slave() setup_wait_dev $host_dev $ARPING -I br1 192.0.2.130 -qfc 1 sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 $host_dev 1 ">= 10" # Test lack of connectivity when both slaves are down. ip link set dev $up_dev down @@ -252,7 +252,7 @@ test_lag_slave() mirror_uninstall $swp1 ingress tc filter del dev $swp1 ingress pref 999 - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -265,30 +265,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $swp4 $swp3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh index 5ea9d63915f7..65ae9d960c18 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh @@ -73,7 +73,7 @@ test_span_gre_ttl() RET=0 mirror_install $swp1 ingress $tundev \ - "prot ip flower $tcflags ip_prot icmp" + "prot ip flower ip_prot icmp" tc filter add dev $h3 ingress pref 77 prot $prot \ flower skip_hw ip_ttl 50 action pass @@ -81,13 +81,13 @@ test_span_gre_ttl() ip link set dev $tundev type $type ttl 50 sleep 2 - mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10 + mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 ">= 10" ip link set dev $tundev type $type ttl 100 tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 ingress - log_test "$what: TTL change ($tcflags)" + log_test "$what: TTL change" } test_span_gre_tun_up() @@ -98,15 +98,15 @@ test_span_gre_tun_up() RET=0 ip link set dev $tundev down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev up - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel down/up ($tcflags)" + log_test "$what: tunnel down/up" } test_span_gre_egress_up() @@ -118,8 +118,8 @@ test_span_gre_egress_up() RET=0 ip link set dev $swp3 down - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev # After setting the device up, wait for neighbor to get resolved so that # we can expect mirroring to work. @@ -127,10 +127,10 @@ test_span_gre_egress_up() setup_wait_dev $swp3 ping -c 1 -I $swp3 $remote_ip &>/dev/null - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: egress down/up ($tcflags)" + log_test "$what: egress down/up" } test_span_gre_remote_ip() @@ -144,14 +144,14 @@ test_span_gre_remote_ip() RET=0 ip link set dev $tundev type $type remote $wrong_ip - mirror_install $swp1 ingress $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + fail_test_span_gre_dir $tundev ip link set dev $tundev type $type remote $correct_ip - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: remote address change ($tcflags)" + log_test "$what: remote address change" } test_span_gre_tun_del() @@ -165,10 +165,10 @@ test_span_gre_tun_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip link del dev $tundev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev tunnel_create $tundev $type $local_ip $remote_ip \ ttl 100 tos inherit $flags @@ -176,11 +176,11 @@ test_span_gre_tun_del() # Recreating the tunnel doesn't reestablish mirroring, so reinstall it # and verify it works for the follow-up tests. mirror_uninstall $swp1 ingress - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: tunnel deleted ($tcflags)" + log_test "$what: tunnel deleted" } test_span_gre_route_del() @@ -192,18 +192,18 @@ test_span_gre_route_del() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev ip route del $route dev $edev - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev ip route add $route dev $edev - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: underlay route removal ($tcflags)" + log_test "$what: underlay route removal" } test_ttl() @@ -244,30 +244,11 @@ test_route_del() test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh index 09389f3b9369..3a84f3ab5856 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh @@ -64,12 +64,19 @@ cleanup() test_span_gre_dir_acl() { - test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_gre_dir_ips "$tundev" "$forward_type" \ + "$backward_type" 192.0.2.3 192.0.2.4 } fail_test_span_gre_dir_acl() { - fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4 + local tundev=$1; shift + + fail_test_span_gre_dir_ips "$tundev" 192.0.2.3 192.0.2.4 } full_test_span_gre_dir_acl() @@ -84,16 +91,15 @@ full_test_span_gre_dir_acl() RET=0 mirror_install $swp1 $direction $tundev \ - "protocol ip flower $tcflags dst_ip $match_dip" - fail_test_span_gre_dir $tundev $direction - test_span_gre_dir_acl "$tundev" "$direction" \ - "$forward_type" "$backward_type" + "protocol ip flower dst_ip $match_dip" + fail_test_span_gre_dir $tundev + test_span_gre_dir_acl "$tundev" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction # Test lack of mirroring after ACL mirror is uninstalled. - fail_test_span_gre_dir_acl "$tundev" "$direction" + fail_test_span_gre_dir_acl "$tundev" - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } test_gretap() @@ -108,30 +114,11 @@ test_ip6gretap() full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh index 9edf4cb104a8..1261e6f46e34 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh @@ -37,8 +37,14 @@ # | \ / | # | \____________________________________________/ | # | | | -# | + lag2 (team) | -# | 192.0.2.130/28 | +# | + lag2 (team) ------> + gt4-dst (gretap) | +# | 192.0.2.130/28 loc=192.0.2.130 | +# | rem=192.0.2.129 | +# | ttl=100 | +# | tos=inherit | +# | | +# | | +# | | # | | # +---------------------------------------------------------------------------+ @@ -50,9 +56,6 @@ ALL_TESTS=" NUM_NETIFS=6 source lib.sh source mirror_lib.sh -source mirror_gre_lib.sh - -require_command $ARPING vlan_host_create() { @@ -122,16 +125,21 @@ h3_create() { vrf_create vrf-h3 ip link set dev vrf-h3 up - tc qdisc add dev $h3 clsact - tc qdisc add dev $h4 clsact h3_create_team + + tunnel_create gt4-dst gretap 192.0.2.130 192.0.2.129 \ + ttl 100 tos inherit + ip link set dev gt4-dst master vrf-h3 + tc qdisc add dev gt4-dst clsact } h3_destroy() { + tc qdisc del dev gt4-dst clsact + ip link set dev gt4-dst nomaster + tunnel_destroy gt4-dst + h3_destroy_team - tc qdisc del dev $h4 clsact - tc qdisc del dev $h3 clsact ip link set dev vrf-h3 down vrf_destroy vrf-h3 } @@ -188,18 +196,12 @@ setup_prepare() h2_create h3_create switch_create - - trap_install $h3 ingress - trap_install $h4 ingress } cleanup() { pre_cleanup - trap_uninstall $h4 ingress - trap_uninstall $h3 ingress - switch_destroy h3_destroy h2_destroy @@ -218,7 +220,8 @@ test_lag_slave() RET=0 mirror_install $swp1 ingress gt4 \ - "proto 802.1q flower vlan_id 333 $tcflags" + "proto 802.1q flower vlan_id 333" + vlan_capture_install gt4-dst "vlan_ethtype ipv4 ip_proto icmp type 8" # Move $down_dev away from the team. That will prompt change in # txability of the connected device, without changing its upness. The @@ -226,13 +229,14 @@ test_lag_slave() # other slave. ip link set dev $down_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $up_dev 1 10 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 10 # Test lack of connectivity when neither slave is txable. ip link set dev $up_dev nomaster sleep 2 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h3 1 0 - mirror_test vrf-h1 192.0.2.1 192.0.2.18 $h4 1 0 + mirror_test vrf-h1 192.0.2.1 192.0.2.18 gt4-dst 100 0 + + vlan_capture_uninstall gt4-dst mirror_uninstall $swp1 ingress # Recreate H3's team device, because mlxsw, which this test is @@ -243,7 +247,7 @@ test_lag_slave() # Wait for ${h,swp}{3,4}. setup_wait - log_test "$what ($tcflags)" + log_test "$what" } test_mirror_gretap_first() @@ -256,30 +260,11 @@ test_mirror_gretap_second() test_lag_slave $h4 $h3 "mirror to gretap: LAG second slave" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index 0c36546e131e..20078cc55f24 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -5,22 +5,34 @@ source "$net_forwarding_dir/mirror_lib.sh" quick_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift - do_test_span_dir_ips 10 h3-$tundev "$@" + do_test_span_dir_ips 10 h3-$tundev "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir_ips() { local tundev=$1; shift + local ip1=$1; shift + local ip2=$1; shift - do_test_span_dir_ips 0 h3-$tundev "$@" + do_test_span_dir_ips 0 h3-$tundev "$ip1" "$ip2" } test_span_gre_dir_ips() { local tundev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local ip1=$1; shift + local ip2=$1; shift - test_span_dir_ips h3-$tundev "$@" + test_span_dir_ips h3-$tundev "$forward_type" \ + "$backward_type" "$ip1" "$ip2" } full_test_span_gre_dir_ips() @@ -35,12 +47,12 @@ full_test_span_gre_dir_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + mirror_install $swp1 $direction $tundev "matchall" + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } full_test_span_gre_dir_vlan_ips() @@ -56,45 +68,63 @@ full_test_span_gre_dir_vlan_ips() RET=0 - mirror_install $swp1 $direction $tundev "matchall $tcflags" + mirror_install $swp1 $direction $tundev "matchall" - test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \ + test_span_dir_ips "h3-$tundev" "$forward_type" \ "$backward_type" "$ip1" "$ip2" tc filter add dev $h3 ingress pref 77 prot 802.1q \ flower $vlan_match \ action pass - mirror_test v$h1 $ip1 $ip2 $h3 77 10 + mirror_test v$h1 $ip1 $ip2 $h3 77 '>= 10' tc filter del dev $h3 ingress pref 77 mirror_uninstall $swp1 $direction - log_test "$direction $what ($tcflags)" + log_test "$direction $what" } quick_test_span_gre_dir() { - quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + quick_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } fail_test_span_gre_dir() { - fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 -} + local tundev=$1; shift -test_span_gre_dir() -{ - test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + fail_test_span_gre_dir_ips "$tundev" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir() { - full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_ips "$tundev" "$direction" "$forward_type" \ + "$backward_type" "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_dir_vlan() { - full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local direction=$1; shift + local vlan_match=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + local what=$1; shift + + full_test_span_gre_dir_vlan_ips "$tundev" "$direction" "$vlan_match" \ + "$forward_type" "$backward_type" \ + "$what" 192.0.2.1 192.0.2.2 } full_test_span_gre_stp_ips() @@ -104,27 +134,39 @@ full_test_span_gre_stp_ips() local what=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local h3mac=$(mac_get $h3) RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" bridge link set dev $nbpdev state disabled sleep 1 - fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + fail_test_span_gre_dir_ips $tundev $ip1 $ip2 bridge link set dev $nbpdev state forwarding sleep 1 - quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2 + quick_test_span_gre_dir_ips $tundev $ip1 $ip2 \ + "$forward_type" "$backward_type" mirror_uninstall $swp1 ingress - log_test "$what: STP state ($tcflags)" + log_test "$what: STP state" } full_test_span_gre_stp() { - full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2 + local tundev=$1; shift + local nbpdev=$1; shift + local what=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift + + full_test_span_gre_stp_ips "$tundev" "$nbpdev" "$what" \ + 192.0.2.1 192.0.2.2 \ + "$forward_type" "$backward_type" } diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh index fc0508e40fca..2cbfbecf25c8 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh @@ -60,41 +60,32 @@ test_span_gre_neigh() local addr=$1; shift local tundev=$1; shift local direction=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift local what=$1; shift RET=0 ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55 - mirror_install $swp1 $direction $tundev "matchall $tcflags" - fail_test_span_gre_dir $tundev ingress + mirror_install $swp1 $direction $tundev "matchall" + fail_test_span_gre_dir $tundev "$forward_type" "$backward_type" ip neigh del dev $swp3 $addr - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction $what: neighbor change ($tcflags)" + log_test "$direction $what: neighbor change" } test_gretap() { - test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap" - test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 ingress 8 0 "mirror to gretap" + test_span_gre_neigh 192.0.2.130 gt4 egress 0 8 "mirror to gretap" } test_ip6gretap() { - test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap" - test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + test_span_gre_neigh 2001:db8:2::2 gt6 ingress 8 0 "mirror to ip6gretap" + test_span_gre_neigh 2001:db8:2::2 gt6 egress 0 8 "mirror to ip6gretap" } trap cleanup EXIT @@ -102,14 +93,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh index 6f9ef1820e93..34bc646938e3 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh @@ -75,42 +75,31 @@ cleanup() test_gretap() { RET=0 - mirror_install $swp1 ingress gt4 "matchall $tcflags" + mirror_install $swp1 ingress gt4 "matchall" # For IPv4, test that there's no mirroring without the route directing # the traffic to tunnel remote address. Then add it and test that # mirroring starts. For IPv6 we can't test this due to the limitation # that routes for locally-specified IPv6 addresses can't be added. - fail_test_span_gre_dir gt4 ingress + fail_test_span_gre_dir gt4 ip route add 192.0.2.130/32 via 192.0.2.162 - quick_test_span_gre_dir gt4 ingress + quick_test_span_gre_dir gt4 ip route del 192.0.2.130/32 via 192.0.2.162 mirror_uninstall $swp1 ingress - log_test "mirror to gre with next-hop remote ($tcflags)" + log_test "mirror to gre with next-hop remote" } test_ip6gretap() { RET=0 - mirror_install $swp1 ingress gt6 "matchall $tcflags" - quick_test_span_gre_dir gt6 ingress + mirror_install $swp1 ingress gt6 "matchall" + quick_test_span_gre_dir gt6 mirror_uninstall $swp1 ingress - log_test "mirror to ip6gre with next-hop remote ($tcflags)" -} - -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress + log_test "mirror to ip6gre with next-hop remote" } trap cleanup EXIT @@ -118,14 +107,6 @@ trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh index 88cecdb9a861..63689928cb51 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh @@ -63,30 +63,11 @@ test_gretap() full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh index c8a9b5bd841f..1b902cc579f6 100755 --- a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh @@ -153,21 +153,21 @@ test_span_gre_forbidden_cpu() RET=0 # Run the pass-test first, to prime neighbor table. - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev # Now forbid the VLAN at the bridge and see it fail. bridge vlan del dev br1 vid 555 self sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev br1 vid 555 self sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge ($tcflags)" + log_test "$what: vlan forbidden at a bridge" } test_gretap_forbidden_cpu() @@ -187,22 +187,22 @@ test_span_gre_forbidden_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev bridge vlan del dev $swp3 vid 555 sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev bridge vlan add dev $swp3 vid 555 # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: vlan forbidden at a bridge egress ($tcflags)" + log_test "$what: vlan forbidden at a bridge egress" } test_gretap_forbidden_egress() @@ -223,30 +223,30 @@ test_span_gre_untagged_egress() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" + mirror_install $swp1 ingress $tundev "matchall" - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3.555 bridge vlan add dev $swp3 vid 555 pvid untagged h3_addr_add_del add $h3 sleep 5 - quick_test_span_gre_dir $tundev ingress - fail_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + fail_test_span_vlan_dir $h3 555 "$ul_proto" h3_addr_add_del del $h3 bridge vlan add dev $swp3 vid 555 h3_addr_add_del add $h3.555 sleep 5 - quick_test_span_gre_dir $tundev ingress - quick_test_span_vlan_dir $h3 555 ingress "$ul_proto" + quick_test_span_gre_dir $tundev + quick_test_span_vlan_dir $h3 555 "$ul_proto" mirror_uninstall $swp1 ingress - log_test "$what: vlan untagged at a bridge egress ($tcflags)" + log_test "$what: vlan untagged at a bridge egress" } test_gretap_untagged_egress() @@ -267,19 +267,19 @@ test_span_gre_fdb_roaming() RET=0 - mirror_install $swp1 ingress $tundev "matchall $tcflags" - quick_test_span_gre_dir $tundev ingress + mirror_install $swp1 ingress $tundev "matchall" + quick_test_span_gre_dir $tundev while ((RET == 0)); do bridge fdb del dev $swp3 $h3mac vlan 555 master 2>/dev/null bridge fdb add dev $swp2 $h3mac vlan 555 master static sleep 1 - fail_test_span_gre_dir $tundev ingress + fail_test_span_gre_dir $tundev if ! bridge fdb sh dev $swp2 vlan 555 master \ | grep -q $h3mac; then printf "TEST: %-60s [RETRY]\n" \ - "$what: MAC roaming ($tcflags)" + "$what: MAC roaming" # ARP or ND probably reprimed the FDB while the test # was running. We would get a spurious failure. RET=0 @@ -292,11 +292,11 @@ test_span_gre_fdb_roaming() # Re-prime FDB $ARPING -I br1.555 192.0.2.130 -fqc 1 sleep 1 - quick_test_span_gre_dir $tundev ingress + quick_test_span_gre_dir $tundev mirror_uninstall $swp1 ingress - log_test "$what: MAC roaming ($tcflags)" + log_test "$what: MAC roaming" } test_gretap_fdb_roaming() @@ -319,30 +319,11 @@ test_ip6gretap_stp() full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap" } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - - tests_run - - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh index 3e8ebeff3019..6bf9d5ae933c 100644 --- a/tools/testing/selftests/net/forwarding/mirror_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh @@ -44,14 +44,17 @@ mirror_test() local type="icmp echoreq" fi + if [[ -z ${expect//[[:digit:]]/} ]]; then + expect="== $expect" + fi + local t0=$(tc_rule_stats_get $dev $pref) $MZ $proto $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \ -c 10 -d 100msec -t $type sleep 0.5 local t1=$(tc_rule_stats_get $dev $pref) local delta=$((t1 - t0)) - # Tolerate a couple stray extra packets. - ((expect <= delta && delta <= expect + 2)) + ((delta $expect)) check_err $? "Expected to capture $expect packets, got $delta." } @@ -59,36 +62,42 @@ do_test_span_dir_ips() { local expect=$1; shift local dev=$1; shift - local direction=$1; shift local ip1=$1; shift local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift - icmp_capture_install $dev + icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 $expect + icmp_capture_uninstall $dev + + icmp_capture_install $dev "type $backward_type" mirror_test v$h2 $ip2 $ip1 $dev 100 $expect icmp_capture_uninstall $dev } quick_test_span_dir_ips() { - do_test_span_dir_ips 10 "$@" -} + local dev=$1; shift + local ip1=$1; shift + local ip2=$1; shift + local forward_type=${1-8}; shift + local backward_type=${1-0}; shift -fail_test_span_dir_ips() -{ - do_test_span_dir_ips 0 "$@" + do_test_span_dir_ips 10 "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" } test_span_dir_ips() { local dev=$1; shift - local direction=$1; shift local forward_type=$1; shift local backward_type=$1; shift local ip1=$1; shift local ip2=$1; shift - quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2" + quick_test_span_dir_ips "$dev" "$ip1" "$ip2" \ + "$forward_type" "$backward_type" icmp_capture_install $dev "type $forward_type" mirror_test v$h1 $ip1 $ip2 $dev 100 10 @@ -99,14 +108,14 @@ test_span_dir_ips() icmp_capture_uninstall $dev } -fail_test_span_dir() -{ - fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 -} - test_span_dir() { - test_span_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local forward_type=$1; shift + local backward_type=$1; shift + + test_span_dir_ips "$dev" "$forward_type" "$backward_type" \ + 192.0.2.1 192.0.2.2 } do_test_span_vlan_dir_ips() @@ -114,7 +123,6 @@ do_test_span_vlan_dir_ips() local expect=$1; shift local dev=$1; shift local vid=$1; shift - local direction=$1; shift local ul_proto=$1; shift local ip1=$1; shift local ip2=$1; shift @@ -123,27 +131,50 @@ do_test_span_vlan_dir_ips() # The traffic is meant for local box anyway, so will be trapped to # kernel. vlan_capture_install $dev "skip_hw vlan_id $vid vlan_ethtype $ul_proto" - mirror_test v$h1 $ip1 $ip2 $dev 100 $expect - mirror_test v$h2 $ip2 $ip1 $dev 100 $expect + mirror_test v$h1 $ip1 $ip2 $dev 100 "$expect" + mirror_test v$h2 $ip2 $ip1 $dev 100 "$expect" vlan_capture_uninstall $dev } quick_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 10 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips '>= 10' "$dev" "$vid" "$ul_proto" \ + "$ip1" "$ip2" } fail_test_span_vlan_dir_ips() { - do_test_span_vlan_dir_ips 0 "$@" + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + local ip1=$1; shift + local ip2=$1; shift + + do_test_span_vlan_dir_ips 0 "$dev" "$vid" "$ul_proto" "$ip1" "$ip2" } quick_test_span_vlan_dir() { - quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + quick_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } fail_test_span_vlan_dir() { - fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2 + local dev=$1; shift + local vid=$1; shift + local ul_proto=$1; shift + + fail_test_span_vlan_dir_ips "$dev" "$vid" "$ul_proto" \ + 192.0.2.1 192.0.2.2 } diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh index 0b44e148235e..2f150a414d38 100755 --- a/tools/testing/selftests/net/forwarding/mirror_vlan.sh +++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh @@ -40,12 +40,16 @@ setup_prepare() vlan_create $h2 111 v$h2 192.0.2.18/28 bridge vlan add dev $swp2 vid 111 + + trap_install $h3 ingress } cleanup() { pre_cleanup + trap_uninstall $h3 ingress + vlan_destroy $h2 111 vlan_destroy $h1 111 vlan_destroy $h3 555 @@ -63,11 +67,11 @@ test_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type" + mirror_install $swp1 $direction $swp3.555 "matchall" + test_span_dir "$h3.555" "$forward_type" "$backward_type" mirror_uninstall $swp1 $direction - log_test "$direction mirror to vlan ($tcflags)" + log_test "$direction mirror to vlan" } test_vlan() @@ -84,14 +88,12 @@ test_tagged_vlan_dir() RET=0 - mirror_install $swp1 $direction $swp3.555 "matchall $tcflags" - do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" ip \ - 192.0.2.17 192.0.2.18 - do_test_span_vlan_dir_ips 0 "$h3.555" 555 "$direction" ip \ - 192.0.2.17 192.0.2.18 + mirror_install $swp1 $direction $swp3.555 "matchall" + do_test_span_vlan_dir_ips '>= 10' "$h3.555" 111 ip 192.0.2.17 192.0.2.18 + do_test_span_vlan_dir_ips 0 "$h3.555" 555 ip 192.0.2.17 192.0.2.18 mirror_uninstall $swp1 $direction - log_test "$direction mirror tagged to vlan ($tcflags)" + log_test "$direction mirror tagged to vlan" } test_tagged_vlan() @@ -100,32 +102,11 @@ test_tagged_vlan() test_tagged_vlan_dir egress 0 8 } -test_all() -{ - slow_path_trap_install $swp1 ingress - slow_path_trap_install $swp1 egress - trap_install $h3 ingress - - tests_run - - trap_uninstall $h3 ingress - slow_path_trap_uninstall $swp1 egress - slow_path_trap_uninstall $swp1 ingress -} - trap cleanup EXIT setup_prepare setup_wait -tcflags="skip_hw" -test_all - -if ! tc_offload_check; then - echo "WARN: Could not test offloaded functionality" -else - tcflags="skip_sw" - test_all -fi +tests_run exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh index 915f319bcc8b..d0219032f773 100644 --- a/tools/testing/selftests/net/lib.sh +++ b/tools/testing/selftests/net/lib.sh @@ -199,10 +199,10 @@ tc_rule_stats_get() { local dev=$1; shift local pref=$1; shift - local dir=$1; shift + local dir=${1:-ingress}; shift local selector=${1:-.packets}; shift - tc -j -s filter show dev $dev ${dir:-ingress} pref $pref \ + tc -j -s filter show dev $dev $dir pref $pref \ | jq ".[1].options.actions[].stats$selector" } diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index b6ce3f33d41e..3aaa2748a58e 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -6,6 +6,7 @@ import sys import time import traceback from .consts import KSFT_MAIN_NAME +from .utils import global_defer_queue KSFT_RESULT = None KSFT_RESULT_ALL = True @@ -108,6 +109,24 @@ def ktap_result(ok, cnt=1, case="", comment=""): print(res) +def ksft_flush_defer(): + global KSFT_RESULT + + i = 0 + qlen_start = len(global_defer_queue) + while global_defer_queue: + i += 1 + entry = global_defer_queue.pop() + try: + entry.exec_only() + except: + ksft_pr(f"Exception while handling defer / cleanup (callback {i} of {qlen_start})!") + tb = traceback.format_exc() + for line in tb.strip().split('\n'): + ksft_pr("Defer Exception|", line) + KSFT_RESULT = False + + def ksft_run(cases=None, globs=None, case_pfx=None, args=()): cases = cases or [] @@ -130,29 +149,31 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()): for case in cases: KSFT_RESULT = True cnt += 1 + comment = "" + cnt_key = "" + try: case(*args) except KsftSkipEx as e: - ktap_result(True, cnt, case, comment="SKIP " + str(e)) - totals['skip'] += 1 - continue + comment = "SKIP " + str(e) + cnt_key = 'skip' except KsftXfailEx as e: - ktap_result(True, cnt, case, comment="XFAIL " + str(e)) - totals['xfail'] += 1 - continue + comment = "XFAIL " + str(e) + cnt_key = 'xfail' except Exception as e: tb = traceback.format_exc() for line in tb.strip().split('\n'): ksft_pr("Exception|", line) - ktap_result(False, cnt, case) - totals['fail'] += 1 - continue - - ktap_result(KSFT_RESULT, cnt, case) - if KSFT_RESULT: - totals['pass'] += 1 - else: - totals['fail'] += 1 + KSFT_RESULT = False + cnt_key = 'fail' + + ksft_flush_defer() + + if not cnt_key: + cnt_key = 'pass' if KSFT_RESULT else 'fail' + + ktap_result(KSFT_RESULT, cnt, case, comment=comment) + totals[cnt_key] += 1 print( f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0" diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 405aa510aaf2..72590c3f90f1 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -66,6 +66,40 @@ class bkg(cmd): return self.process(terminate=self.terminate, fail=self.check_fail) +global_defer_queue = [] + + +class defer: + def __init__(self, func, *args, **kwargs): + global global_defer_queue + + if not callable(func): + raise Exception("defer created with un-callable object, did you call the function instead of passing its name?") + + self.func = func + self.args = args + self.kwargs = kwargs + + self._queue = global_defer_queue + self._queue.append(self) + + def __enter__(self): + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + return self.exec() + + def exec_only(self): + self.func(*self.args, **self.kwargs) + + def cancel(self): + self._queue.remove(self) + + def exec(self): + self.cancel() + self.exec_only() + + def tool(name, args, json=None, ns=None, host=None): cmd_str = name + ' ' if json: diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh index 8538f08c64c2..c61d23a8c88d 100755 --- a/tools/testing/selftests/net/netfilter/nft_queue.sh +++ b/tools/testing/selftests/net/netfilter/nft_queue.sh @@ -375,6 +375,42 @@ EOF wait 2>/dev/null } +test_queue_removal() +{ + read tainted_then < /proc/sys/kernel/tainted + + ip netns exec "$ns1" nft -f - <<EOF +flush ruleset +table ip filter { + chain output { + type filter hook output priority 0; policy accept; + ip protocol icmp queue num 0 + } +} +EOF + ip netns exec "$ns1" ./nf_queue -q 0 -d 30000 -t "$timeout" & + local nfqpid=$! + + busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 0 + + ip netns exec "$ns1" ping -w 2 -f -c 10 127.0.0.1 -q >/dev/null + kill $nfqpid + + ip netns exec "$ns1" nft flush ruleset + + if [ "$tainted_then" -ne 0 ];then + return + fi + + read tainted_now < /proc/sys/kernel/tainted + if [ "$tainted_now" -eq 0 ];then + echo "PASS: queue program exiting while packets queued" + else + echo "TAINT: queue program exiting while packets queued" + ret=1 + fi +} + ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null @@ -413,5 +449,6 @@ test_tcp_localhost test_tcp_localhost_connectclose test_tcp_localhost_requeue test_icmp_vrf +test_queue_removal exit $ret diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 85b3baa3f7f3..3e74cfa1a2bf 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -53,6 +53,7 @@ static bool cfg_do_ipv6; static bool cfg_do_connected; static bool cfg_do_connectionless; static bool cfg_do_msgmore; +static bool cfg_do_recv = true; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; @@ -414,6 +415,9 @@ static void run_one(struct testcase *test, int fdt, int fdr, if (!sent) return; + if (!cfg_do_recv) + return; + if (test->gso_len) mss = test->gso_len; else @@ -464,8 +468,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) if (fdr == -1) error(1, errno, "socket r"); - if (bind(fdr, addr, alen)) - error(1, errno, "bind"); + if (cfg_do_recv) { + if (bind(fdr, addr, alen)) + error(1, errno, "bind"); + } /* Have tests fail quickly instead of hang */ if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv))) @@ -524,7 +530,7 @@ static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "46cCmst:")) != -1) { + while ((c = getopt(argc, argv, "46cCmRst:")) != -1) { switch (c) { case '4': cfg_do_ipv4 = true; @@ -541,6 +547,9 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_do_msgmore = true; break; + case 'R': + cfg_do_recv = false; + break; case 's': cfg_do_setsockopt = true; break; diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index 6c63178086b0..85d1fa3c1ff7 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -27,6 +27,31 @@ test_route_mtu() { ip route add local fd00::1/128 table local dev lo mtu 1500 } +setup_dummy_sink() { + ip link add name sink mtu 1500 type dummy + ip addr add dev sink 10.0.0.0/24 + ip addr add dev sink fd00::2/64 nodad + ip link set dev sink up +} + +test_hw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation on >/dev/null +} + +test_sw_gso_hw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic on >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + +test_sw_gso_sw_csum() { + setup_dummy_sink + ethtool -K sink tx-checksum-ip-generic off >/dev/null + ethtool -K sink tx-udp-segmentation off >/dev/null +} + if [ "$#" -gt 0 ]; then "$1" shift 2 # pop "test_*" arg and "--" delimiter @@ -56,3 +81,21 @@ echo "ipv4 msg_more" echo "ipv6 msg_more" ./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m + +echo "ipv4 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 hw-gso hw-csum" +./in_netns.sh "$0" test_hw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso hw-csum" +./in_netns.sh "$0" test_sw_gso_hw_csum -- ./udpgso -6 -C -R + +echo "ipv4 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -4 -C -R + +echo "ipv6 sw-gso sw-csum" +./in_netns.sh "$0" test_sw_gso_sw_csum -- ./udpgso -6 -C -R |