diff options
99 files changed, 694 insertions, 327 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 52086876ce40..281de213ef47 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3247,7 +3247,7 @@ R: Martin KaFai Lau <kafai@fb.com> R: Song Liu <songliubraving@fb.com> R: Yonghong Song <yhs@fb.com> R: John Fastabend <john.fastabend@gmail.com> -R: KP Singh <kpsingh@chromium.org> +R: KP Singh <kpsingh@kernel.org> L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported @@ -3366,7 +3366,7 @@ F: arch/x86/net/ X: arch/x86/net/bpf_jit_comp32.c BPF LSM (Security Audit and Enforcement using BPF) -M: KP Singh <kpsingh@chromium.org> +M: KP Singh <kpsingh@kernel.org> R: Florent Revest <revest@chromium.org> R: Brendan Jackman <jackmanb@chromium.org> L: bpf@vger.kernel.org @@ -10563,6 +10563,13 @@ S: Supported F: Documentation/networking/device_drivers/ethernet/marvell/octeontx2.rst F: drivers/net/ethernet/marvell/octeontx2/af/ +MARVELL PRESTERA ETHERNET SWITCH DRIVER +M: Vadym Kochan <vkochan@marvell.com> +M: Taras Chornyi <tchornyi@marvell.com> +S: Supported +W: https://github.com/Marvell-switching/switchdev-prestera +F: drivers/net/ethernet/marvell/prestera/ + MARVELL SOC MMC/SD/SDIO CONTROLLER DRIVER M: Nicolas Pitre <nico@fluxnic.net> S: Odd Fixes diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 9abfaae1c6f7..a4e4e15f574d 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -745,6 +745,19 @@ const struct bond_option *bond_opt_get(unsigned int option) return &bond_opts[option]; } +static void bond_set_xfrm_features(struct net_device *bond_dev, u64 mode) +{ + if (!IS_ENABLED(CONFIG_XFRM_OFFLOAD)) + return; + + if (mode == BOND_MODE_ACTIVEBACKUP) + bond_dev->wanted_features |= BOND_XFRM_FEATURES; + else + bond_dev->wanted_features &= ~BOND_XFRM_FEATURES; + + netdev_update_features(bond_dev); +} + static int bond_option_mode_set(struct bonding *bond, const struct bond_opt_value *newval) { @@ -767,13 +780,8 @@ static int bond_option_mode_set(struct bonding *bond, if (newval->value == BOND_MODE_ALB) bond->params.tlb_dynamic_lb = 1; -#ifdef CONFIG_XFRM_OFFLOAD - if (newval->value == BOND_MODE_ACTIVEBACKUP) - bond->dev->wanted_features |= BOND_XFRM_FEATURES; - else - bond->dev->wanted_features &= ~BOND_XFRM_FEATURES; - netdev_change_features(bond->dev); -#endif /* CONFIG_XFRM_OFFLOAD */ + if (bond->dev->reg_state == NETREG_REGISTERED) + bond_set_xfrm_features(bond->dev, newval->value); /* don't cache arp_validate between modes */ bond->params.arp_validate = BOND_ARP_VALIDATE_NONE; diff --git a/drivers/net/can/softing/softing_main.c b/drivers/net/can/softing/softing_main.c index 9d2faaa39ce4..c9ca8b9fceb9 100644 --- a/drivers/net/can/softing/softing_main.c +++ b/drivers/net/can/softing/softing_main.c @@ -382,8 +382,13 @@ static int softing_netdev_open(struct net_device *ndev) /* check or determine and set bittime */ ret = open_candev(ndev); - if (!ret) - ret = softing_startstop(ndev, 1); + if (ret) + return ret; + + ret = softing_startstop(ndev, 1); + if (ret < 0) + close_candev(ndev); + return ret; } diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c index f791860d495f..c444ef3da3e2 100644 --- a/drivers/net/dsa/ocelot/felix.c +++ b/drivers/net/dsa/ocelot/felix.c @@ -569,7 +569,6 @@ static int felix_setup(struct dsa_switch *ds) struct ocelot *ocelot = ds->priv; struct felix *felix = ocelot_to_felix(ocelot); int port, err; - int tc; err = felix_init_structs(felix, ds->num_ports); if (err) @@ -608,12 +607,6 @@ static int felix_setup(struct dsa_switch *ds) ocelot_write_rix(ocelot, ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)), ANA_PGID_PGID, PGID_UC); - /* Setup the per-traffic class flooding PGIDs */ - for (tc = 0; tc < FELIX_NUM_TC; tc++) - ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) | - ANA_FLOODING_FLD_BROADCAST(PGID_MC) | - ANA_FLOODING_FLD_UNICAST(PGID_UC), - ANA_FLOODING, tc); ds->mtu_enforcement_ingress = true; ds->configure_vlan_while_not_filtering = true; diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c index 3e925b8d5306..2e5bbdca5ea4 100644 --- a/drivers/net/dsa/ocelot/felix_vsc9959.c +++ b/drivers/net/dsa/ocelot/felix_vsc9959.c @@ -1429,6 +1429,7 @@ static int felix_pci_probe(struct pci_dev *pdev, pci_set_drvdata(pdev, felix); ocelot = &felix->ocelot; ocelot->dev = &pdev->dev; + ocelot->num_flooding_pgids = FELIX_NUM_TC; felix->info = &felix_info_vsc9959; felix->switch_base = pci_resource_start(pdev, felix->info->switch_pci_bar); diff --git a/drivers/net/dsa/ocelot/seville_vsc9953.c b/drivers/net/dsa/ocelot/seville_vsc9953.c index 1d420c4a2f0f..ebbaf6817ec8 100644 --- a/drivers/net/dsa/ocelot/seville_vsc9953.c +++ b/drivers/net/dsa/ocelot/seville_vsc9953.c @@ -1210,6 +1210,7 @@ static int seville_probe(struct platform_device *pdev) ocelot = &felix->ocelot; ocelot->dev = &pdev->dev; + ocelot->num_flooding_pgids = 1; felix->info = &seville_info_vsc9953; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); diff --git a/drivers/net/ethernet/agere/Kconfig b/drivers/net/ethernet/agere/Kconfig index d92516ae59cc..9cd750184947 100644 --- a/drivers/net/ethernet/agere/Kconfig +++ b/drivers/net/ethernet/agere/Kconfig @@ -21,6 +21,7 @@ config ET131X tristate "Agere ET-1310 Gigabit Ethernet support" depends on PCI select PHYLIB + select CRC32 help This driver supports Agere ET-1310 ethernet adapters. diff --git a/drivers/net/ethernet/cadence/Kconfig b/drivers/net/ethernet/cadence/Kconfig index 85858163bac5..e432a68ac520 100644 --- a/drivers/net/ethernet/cadence/Kconfig +++ b/drivers/net/ethernet/cadence/Kconfig @@ -23,6 +23,7 @@ config MACB tristate "Cadence MACB/GEM support" depends on HAS_DMA && COMMON_CLK select PHYLINK + select CRC32 help The Cadence MACB ethernet interface is found on many Atmel AT32 and AT91 parts. This driver also supports the Cadence GEM (Gigabit diff --git a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c index 7f90b828d159..1b7e8c91b541 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/ch_ktls/chcr_ktls.c @@ -987,9 +987,7 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, struct fw_eth_tx_pkt_wr *wr; struct cpl_tx_pkt_core *cpl; u32 ctrl, iplen, maclen; -#if IS_ENABLED(CONFIG_IPV6) struct ipv6hdr *ip6; -#endif unsigned int ndesc; struct tcphdr *tcp; int len16, pktlen; @@ -1043,17 +1041,15 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb, cpl->len = htons(pktlen); memcpy(buf, skb->data, pktlen); - if (tx_info->ip_family == AF_INET) { + if (!IS_ENABLED(CONFIG_IPV6) || tx_info->ip_family == AF_INET) { /* we need to correct ip header len */ ip = (struct iphdr *)(buf + maclen); ip->tot_len = htons(pktlen - maclen); cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP); -#if IS_ENABLED(CONFIG_IPV6) } else { ip6 = (struct ipv6hdr *)(buf + maclen); ip6->payload_len = htons(pktlen - maclen - iplen); cntrl1 = TXPKT_CSUM_TYPE_V(TX_CSUM_TCPIP6); -#endif } cntrl1 |= T6_TXPKT_ETHHDR_LEN_V(maclen - ETH_HLEN) | diff --git a/drivers/net/ethernet/faraday/Kconfig b/drivers/net/ethernet/faraday/Kconfig index c2677ec0564d..3d1e9a302148 100644 --- a/drivers/net/ethernet/faraday/Kconfig +++ b/drivers/net/ethernet/faraday/Kconfig @@ -33,6 +33,7 @@ config FTGMAC100 depends on !64BIT || BROKEN select PHYLIB select MDIO_ASPEED if MACH_ASPEED_G6 + select CRC32 help This driver supports the FTGMAC100 Gigabit Ethernet controller from Faraday. It is used on Faraday A369, Andes AG102 and some diff --git a/drivers/net/ethernet/freescale/Kconfig b/drivers/net/ethernet/freescale/Kconfig index a1d53ddf1593..3f9175bdce77 100644 --- a/drivers/net/ethernet/freescale/Kconfig +++ b/drivers/net/ethernet/freescale/Kconfig @@ -25,6 +25,7 @@ config FEC depends on (M523x || M527x || M5272 || M528x || M520x || M532x || \ ARCH_MXC || SOC_IMX28 || COMPILE_TEST) default ARCH_MXC || SOC_IMX28 if ARM + select CRC32 select PHYLIB imply PTP_1588_CLOCK help diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c index 90cd243070d7..828c177df03d 100644 --- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c +++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-mac.c @@ -269,6 +269,7 @@ static int dpaa2_pcs_create(struct dpaa2_mac *mac, if (!of_device_is_available(node)) { netdev_err(mac->net_dev, "pcs-handle node not available\n"); + of_node_put(node); return -ENODEV; } diff --git a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c index 8ed1ebd5a183..89e558135432 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c +++ b/drivers/net/ethernet/freescale/enetc/enetc_ethtool.c @@ -143,8 +143,8 @@ static const struct { { ENETC_PM0_R255, "MAC rx 128-255 byte packets" }, { ENETC_PM0_R511, "MAC rx 256-511 byte packets" }, { ENETC_PM0_R1023, "MAC rx 512-1023 byte packets" }, - { ENETC_PM0_R1518, "MAC rx 1024-1518 byte packets" }, - { ENETC_PM0_R1519X, "MAC rx 1519 to max-octet packets" }, + { ENETC_PM0_R1522, "MAC rx 1024-1522 byte packets" }, + { ENETC_PM0_R1523X, "MAC rx 1523 to max-octet packets" }, { ENETC_PM0_ROVR, "MAC rx oversized packets" }, { ENETC_PM0_RJBR, "MAC rx jabber packets" }, { ENETC_PM0_RFRG, "MAC rx fragment packets" }, @@ -163,9 +163,13 @@ static const struct { { ENETC_PM0_TBCA, "MAC tx broadcast frames" }, { ENETC_PM0_TPKT, "MAC tx packets" }, { ENETC_PM0_TUND, "MAC tx undersized packets" }, + { ENETC_PM0_T64, "MAC tx 64 byte packets" }, { ENETC_PM0_T127, "MAC tx 65-127 byte packets" }, + { ENETC_PM0_T255, "MAC tx 128-255 byte packets" }, + { ENETC_PM0_T511, "MAC tx 256-511 byte packets" }, { ENETC_PM0_T1023, "MAC tx 512-1023 byte packets" }, - { ENETC_PM0_T1518, "MAC tx 1024-1518 byte packets" }, + { ENETC_PM0_T1522, "MAC tx 1024-1522 byte packets" }, + { ENETC_PM0_T1523X, "MAC tx 1523 to max-octet packets" }, { ENETC_PM0_TCNP, "MAC tx control packets" }, { ENETC_PM0_TDFR, "MAC tx deferred packets" }, { ENETC_PM0_TMCOL, "MAC tx multiple collisions" }, diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h index eb6bbf1113c7..4cbf1667d7ff 100644 --- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h +++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h @@ -267,8 +267,8 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_R255 0x8180 #define ENETC_PM0_R511 0x8188 #define ENETC_PM0_R1023 0x8190 -#define ENETC_PM0_R1518 0x8198 -#define ENETC_PM0_R1519X 0x81A0 +#define ENETC_PM0_R1522 0x8198 +#define ENETC_PM0_R1523X 0x81A0 #define ENETC_PM0_ROVR 0x81A8 #define ENETC_PM0_RJBR 0x81B0 #define ENETC_PM0_RFRG 0x81B8 @@ -287,9 +287,13 @@ enum enetc_bdr_type {TX, RX}; #define ENETC_PM0_TBCA 0x8250 #define ENETC_PM0_TPKT 0x8260 #define ENETC_PM0_TUND 0x8268 +#define ENETC_PM0_T64 0x8270 #define ENETC_PM0_T127 0x8278 +#define ENETC_PM0_T255 0x8280 +#define ENETC_PM0_T511 0x8288 #define ENETC_PM0_T1023 0x8290 -#define ENETC_PM0_T1518 0x8298 +#define ENETC_PM0_T1522 0x8298 +#define ENETC_PM0_T1523X 0x82A0 #define ENETC_PM0_TCNP 0x82C0 #define ENETC_PM0_TDFR 0x82D0 #define ENETC_PM0_TMCOL 0x82D8 diff --git a/drivers/net/ethernet/freescale/fman/Kconfig b/drivers/net/ethernet/freescale/fman/Kconfig index 34150182cc35..48bf8088795d 100644 --- a/drivers/net/ethernet/freescale/fman/Kconfig +++ b/drivers/net/ethernet/freescale/fman/Kconfig @@ -4,6 +4,7 @@ config FSL_FMAN depends on FSL_SOC || ARCH_LAYERSCAPE || COMPILE_TEST select GENERIC_ALLOCATOR select PHYLIB + select CRC32 default n help Freescale Data-Path Acceleration Architecture Frame Manager diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h index a9066e6ff697..ca2ab6cf84d9 100644 --- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h +++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.h @@ -35,8 +35,6 @@ #define HCLGE_DBG_DFX_SSU_2_OFFSET 12 -#pragma pack(1) - struct hclge_qos_pri_map_cmd { u8 pri0_tc : 4, pri1_tc : 4; @@ -85,8 +83,6 @@ struct hclge_dbg_reg_type_info { struct hclge_dbg_reg_common_msg reg_msg; }; -#pragma pack() - static const struct hclge_dbg_dfx_message hclge_dbg_bios_common_reg[] = { {false, "Reserved"}, {true, "BP_CPU_STATE"}, diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c index b30f00891c03..128ab6898070 100644 --- a/drivers/net/ethernet/intel/e1000e/netdev.c +++ b/drivers/net/ethernet/intel/e1000e/netdev.c @@ -6475,13 +6475,13 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter) /* Ungate PGCB clock */ mac_data = er32(FEXTNVM9); - mac_data |= BIT(28); + mac_data &= ~BIT(28); ew32(FEXTNVM9, mac_data); /* Enable K1 off to enable mPHY Power Gating */ mac_data = er32(FEXTNVM6); mac_data |= BIT(31); - ew32(FEXTNVM12, mac_data); + ew32(FEXTNVM6, mac_data); /* Enable mPHY power gating for any link and speed */ mac_data = er32(FEXTNVM8); @@ -6525,11 +6525,11 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter) /* Disable K1 off */ mac_data = er32(FEXTNVM6); mac_data &= ~BIT(31); - ew32(FEXTNVM12, mac_data); + ew32(FEXTNVM6, mac_data); /* Disable Ungate PGCB clock */ mac_data = er32(FEXTNVM9); - mac_data &= ~BIT(28); + mac_data |= BIT(28); ew32(FEXTNVM9, mac_data); /* Cancel not waking from dynamic diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c index d43ce13a93c9..3f5825fa67c9 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c +++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c @@ -1850,6 +1850,7 @@ static inline bool i40e_page_is_reusable(struct page *page) * the adapter for another receive * * @rx_buffer: buffer containing the page + * @rx_buffer_pgcnt: buffer page refcount pre xdp_do_redirect() call * * If page is reusable, rx_buffer->page_offset is adjusted to point to * an unused region in the page. @@ -1872,7 +1873,8 @@ static inline bool i40e_page_is_reusable(struct page *page) * * In either case, if the page is reusable its refcount is increased. **/ -static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) +static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1883,7 +1885,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) return false; #else #define I40E_LAST_OFFSET \ @@ -1942,16 +1944,24 @@ static void i40e_add_rx_frag(struct i40e_ring *rx_ring, * i40e_get_rx_buffer - Fetch Rx buffer and synchronize data for use * @rx_ring: rx descriptor ring to transact packets on * @size: size of buffer to add to skb + * @rx_buffer_pgcnt: buffer page refcount * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. */ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring, - const unsigned int size) + const unsigned int size, + int *rx_buffer_pgcnt) { struct i40e_rx_buffer *rx_buffer; rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean); + *rx_buffer_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetch_page_address(rx_buffer->page); /* we are reusing so sync this buffer for CPU use */ @@ -2102,14 +2112,16 @@ static struct sk_buff *i40e_build_skb(struct i40e_ring *rx_ring, * i40e_put_rx_buffer - Clean up used buffer and either recycle or free * @rx_ring: rx descriptor ring to transact packets on * @rx_buffer: rx buffer to pull data from + * @rx_buffer_pgcnt: rx buffer page refcount pre xdp_do_redirect() call * * This function will clean up the contents of the rx_buffer. It will * either recycle the buffer or unmap it and free the associated resources. */ static void i40e_put_rx_buffer(struct i40e_ring *rx_ring, - struct i40e_rx_buffer *rx_buffer) + struct i40e_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { - if (i40e_can_reuse_rx_page(rx_buffer)) { + if (i40e_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ i40e_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -2336,6 +2348,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) while (likely(total_rx_packets < (unsigned int)budget)) { struct i40e_rx_buffer *rx_buffer; union i40e_rx_desc *rx_desc; + int rx_buffer_pgcnt; unsigned int size; u64 qword; @@ -2378,7 +2391,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) break; i40e_trace(clean_rx_irq, rx_ring, rx_desc, skb); - rx_buffer = i40e_get_rx_buffer(rx_ring, size); + rx_buffer = i40e_get_rx_buffer(rx_ring, size, &rx_buffer_pgcnt); /* retrieve a buffer from the ring */ if (!skb) { @@ -2421,7 +2434,7 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget) break; } - i40e_put_rx_buffer(rx_ring, rx_buffer); + i40e_put_rx_buffer(rx_ring, rx_buffer, rx_buffer_pgcnt); cleaned_count++; if (i40e_is_non_eop(rx_ring, rx_desc, skb)) diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c index eae75260fe20..23eca2f0a03b 100644 --- a/drivers/net/ethernet/intel/ice/ice_txrx.c +++ b/drivers/net/ethernet/intel/ice/ice_txrx.c @@ -762,13 +762,15 @@ ice_rx_buf_adjust_pg_offset(struct ice_rx_buf *rx_buf, unsigned int size) /** * ice_can_reuse_rx_page - Determine if page can be reused for another Rx * @rx_buf: buffer containing the page + * @rx_buf_pgcnt: rx_buf page refcount pre xdp_do_redirect() call * * If page is reusable, we have a green light for calling ice_reuse_rx_page, * which will assign the current buffer to the buffer that next_to_alloc is * pointing to; otherwise, the DMA mapping needs to be destroyed and * page freed */ -static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) +static bool +ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf, int rx_buf_pgcnt) { unsigned int pagecnt_bias = rx_buf->pagecnt_bias; struct page *page = rx_buf->page; @@ -779,7 +781,7 @@ static bool ice_can_reuse_rx_page(struct ice_rx_buf *rx_buf) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buf_pgcnt - pagecnt_bias) > 1)) return false; #else #define ICE_LAST_OFFSET \ @@ -864,17 +866,24 @@ ice_reuse_rx_page(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf) * @rx_ring: Rx descriptor ring to transact packets on * @skb: skb to be used * @size: size of buffer to add to skb + * @rx_buf_pgcnt: rx_buf page refcount * * This function will pull an Rx buffer from the ring and synchronize it * for use by the CPU. */ static struct ice_rx_buf * ice_get_rx_buf(struct ice_ring *rx_ring, struct sk_buff **skb, - const unsigned int size) + const unsigned int size, int *rx_buf_pgcnt) { struct ice_rx_buf *rx_buf; rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean]; + *rx_buf_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buf->page); +#else + 0; +#endif prefetchw(rx_buf->page); *skb = rx_buf->skb; @@ -1006,12 +1015,15 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, * ice_put_rx_buf - Clean up used buffer and either recycle or free * @rx_ring: Rx descriptor ring to transact packets on * @rx_buf: Rx buffer to pull data from + * @rx_buf_pgcnt: Rx buffer page count pre xdp_do_redirect() * * This function will update next_to_clean and then clean up the contents * of the rx_buf. It will either recycle the buffer or unmap it and free * the associated resources. */ -static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) +static void +ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf, + int rx_buf_pgcnt) { u16 ntc = rx_ring->next_to_clean + 1; @@ -1022,7 +1034,7 @@ static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf) if (!rx_buf) return; - if (ice_can_reuse_rx_page(rx_buf)) { + if (ice_can_reuse_rx_page(rx_buf, rx_buf_pgcnt)) { /* hand second half of page back to the ring */ ice_reuse_rx_page(rx_ring, rx_buf); } else { @@ -1097,6 +1109,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) struct sk_buff *skb; unsigned int size; u16 stat_err_bits; + int rx_buf_pgcnt; u16 vlan_tag = 0; u8 rx_ptype; @@ -1119,7 +1132,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) dma_rmb(); if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) { - ice_put_rx_buf(rx_ring, NULL); + ice_put_rx_buf(rx_ring, NULL, 0); cleaned_count++; continue; } @@ -1128,7 +1141,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) ICE_RX_FLX_DESC_PKT_LEN_M; /* retrieve a buffer from the ring */ - rx_buf = ice_get_rx_buf(rx_ring, &skb, size); + rx_buf = ice_get_rx_buf(rx_ring, &skb, size, &rx_buf_pgcnt); if (!size) { xdp.data = NULL; @@ -1168,7 +1181,7 @@ int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget) total_rx_pkts++; cleaned_count++; - ice_put_rx_buf(rx_ring, rx_buf); + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); continue; construct_skb: if (skb) { @@ -1187,7 +1200,7 @@ construct_skb: break; } - ice_put_rx_buf(rx_ring, rx_buf); + ice_put_rx_buf(rx_ring, rx_buf, rx_buf_pgcnt); cleaned_count++; /* skip if it is NOP desc */ diff --git a/drivers/net/ethernet/intel/igb/igb.h b/drivers/net/ethernet/intel/igb/igb.h index 0286d2fceee4..aaa954aae574 100644 --- a/drivers/net/ethernet/intel/igb/igb.h +++ b/drivers/net/ethernet/intel/igb/igb.h @@ -138,6 +138,8 @@ struct vf_mac_filter { /* this is the size past which hardware will drop packets when setting LPE=0 */ #define MAXIMUM_ETHERNET_VLAN_SIZE 1522 +#define IGB_ETH_PKT_HDR_PAD (ETH_HLEN + ETH_FCS_LEN + (VLAN_HLEN * 2)) + /* Supported Rx Buffer Sizes */ #define IGB_RXBUFFER_256 256 #define IGB_RXBUFFER_1536 1536 @@ -247,6 +249,9 @@ enum igb_tx_flags { #define IGB_SFF_ADDRESSING_MODE 0x4 #define IGB_SFF_8472_UNSUP 0x00 +/* TX resources are shared between XDP and netstack + * and we need to tag the buffer type to distinguish them + */ enum igb_tx_buf_type { IGB_TYPE_SKB = 0, IGB_TYPE_XDP, diff --git a/drivers/net/ethernet/intel/igb/igb_main.c b/drivers/net/ethernet/intel/igb/igb_main.c index 5fc2c381da55..0d343d050973 100644 --- a/drivers/net/ethernet/intel/igb/igb_main.c +++ b/drivers/net/ethernet/intel/igb/igb_main.c @@ -2824,20 +2824,25 @@ static int igb_setup_tc(struct net_device *dev, enum tc_setup_type type, } } -static int igb_xdp_setup(struct net_device *dev, struct bpf_prog *prog) +static int igb_xdp_setup(struct net_device *dev, struct netdev_bpf *bpf) { - int i, frame_size = dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int i, frame_size = dev->mtu + IGB_ETH_PKT_HDR_PAD; struct igb_adapter *adapter = netdev_priv(dev); + struct bpf_prog *prog = bpf->prog, *old_prog; bool running = netif_running(dev); - struct bpf_prog *old_prog; bool need_reset; /* verify igb ring attributes are sufficient for XDP */ for (i = 0; i < adapter->num_rx_queues; i++) { struct igb_ring *ring = adapter->rx_ring[i]; - if (frame_size > igb_rx_bufsz(ring)) + if (frame_size > igb_rx_bufsz(ring)) { + NL_SET_ERR_MSG_MOD(bpf->extack, + "The RX buffer size is too small for the frame size"); + netdev_warn(dev, "XDP RX buffer size %d is too small for the frame size %d\n", + igb_rx_bufsz(ring), frame_size); return -EINVAL; + } } old_prog = xchg(&adapter->xdp_prog, prog); @@ -2869,7 +2874,7 @@ static int igb_xdp(struct net_device *dev, struct netdev_bpf *xdp) { switch (xdp->command) { case XDP_SETUP_PROG: - return igb_xdp_setup(dev, xdp->prog); + return igb_xdp_setup(dev, xdp); default: return -EINVAL; } @@ -2910,10 +2915,12 @@ static int igb_xdp_xmit_back(struct igb_adapter *adapter, struct xdp_buff *xdp) */ tx_ring = adapter->xdp_prog ? igb_xdp_tx_queue_mapping(adapter) : NULL; if (unlikely(!tx_ring)) - return -ENXIO; + return IGB_XDP_CONSUMED; nq = txring_txq(tx_ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + nq->trans_start = jiffies; ret = igb_xmit_xdp_ring(adapter, tx_ring, xdpf); __netif_tx_unlock(nq); @@ -2946,6 +2953,9 @@ static int igb_xdp_xmit(struct net_device *dev, int n, nq = txring_txq(tx_ring); __netif_tx_lock(nq, cpu); + /* Avoid transmit queue timeout since we share it with the slow path */ + nq->trans_start = jiffies; + for (i = 0; i < n; i++) { struct xdp_frame *xdpf = frames[i]; int err; @@ -3950,8 +3960,7 @@ static int igb_sw_init(struct igb_adapter *adapter) /* set default work limits */ adapter->tx_work_limit = IGB_DEFAULT_TX_WORK; - adapter->max_frame_size = netdev->mtu + ETH_HLEN + ETH_FCS_LEN + - VLAN_HLEN; + adapter->max_frame_size = netdev->mtu + IGB_ETH_PKT_HDR_PAD; adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN; spin_lock_init(&adapter->nfc_lock); @@ -6491,7 +6500,7 @@ static void igb_get_stats64(struct net_device *netdev, static int igb_change_mtu(struct net_device *netdev, int new_mtu) { struct igb_adapter *adapter = netdev_priv(netdev); - int max_frame = new_mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN; + int max_frame = new_mtu + IGB_ETH_PKT_HDR_PAD; if (adapter->xdp_prog) { int i; @@ -6500,7 +6509,9 @@ static int igb_change_mtu(struct net_device *netdev, int new_mtu) struct igb_ring *ring = adapter->rx_ring[i]; if (max_frame > igb_rx_bufsz(ring)) { - netdev_warn(adapter->netdev, "Requested MTU size is not supported with XDP\n"); + netdev_warn(adapter->netdev, + "Requested MTU size is not supported with XDP. Max frame size is %d\n", + max_frame); return -EINVAL; } } @@ -8351,6 +8362,7 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, SKB_DATA_ALIGN(xdp->data_end - xdp->data_hard_start); #endif + unsigned int metasize = xdp->data - xdp->data_meta; struct sk_buff *skb; /* prefetch first cache line of first page */ @@ -8365,6 +8377,9 @@ static struct sk_buff *igb_build_skb(struct igb_ring *rx_ring, skb_reserve(skb, xdp->data - xdp->data_hard_start); __skb_put(skb, xdp->data_end - xdp->data); + if (metasize) + skb_metadata_set(skb, metasize); + /* pull timestamp out of packet data */ if (igb_test_staterr(rx_desc, E1000_RXDADV_STAT_TSIP)) { igb_ptp_rx_pktstamp(rx_ring->q_vector, skb->data, skb); @@ -8771,7 +8786,7 @@ static int igb_clean_rx_irq(struct igb_q_vector *q_vector, const int budget) rx_ring->skb = skb; if (xdp_xmit & IGB_XDP_REDIR) - xdp_do_flush_map(); + xdp_do_flush(); if (xdp_xmit & IGB_XDP_TX) { struct igb_ring *tx_ring = igb_xdp_tx_queue_mapping(adapter); diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c index 45ae33e15303..f3f449f53920 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c @@ -1945,7 +1945,8 @@ static inline bool ixgbe_page_is_reserved(struct page *page) return (page_to_nid(page) != numa_mem_id()) || page_is_pfmemalloc(page); } -static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) +static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer, + int rx_buffer_pgcnt) { unsigned int pagecnt_bias = rx_buffer->pagecnt_bias; struct page *page = rx_buffer->page; @@ -1956,7 +1957,7 @@ static bool ixgbe_can_reuse_rx_page(struct ixgbe_rx_buffer *rx_buffer) #if (PAGE_SIZE < 8192) /* if we are only owner of page we can reuse it */ - if (unlikely((page_ref_count(page) - pagecnt_bias) > 1)) + if (unlikely((rx_buffer_pgcnt - pagecnt_bias) > 1)) return false; #else /* The last offset is a bit aggressive in that we assume the @@ -2021,11 +2022,18 @@ static void ixgbe_add_rx_frag(struct ixgbe_ring *rx_ring, static struct ixgbe_rx_buffer *ixgbe_get_rx_buffer(struct ixgbe_ring *rx_ring, union ixgbe_adv_rx_desc *rx_desc, struct sk_buff **skb, - const unsigned int size) + const unsigned int size, + int *rx_buffer_pgcnt) { struct ixgbe_rx_buffer *rx_buffer; rx_buffer = &rx_ring->rx_buffer_info[rx_ring->next_to_clean]; + *rx_buffer_pgcnt = +#if (PAGE_SIZE < 8192) + page_count(rx_buffer->page); +#else + 0; +#endif prefetchw(rx_buffer->page); *skb = rx_buffer->skb; @@ -2055,9 +2063,10 @@ skip_sync: static void ixgbe_put_rx_buffer(struct ixgbe_ring *rx_ring, struct ixgbe_rx_buffer *rx_buffer, - struct sk_buff *skb) + struct sk_buff *skb, + int rx_buffer_pgcnt) { - if (ixgbe_can_reuse_rx_page(rx_buffer)) { + if (ixgbe_can_reuse_rx_page(rx_buffer, rx_buffer_pgcnt)) { /* hand second half of page back to the ring */ ixgbe_reuse_rx_page(rx_ring, rx_buffer); } else { @@ -2303,6 +2312,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, union ixgbe_adv_rx_desc *rx_desc; struct ixgbe_rx_buffer *rx_buffer; struct sk_buff *skb; + int rx_buffer_pgcnt; unsigned int size; /* return some buffers to hardware, one at a time is too slow */ @@ -2322,7 +2332,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, */ dma_rmb(); - rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size); + rx_buffer = ixgbe_get_rx_buffer(rx_ring, rx_desc, &skb, size, &rx_buffer_pgcnt); /* retrieve a buffer from the ring */ if (!skb) { @@ -2367,7 +2377,7 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector, break; } - ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb); + ixgbe_put_rx_buffer(rx_ring, rx_buffer, skb, rx_buffer_pgcnt); cleaned_count++; /* place incomplete frames back on ring for completion */ diff --git a/drivers/net/ethernet/marvell/prestera/prestera_main.c b/drivers/net/ethernet/marvell/prestera/prestera_main.c index 0f20e0788cce..da4b286d1337 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_main.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_main.c @@ -318,8 +318,10 @@ static int prestera_port_create(struct prestera_switch *sw, u32 id) goto err_port_init; } - if (port->fp_id >= PRESTERA_MAC_ADDR_NUM_MAX) + if (port->fp_id >= PRESTERA_MAC_ADDR_NUM_MAX) { + err = -EINVAL; goto err_port_init; + } /* firmware requires that port's MAC address consist of the first * 5 bytes of the base MAC address diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c index 106513f772c3..6f290319b617 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c @@ -1378,8 +1378,10 @@ static void mlx4_en_tx_timeout(struct net_device *dev, unsigned int txqueue) tx_ring->cons, tx_ring->prod); priv->port_stats.tx_timeout++; - en_dbg(DRV, priv, "Scheduling watchdog\n"); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) { + en_dbg(DRV, priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); + } } @@ -1733,6 +1735,7 @@ int mlx4_en_start_port(struct net_device *dev) mlx4_en_deactivate_cq(priv, cq); goto tx_err; } + clear_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &tx_ring->state); if (t != TX_XDP) { tx_ring->tx_queue = netdev_get_tx_queue(dev, i); tx_ring->recycle_ring = NULL; @@ -1829,6 +1832,7 @@ int mlx4_en_start_port(struct net_device *dev) local_bh_enable(); } + clear_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state); netif_tx_start_all_queues(dev); netif_device_attach(dev); @@ -1999,7 +2003,7 @@ void mlx4_en_stop_port(struct net_device *dev, int detach) static void mlx4_en_restart(struct work_struct *work) { struct mlx4_en_priv *priv = container_of(work, struct mlx4_en_priv, - watchdog_task); + restart_task); struct mlx4_en_dev *mdev = priv->mdev; struct net_device *dev = priv->dev; @@ -2377,7 +2381,7 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (netif_running(dev)) { mutex_lock(&mdev->state_lock); if (!mdev->device_up) { - /* NIC is probably restarting - let watchdog task reset + /* NIC is probably restarting - let restart task reset * the port */ en_dbg(DRV, priv, "Change MTU called with card down!?\n"); } else { @@ -2386,7 +2390,9 @@ static int mlx4_en_change_mtu(struct net_device *dev, int new_mtu) if (err) { en_err(priv, "Failed restarting port:%d\n", priv->port); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, + &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); } } mutex_unlock(&mdev->state_lock); @@ -2792,7 +2798,8 @@ static int mlx4_xdp_set(struct net_device *dev, struct bpf_prog *prog) if (err) { en_err(priv, "Failed starting port %d for XDP change\n", priv->port); - queue_work(mdev->workqueue, &priv->watchdog_task); + if (!test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + queue_work(mdev->workqueue, &priv->restart_task); } } @@ -3165,7 +3172,7 @@ int mlx4_en_init_netdev(struct mlx4_en_dev *mdev, int port, priv->counter_index = MLX4_SINK_COUNTER_INDEX(mdev->dev); spin_lock_init(&priv->stats_lock); INIT_WORK(&priv->rx_mode_task, mlx4_en_do_set_rx_mode); - INIT_WORK(&priv->watchdog_task, mlx4_en_restart); + INIT_WORK(&priv->restart_task, mlx4_en_restart); INIT_WORK(&priv->linkstate_task, mlx4_en_linkstate); INIT_DELAYED_WORK(&priv->stats_task, mlx4_en_do_get_stats); INIT_DELAYED_WORK(&priv->service_task, mlx4_en_service_task); diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c index 3ddb7268e415..59b097cda327 100644 --- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c @@ -392,6 +392,35 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring) return cnt; } +static void mlx4_en_handle_err_cqe(struct mlx4_en_priv *priv, struct mlx4_err_cqe *err_cqe, + u16 cqe_index, struct mlx4_en_tx_ring *ring) +{ + struct mlx4_en_dev *mdev = priv->mdev; + struct mlx4_en_tx_info *tx_info; + struct mlx4_en_tx_desc *tx_desc; + u16 wqe_index; + int desc_size; + + en_err(priv, "CQE error - cqn 0x%x, ci 0x%x, vendor syndrome: 0x%x syndrome: 0x%x\n", + ring->sp_cqn, cqe_index, err_cqe->vendor_err_syndrome, err_cqe->syndrome); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, err_cqe, sizeof(*err_cqe), + false); + + wqe_index = be16_to_cpu(err_cqe->wqe_index) & ring->size_mask; + tx_info = &ring->tx_info[wqe_index]; + desc_size = tx_info->nr_txbb << LOG_TXBB_SIZE; + en_err(priv, "Related WQE - qpn 0x%x, wqe index 0x%x, wqe size 0x%x\n", ring->qpn, + wqe_index, desc_size); + tx_desc = ring->buf + (wqe_index << LOG_TXBB_SIZE); + print_hex_dump(KERN_WARNING, "", DUMP_PREFIX_OFFSET, 16, 1, tx_desc, desc_size, false); + + if (test_and_set_bit(MLX4_EN_STATE_FLAG_RESTARTING, &priv->state)) + return; + + en_err(priv, "Scheduling port restart\n"); + queue_work(mdev->workqueue, &priv->restart_task); +} + int mlx4_en_process_tx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int napi_budget) { @@ -438,13 +467,10 @@ int mlx4_en_process_tx_cq(struct net_device *dev, dma_rmb(); if (unlikely((cqe->owner_sr_opcode & MLX4_CQE_OPCODE_MASK) == - MLX4_CQE_OPCODE_ERROR)) { - struct mlx4_err_cqe *cqe_err = (struct mlx4_err_cqe *)cqe; - - en_err(priv, "CQE error - vendor syndrome: 0x%x syndrome: 0x%x\n", - cqe_err->vendor_err_syndrome, - cqe_err->syndrome); - } + MLX4_CQE_OPCODE_ERROR)) + if (!test_and_set_bit(MLX4_EN_TX_RING_STATE_RECOVERING, &ring->state)) + mlx4_en_handle_err_cqe(priv, (struct mlx4_err_cqe *)cqe, index, + ring); /* Skip over last polled CQE */ new_index = be16_to_cpu(cqe->wqe_index) & size_mask; diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h index a46efe37cfa9..30378e4c90b5 100644 --- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h +++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h @@ -271,6 +271,10 @@ struct mlx4_en_page_cache { } buf[MLX4_EN_CACHE_SIZE]; }; +enum { + MLX4_EN_TX_RING_STATE_RECOVERING, +}; + struct mlx4_en_priv; struct mlx4_en_tx_ring { @@ -317,6 +321,7 @@ struct mlx4_en_tx_ring { * Only queue_stopped might be used if BQL is not properly working. */ unsigned long queue_stopped; + unsigned long state; struct mlx4_hwq_resources sp_wqres; struct mlx4_qp sp_qp; struct mlx4_qp_context sp_context; @@ -530,6 +535,10 @@ struct mlx4_en_stats_bitmap { struct mutex mutex; /* for mutual access to stats bitmap */ }; +enum { + MLX4_EN_STATE_FLAG_RESTARTING, +}; + struct mlx4_en_priv { struct mlx4_en_dev *mdev; struct mlx4_en_port_profile *prof; @@ -595,7 +604,7 @@ struct mlx4_en_priv { struct mlx4_en_cq *rx_cq[MAX_RX_RINGS]; struct mlx4_qp drop_qp; struct work_struct rx_mode_task; - struct work_struct watchdog_task; + struct work_struct restart_task; struct work_struct linkstate_task; struct delayed_work stats_task; struct delayed_work service_task; @@ -641,6 +650,7 @@ struct mlx4_en_priv { u32 pflags; u8 rss_key[MLX4_EN_RSS_KEY_SIZE]; u8 rss_hash_fn; + unsigned long state; }; enum mlx4_en_wol { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig index 99f1ec3b2575..3e371d24c462 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig +++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig @@ -198,6 +198,7 @@ config MLX5_EN_TLS config MLX5_SW_STEERING bool "Mellanox Technologies software-managed steering" depends on MLX5_CORE_EN && MLX5_ESWITCH + select CRC32 default y help Build support for software-managed steering in the NIC. diff --git a/drivers/net/ethernet/microchip/Kconfig b/drivers/net/ethernet/microchip/Kconfig index 31f9a82dc113..d0f6dfe0dcf3 100644 --- a/drivers/net/ethernet/microchip/Kconfig +++ b/drivers/net/ethernet/microchip/Kconfig @@ -47,6 +47,7 @@ config LAN743X depends on PCI select PHYLIB select CRC16 + select CRC32 help Support for the Microchip LAN743x PCI Express Gigabit Ethernet chip diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c index 70bf8c67d7ef..a53bd36b11c6 100644 --- a/drivers/net/ethernet/mscc/ocelot.c +++ b/drivers/net/ethernet/mscc/ocelot.c @@ -1489,10 +1489,11 @@ int ocelot_init(struct ocelot *ocelot) SYS_FRM_AGING_MAX_AGE(307692), SYS_FRM_AGING); /* Setup flooding PGIDs */ - ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) | - ANA_FLOODING_FLD_BROADCAST(PGID_MC) | - ANA_FLOODING_FLD_UNICAST(PGID_UC), - ANA_FLOODING, 0); + for (i = 0; i < ocelot->num_flooding_pgids; i++) + ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) | + ANA_FLOODING_FLD_BROADCAST(PGID_MC) | + ANA_FLOODING_FLD_UNICAST(PGID_UC), + ANA_FLOODING, i); ocelot_write(ocelot, ANA_FLOODING_IPMC_FLD_MC6_DATA(PGID_MCIPV6) | ANA_FLOODING_IPMC_FLD_MC6_CTRL(PGID_MC) | ANA_FLOODING_IPMC_FLD_MC4_DATA(PGID_MCIPV4) | diff --git a/drivers/net/ethernet/mscc/ocelot_vsc7514.c b/drivers/net/ethernet/mscc/ocelot_vsc7514.c index dc00772950e5..1e7729421a82 100644 --- a/drivers/net/ethernet/mscc/ocelot_vsc7514.c +++ b/drivers/net/ethernet/mscc/ocelot_vsc7514.c @@ -1254,6 +1254,7 @@ static int mscc_ocelot_probe(struct platform_device *pdev) } ocelot->num_phys_ports = of_get_child_count(ports); + ocelot->num_flooding_pgids = 1; ocelot->vcap = vsc7514_vcap_props; ocelot->inj_prefix = OCELOT_TAG_PREFIX_NONE; diff --git a/drivers/net/ethernet/netronome/Kconfig b/drivers/net/ethernet/netronome/Kconfig index d8b99d6a0356..b82758d5beed 100644 --- a/drivers/net/ethernet/netronome/Kconfig +++ b/drivers/net/ethernet/netronome/Kconfig @@ -22,6 +22,7 @@ config NFP depends on VXLAN || VXLAN=n depends on TLS && TLS_DEVICE || TLS_DEVICE=n select NET_DEVLINK + select CRC32 help This driver supports the Netronome(R) NFP4000/NFP6000 based cards working as a advanced Ethernet NIC. It works with both diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index b150da43adb2..437226866ce8 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3562,9 +3562,6 @@ static int nfp_net_xdp_setup_drv(struct nfp_net *nn, struct netdev_bpf *bpf) struct nfp_net_dp *dp; int err; - if (!xdp_attachment_flags_ok(&nn->xdp, bpf)) - return -EBUSY; - if (!prog == !nn->dp.xdp_prog) { WRITE_ONCE(nn->dp.xdp_prog, prog); xdp_attachment_setup(&nn->xdp, bpf); @@ -3593,9 +3590,6 @@ static int nfp_net_xdp_setup_hw(struct nfp_net *nn, struct netdev_bpf *bpf) { int err; - if (!xdp_attachment_flags_ok(&nn->xdp_hw, bpf)) - return -EBUSY; - err = nfp_app_xdp_offload(nn->app, nn, bpf->prog, bpf->extack); if (err) return err; diff --git a/drivers/net/ethernet/nxp/Kconfig b/drivers/net/ethernet/nxp/Kconfig index ee83a71c2509..c84997db828c 100644 --- a/drivers/net/ethernet/nxp/Kconfig +++ b/drivers/net/ethernet/nxp/Kconfig @@ -3,6 +3,7 @@ config LPC_ENET tristate "NXP ethernet MAC on LPC devices" depends on ARCH_LPC32XX || COMPILE_TEST select PHYLIB + select CRC32 help Say Y or M here if you want to use the NXP ethernet MAC included on some NXP LPC devices. You can safely enable this option for LPC32xx diff --git a/drivers/net/ethernet/rocker/Kconfig b/drivers/net/ethernet/rocker/Kconfig index 99e1290e0307..2318811ff75a 100644 --- a/drivers/net/ethernet/rocker/Kconfig +++ b/drivers/net/ethernet/rocker/Kconfig @@ -19,6 +19,7 @@ if NET_VENDOR_ROCKER config ROCKER tristate "Rocker switch driver (EXPERIMENTAL)" depends on PCI && NET_SWITCHDEV && BRIDGE + select CRC32 help This driver supports Rocker switch device. diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c index efef5476a577..223f69da7e95 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c @@ -246,13 +246,7 @@ static int imx_dwmac_probe(struct platform_device *pdev) goto err_parse_dt; } - ret = dma_set_mask_and_coherent(&pdev->dev, - DMA_BIT_MASK(dwmac->ops->addr_width)); - if (ret) { - dev_err(&pdev->dev, "DMA mask set failed\n"); - goto err_dma_mask; - } - + plat_dat->addr64 = dwmac->ops->addr_width; plat_dat->init = imx_dwmac_init; plat_dat->exit = imx_dwmac_exit; plat_dat->fix_mac_speed = imx_dwmac_fix_speed; @@ -272,7 +266,6 @@ static int imx_dwmac_probe(struct platform_device *pdev) err_dwmac_init: err_drv_probe: imx_dwmac_exit(pdev, plat_dat->bsp_priv); -err_dma_mask: err_parse_dt: err_match_data: stmmac_remove_config_dt(pdev, plat_dat); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index 5afcf05bbf9c..6d6bd77bb6af 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -30,7 +30,6 @@ #define PRG_ETH0_EXT_RMII_MODE 4 /* mux to choose between fclk_div2 (bit unset) and mpll2 (bit set) */ -#define PRG_ETH0_CLK_M250_SEL_SHIFT 4 #define PRG_ETH0_CLK_M250_SEL_MASK GENMASK(4, 4) /* TX clock delay in ns = "8ns / 4 * tx_dly_val" (where 8ns are exactly one @@ -155,8 +154,9 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) return -ENOMEM; clk_configs->m250_mux.reg = dwmac->regs + PRG_ETH0; - clk_configs->m250_mux.shift = PRG_ETH0_CLK_M250_SEL_SHIFT; - clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK; + clk_configs->m250_mux.shift = __ffs(PRG_ETH0_CLK_M250_SEL_MASK); + clk_configs->m250_mux.mask = PRG_ETH0_CLK_M250_SEL_MASK >> + clk_configs->m250_mux.shift; clk = meson8b_dwmac_register_clk(dwmac, "m250_sel", mux_parents, ARRAY_SIZE(mux_parents), &clk_mux_ops, &clk_configs->m250_mux.hw); diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c index 6e30d7eb4983..0b4ee2dbb691 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_lib.c @@ -22,7 +22,7 @@ int dwmac4_dma_reset(void __iomem *ioaddr) return readl_poll_timeout(ioaddr + DMA_BUS_MODE, value, !(value & DMA_BUS_MODE_SFT_RESET), - 10000, 100000); + 10000, 1000000); } void dwmac4_set_rx_tail_ptr(void __iomem *ioaddr, u32 tail_ptr, u32 chan) diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c index ba45fe237512..c33db79cdd0a 100644 --- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c +++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c @@ -1534,6 +1534,19 @@ static void dma_free_tx_skbufs(struct stmmac_priv *priv, u32 queue) } /** + * stmmac_free_tx_skbufs - free TX skb buffers + * @priv: private structure + */ +static void stmmac_free_tx_skbufs(struct stmmac_priv *priv) +{ + u32 tx_queue_cnt = priv->plat->tx_queues_to_use; + u32 queue; + + for (queue = 0; queue < tx_queue_cnt; queue++) + dma_free_tx_skbufs(priv, queue); +} + +/** * free_dma_rx_desc_resources - free RX dma desc resources * @priv: private structure */ @@ -2895,9 +2908,6 @@ static int stmmac_release(struct net_device *dev) struct stmmac_priv *priv = netdev_priv(dev); u32 chan; - if (priv->eee_enabled) - del_timer_sync(&priv->eee_ctrl_timer); - if (device_may_wakeup(priv->device)) phylink_speed_down(priv->phylink, false); /* Stop and disconnect the PHY */ @@ -2916,6 +2926,11 @@ static int stmmac_release(struct net_device *dev) if (priv->lpi_irq > 0) free_irq(priv->lpi_irq, dev); + if (priv->eee_enabled) { + priv->tx_path_in_lpi_mode = false; + del_timer_sync(&priv->eee_ctrl_timer); + } + /* Stop TX/RX DMA and clear the descriptors */ stmmac_stop_all_dma(priv); @@ -4930,6 +4945,14 @@ int stmmac_dvr_probe(struct device *device, dev_info(priv->device, "SPH feature enabled\n"); } + /* The current IP register MAC_HW_Feature1[ADDR64] only define + * 32/40/64 bit width, but some SOC support others like i.MX8MP + * support 34 bits but it map to 40 bits width in MAC_HW_Feature1[ADDR64]. + * So overwrite dma_cap.addr64 according to HW real design. + */ + if (priv->plat->addr64) + priv->dma_cap.addr64 = priv->plat->addr64; + if (priv->dma_cap.addr64) { ret = dma_set_mask_and_coherent(device, DMA_BIT_MASK(priv->dma_cap.addr64)); @@ -5142,6 +5165,11 @@ int stmmac_suspend(struct device *dev) for (chan = 0; chan < priv->plat->tx_queues_to_use; chan++) del_timer_sync(&priv->tx_queue[chan].txtimer); + if (priv->eee_enabled) { + priv->tx_path_in_lpi_mode = false; + del_timer_sync(&priv->eee_ctrl_timer); + } + /* Stop TX/RX DMA */ stmmac_stop_all_dma(priv); @@ -5247,11 +5275,20 @@ int stmmac_resume(struct device *dev) return ret; } + if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { + rtnl_lock(); + phylink_start(priv->phylink); + /* We may have called phylink_speed_down before */ + phylink_speed_up(priv->phylink); + rtnl_unlock(); + } + rtnl_lock(); mutex_lock(&priv->lock); stmmac_reset_queues_param(priv); + stmmac_free_tx_skbufs(priv); stmmac_clear_descriptors(priv); stmmac_hw_setup(ndev, false); @@ -5265,14 +5302,6 @@ int stmmac_resume(struct device *dev) mutex_unlock(&priv->lock); rtnl_unlock(); - if (!device_may_wakeup(priv->device) || !priv->plat->pmt) { - rtnl_lock(); - phylink_start(priv->phylink); - /* We may have called phylink_speed_down before */ - phylink_speed_up(priv->phylink); - rtnl_unlock(); - } - phylink_mac_change(priv->phylink, true); netif_device_attach(ndev); diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c index 31c5e36ff706..424e644724e4 100644 --- a/drivers/net/ethernet/ti/cpsw_priv.c +++ b/drivers/net/ethernet/ti/cpsw_priv.c @@ -1265,9 +1265,6 @@ static int cpsw_xdp_prog_setup(struct cpsw_priv *priv, struct netdev_bpf *bpf) if (!priv->xdpi.prog && !prog) return 0; - if (!xdp_attachment_flags_ok(&priv->xdpi, bpf)) - return -EBUSY; - WRITE_ONCE(priv->xdp_prog, prog); xdp_attachment_setup(&priv->xdpi, bpf); diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c index 60c199fcb91e..030185301014 100644 --- a/drivers/net/ethernet/xilinx/ll_temac_main.c +++ b/drivers/net/ethernet/xilinx/ll_temac_main.c @@ -1351,7 +1351,6 @@ static int temac_probe(struct platform_device *pdev) struct device_node *temac_np = dev_of_node(&pdev->dev), *dma_np; struct temac_local *lp; struct net_device *ndev; - struct resource *res; const void *addr; __be32 *p; bool little_endian; @@ -1500,13 +1499,11 @@ static int temac_probe(struct platform_device *pdev) of_node_put(dma_np); } else if (pdata) { /* 2nd memory resource specifies DMA registers */ - res = platform_get_resource(pdev, IORESOURCE_MEM, 1); - lp->sdma_regs = devm_ioremap(&pdev->dev, res->start, - resource_size(res)); - if (!lp->sdma_regs) { + lp->sdma_regs = devm_platform_ioremap_resource(pdev, 1); + if (IS_ERR(lp->sdma_regs)) { dev_err(&pdev->dev, "could not map DMA registers\n"); - return -ENOMEM; + return PTR_ERR(lp->sdma_regs); } if (pdata->dma_little_endian) { lp->dma_in = temac_dma_in32_le; diff --git a/drivers/net/geneve.c b/drivers/net/geneve.c index 8ae9ce2014a4..1426bfc009bc 100644 --- a/drivers/net/geneve.c +++ b/drivers/net/geneve.c @@ -257,21 +257,11 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, skb_dst_set(skb, &tun_dst->dst); /* Ignore packet loops (and multicast echo) */ - if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) - goto rx_error; - - switch (skb_protocol(skb, true)) { - case htons(ETH_P_IP): - if (pskb_may_pull(skb, sizeof(struct iphdr))) - goto rx_error; - break; - case htons(ETH_P_IPV6): - if (pskb_may_pull(skb, sizeof(struct ipv6hdr))) - goto rx_error; - break; - default: - goto rx_error; + if (ether_addr_equal(eth_hdr(skb)->h_source, geneve->dev->dev_addr)) { + geneve->dev->stats.rx_errors++; + goto drop; } + oiph = skb_network_header(skb); skb_reset_network_header(skb); @@ -308,8 +298,6 @@ static void geneve_rx(struct geneve_dev *geneve, struct geneve_sock *gs, dev_sw_netstats_rx_add(geneve->dev, len); return; -rx_error: - geneve->dev->stats.rx_errors++; drop: /* Consume bad packet */ kfree_skb(skb); diff --git a/drivers/net/ipa/gsi_trans.c b/drivers/net/ipa/gsi_trans.c index e8599bb948c0..6c3ed5b17b80 100644 --- a/drivers/net/ipa/gsi_trans.c +++ b/drivers/net/ipa/gsi_trans.c @@ -156,6 +156,9 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, /* The allocator will give us a power-of-2 number of pages. But we * can't guarantee that, so request it. That way we won't waste any * memory that would be available beyond the required space. + * + * Note that gsi_trans_pool_exit_dma() assumes the total allocated + * size is exactly (count * size). */ total_size = get_order(total_size) << PAGE_SHIFT; @@ -175,7 +178,9 @@ int gsi_trans_pool_init_dma(struct device *dev, struct gsi_trans_pool *pool, void gsi_trans_pool_exit_dma(struct device *dev, struct gsi_trans_pool *pool) { - dma_free_coherent(dev, pool->size, pool->base, pool->addr); + size_t total_size = pool->count * pool->size; + + dma_free_coherent(dev, total_size, pool->base, pool->addr); memset(pool, 0, sizeof(*pool)); } diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c index 2e90512f3bbe..90aafb56f140 100644 --- a/drivers/net/netdevsim/bpf.c +++ b/drivers/net/netdevsim/bpf.c @@ -63,15 +63,20 @@ static int nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) { struct nsim_bpf_bound_prog *state; + int ret = 0; state = env->prog->aux->offload->dev_priv; if (state->nsim_dev->bpf_bind_verifier_delay && !insn_idx) msleep(state->nsim_dev->bpf_bind_verifier_delay); - if (insn_idx == env->prog->len - 1) + if (insn_idx == env->prog->len - 1) { pr_vlog(env, "Hello from netdevsim!\n"); - return 0; + if (!state->nsim_dev->bpf_bind_verifier_accept) + ret = -EOPNOTSUPP; + } + + return ret; } static int nsim_bpf_finalize(struct bpf_verifier_env *env) @@ -190,9 +195,6 @@ nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf, { int err; - if (!xdp_attachment_flags_ok(xdp, bpf)) - return -EBUSY; - if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS"); return -EOPNOTSUPP; @@ -598,6 +600,9 @@ int nsim_bpf_dev_init(struct nsim_dev *nsim_dev) &nsim_dev->bpf_bind_accept); debugfs_create_u32("bpf_bind_verifier_delay", 0600, nsim_dev->ddir, &nsim_dev->bpf_bind_verifier_delay); + nsim_dev->bpf_bind_verifier_accept = true; + debugfs_create_bool("bpf_bind_verifier_accept", 0600, nsim_dev->ddir, + &nsim_dev->bpf_bind_verifier_accept); return 0; } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 827fc80f50a0..c4e7ad2a1964 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -189,6 +189,7 @@ struct nsim_dev { struct dentry *take_snapshot; struct bpf_offload_dev *bpf_dev; bool bpf_bind_accept; + bool bpf_bind_verifier_accept; u32 bpf_bind_verifier_delay; struct dentry *ddir_bpf_bound_progs; u32 prog_id_gen; diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c index f2793ffde191..b9b7e00b72a8 100644 --- a/drivers/net/vrf.c +++ b/drivers/net/vrf.c @@ -1315,11 +1315,17 @@ static struct sk_buff *vrf_ip6_rcv(struct net_device *vrf_dev, int orig_iif = skb->skb_iif; bool need_strict = rt6_need_strict(&ipv6_hdr(skb)->daddr); bool is_ndisc = ipv6_ndisc_frame(skb); + bool is_ll_src; /* loopback, multicast & non-ND link-local traffic; do not push through - * packet taps again. Reset pkt_type for upper layers to process skb + * packet taps again. Reset pkt_type for upper layers to process skb. + * for packets with lladdr src, however, skip so that the dst can be + * determine at input using original ifindex in the case that daddr + * needs strict */ - if (skb->pkt_type == PACKET_LOOPBACK || (need_strict && !is_ndisc)) { + is_ll_src = ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL; + if (skb->pkt_type == PACKET_LOOPBACK || + (need_strict && !is_ndisc && !is_ll_src)) { skb->dev = vrf_dev; skb->skb_iif = vrf_dev->ifindex; IP6CB(skb)->flags |= IP6SKB_L3SLAVE; diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index 5deb099d156d..8ebb64193757 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -227,7 +227,7 @@ struct xt_table { unsigned int valid_hooks; /* Man behind the curtain... */ - struct xt_table_info *private; + struct xt_table_info __rcu *private; /* Set this to THIS_MODULE if you are a module, otherwise NULL */ struct module *me; @@ -448,6 +448,9 @@ xt_get_per_cpu_counter(struct xt_counters *cnt, unsigned int cpu) struct nf_hook_ops *xt_hook_ops_alloc(const struct xt_table *, nf_hookfn *); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table); + #ifdef CONFIG_COMPAT #include <net/compat.h> diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 628e28903b8b..15ca6b4167cc 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -170,6 +170,7 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; + u32 addr64; u32 rx_queues_to_use; u32 tx_queues_to_use; u8 rx_sched_algorithm; diff --git a/include/net/bonding.h b/include/net/bonding.h index d9d0ff3b0ad3..adc3da776970 100644 --- a/include/net/bonding.h +++ b/include/net/bonding.h @@ -86,10 +86,8 @@ #define bond_for_each_slave_rcu(bond, pos, iter) \ netdev_for_each_lower_private_rcu((bond)->dev, pos, iter) -#ifdef CONFIG_XFRM_OFFLOAD #define BOND_XFRM_FEATURES (NETIF_F_HW_ESP | NETIF_F_HW_ESP_TX_CSUM | \ NETIF_F_GSO_ESP) -#endif /* CONFIG_XFRM_OFFLOAD */ #ifdef CONFIG_NET_POLL_CONTROLLER extern atomic_t netpoll_block_tx; diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index 55b4cadf290a..c1c0a4ff92ae 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -1524,4 +1524,8 @@ void __init nft_chain_route_init(void); void nft_chain_route_fini(void); void nf_tables_trans_destroy_flush_work(void); + +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result); +__be64 nf_jiffies64_to_msecs(u64 input); + #endif /* _NET_NF_TABLES_H */ diff --git a/include/net/xdp.h b/include/net/xdp.h index 3814fb631d52..9dab2bc6f187 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -240,8 +240,6 @@ struct xdp_attachment_info { }; struct netdev_bpf; -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, - struct netdev_bpf *bpf); void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf); diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h index 1e9db9577441..49b46df476f2 100644 --- a/include/soc/mscc/ocelot.h +++ b/include/soc/mscc/ocelot.h @@ -618,6 +618,9 @@ struct ocelot { /* Keep track of the vlan port masks */ u32 vlan_mask[VLAN_N_VID]; + /* Switches like VSC9959 have flooding per traffic class */ + int num_flooding_pgids; + /* In tables like ANA:PORT and the ANA:PGID:PGID mask, * the CPU is located after the physical ports (at the * num_phys_ports index). diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 1388bf733071..53fe6ef6d931 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1298,9 +1298,7 @@ static void __reg_combine_32_into_64(struct bpf_reg_state *reg) static bool __reg64_bound_s32(s64 a) { - if (a > S32_MIN && a < S32_MAX) - return true; - return false; + return a > S32_MIN && a < S32_MAX; } static bool __reg64_bound_u32(u64 a) @@ -1314,10 +1312,10 @@ static void __reg_combine_64_into_32(struct bpf_reg_state *reg) { __mark_reg32_unbounded(reg); - if (__reg64_bound_s32(reg->smin_value)) + if (__reg64_bound_s32(reg->smin_value) && __reg64_bound_s32(reg->smax_value)) { reg->s32_min_value = (s32)reg->smin_value; - if (__reg64_bound_s32(reg->smax_value)) reg->s32_max_value = (s32)reg->smax_value; + } if (__reg64_bound_u32(reg->umin_value)) reg->u32_min_value = (u32)reg->umin_value; if (__reg64_bound_u32(reg->umax_value)) @@ -4895,6 +4893,8 @@ static void do_refine_retval_range(struct bpf_reg_state *regs, int ret_type, ret_reg->smax_value = meta->msize_max_value; ret_reg->s32_max_value = meta->msize_max_value; + ret_reg->smin_value = -MAX_ERRNO; + ret_reg->s32_min_value = -MAX_ERRNO; __reg_deduce_bounds(ret_reg); __reg_bound_offset(ret_reg); __update_reg_bounds(ret_reg); diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c index 7730c8f3cb53..d3ea9d0779fb 100644 --- a/net/bridge/br_device.c +++ b/net/bridge/br_device.c @@ -177,6 +177,9 @@ static int br_dev_open(struct net_device *dev) br_stp_enable_bridge(br); br_multicast_open(br); + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + return 0; } @@ -197,6 +200,9 @@ static int br_dev_stop(struct net_device *dev) br_stp_disable_bridge(br); br_multicast_stop(br); + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_leave_snoopers(br); + netif_stop_queue(dev); return 0; diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c index eae898c3cff7..54cb82a69056 100644 --- a/net/bridge/br_multicast.c +++ b/net/bridge/br_multicast.c @@ -3286,7 +3286,7 @@ static inline void br_ip6_multicast_join_snoopers(struct net_bridge *br) } #endif -static void br_multicast_join_snoopers(struct net_bridge *br) +void br_multicast_join_snoopers(struct net_bridge *br) { br_ip4_multicast_join_snoopers(br); br_ip6_multicast_join_snoopers(br); @@ -3317,7 +3317,7 @@ static inline void br_ip6_multicast_leave_snoopers(struct net_bridge *br) } #endif -static void br_multicast_leave_snoopers(struct net_bridge *br) +void br_multicast_leave_snoopers(struct net_bridge *br) { br_ip4_multicast_leave_snoopers(br); br_ip6_multicast_leave_snoopers(br); @@ -3336,9 +3336,6 @@ static void __br_multicast_open(struct net_bridge *br, void br_multicast_open(struct net_bridge *br) { - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_join_snoopers(br); - __br_multicast_open(br, &br->ip4_own_query); #if IS_ENABLED(CONFIG_IPV6) __br_multicast_open(br, &br->ip6_own_query); @@ -3354,9 +3351,6 @@ void br_multicast_stop(struct net_bridge *br) del_timer_sync(&br->ip6_other_query.timer); del_timer_sync(&br->ip6_own_query.timer); #endif - - if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) - br_multicast_leave_snoopers(br); } void br_multicast_dev_del(struct net_bridge *br) @@ -3487,6 +3481,7 @@ static void br_multicast_start_querier(struct net_bridge *br, int br_multicast_toggle(struct net_bridge *br, unsigned long val) { struct net_bridge_port *port; + bool change_snoopers = false; spin_lock_bh(&br->multicast_lock); if (!!br_opt_get(br, BROPT_MULTICAST_ENABLED) == !!val) @@ -3495,7 +3490,7 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) br_mc_disabled_update(br->dev, val); br_opt_toggle(br, BROPT_MULTICAST_ENABLED, !!val); if (!br_opt_get(br, BROPT_MULTICAST_ENABLED)) { - br_multicast_leave_snoopers(br); + change_snoopers = true; goto unlock; } @@ -3506,9 +3501,30 @@ int br_multicast_toggle(struct net_bridge *br, unsigned long val) list_for_each_entry(port, &br->port_list, list) __br_multicast_enable_port(port); + change_snoopers = true; + unlock: spin_unlock_bh(&br->multicast_lock); + /* br_multicast_join_snoopers has the potential to cause + * an MLD Report/Leave to be delivered to br_multicast_rcv, + * which would in turn call br_multicast_add_group, which would + * attempt to acquire multicast_lock. This function should be + * called after the lock has been released to avoid deadlocks on + * multicast_lock. + * + * br_multicast_leave_snoopers does not have the problem since + * br_multicast_rcv first checks BROPT_MULTICAST_ENABLED, and + * returns without calling br_multicast_ipv4/6_rcv if it's not + * enabled. Moved both functions out just for symmetry. + */ + if (change_snoopers) { + if (br_opt_get(br, BROPT_MULTICAST_ENABLED)) + br_multicast_join_snoopers(br); + else + br_multicast_leave_snoopers(br); + } + return 0; } diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h index 345118e35c42..8424464186a6 100644 --- a/net/bridge/br_private.h +++ b/net/bridge/br_private.h @@ -792,6 +792,8 @@ void br_multicast_del_port(struct net_bridge_port *port); void br_multicast_enable_port(struct net_bridge_port *port); void br_multicast_disable_port(struct net_bridge_port *port); void br_multicast_init(struct net_bridge *br); +void br_multicast_join_snoopers(struct net_bridge *br); +void br_multicast_leave_snoopers(struct net_bridge *br); void br_multicast_open(struct net_bridge *br); void br_multicast_stop(struct net_bridge *br); void br_multicast_dev_del(struct net_bridge *br); @@ -969,6 +971,14 @@ static inline void br_multicast_init(struct net_bridge *br) { } +static inline void br_multicast_join_snoopers(struct net_bridge *br) +{ +} + +static inline void br_multicast_leave_snoopers(struct net_bridge *br) +{ +} + static inline void br_multicast_open(struct net_bridge *br) { } diff --git a/net/bridge/br_vlan.c b/net/bridge/br_vlan.c index 3e493eb85bb2..08c77418c687 100644 --- a/net/bridge/br_vlan.c +++ b/net/bridge/br_vlan.c @@ -266,8 +266,10 @@ static int __vlan_add(struct net_bridge_vlan *v, u16 flags, } masterv = br_vlan_get_master(br, v->vid, extack); - if (!masterv) + if (!masterv) { + err = -ENOMEM; goto out_filt; + } v->brvlan = masterv; if (br_opt_get(br, BROPT_VLAN_STATS_PER_PORT)) { v->stats = netdev_alloc_pcpu_stats(struct br_vlan_stats); diff --git a/net/can/isotp.c b/net/can/isotp.c index d78ab13bd8be..26bdc3c20b7e 100644 --- a/net/can/isotp.c +++ b/net/can/isotp.c @@ -1157,6 +1157,9 @@ static int isotp_setsockopt(struct socket *sock, int level, int optname, if (level != SOL_CAN_ISOTP) return -EINVAL; + if (so->bound) + return -EISCONN; + switch (optname) { case CAN_ISOTP_OPTS: if (optlen != sizeof(struct can_isotp_options)) diff --git a/net/core/dev.c b/net/core/dev.c index 8588ade790cb..38412e70f761 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8917,6 +8917,17 @@ static struct bpf_prog *dev_xdp_prog(struct net_device *dev, return dev->xdp_state[mode].prog; } +static u8 dev_xdp_prog_count(struct net_device *dev) +{ + u8 count = 0; + int i; + + for (i = 0; i < __MAX_XDP_MODE; i++) + if (dev->xdp_state[i].prog || dev->xdp_state[i].link) + count++; + return count; +} + u32 dev_xdp_prog_id(struct net_device *dev, enum bpf_xdp_mode mode) { struct bpf_prog *prog = dev_xdp_prog(dev, mode); @@ -9007,6 +9018,7 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack struct bpf_xdp_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog, u32 flags) { + unsigned int num_modes = hweight32(flags & XDP_FLAGS_MODES); struct bpf_prog *cur_prog; enum bpf_xdp_mode mode; bpf_op_t bpf_op; @@ -9022,11 +9034,17 @@ static int dev_xdp_attach(struct net_device *dev, struct netlink_ext_ack *extack NL_SET_ERR_MSG(extack, "Invalid XDP flags for BPF link attachment"); return -EINVAL; } - /* just one XDP mode bit should be set, zero defaults to SKB mode */ - if (hweight32(flags & XDP_FLAGS_MODES) > 1) { + /* just one XDP mode bit should be set, zero defaults to drv/skb mode */ + if (num_modes > 1) { NL_SET_ERR_MSG(extack, "Only one XDP mode flag can be set"); return -EINVAL; } + /* avoid ambiguity if offload + drv/skb mode progs are both loaded */ + if (!num_modes && dev_xdp_prog_count(dev) > 1) { + NL_SET_ERR_MSG(extack, + "More than one program loaded, unset mode is ambiguous"); + return -EINVAL; + } /* old_prog != NULL implies XDP_FLAGS_REPLACE is set */ if (old_prog && !(flags & XDP_FLAGS_REPLACE)) { NL_SET_ERR_MSG(extack, "XDP_FLAGS_REPLACE is not specified"); diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c index d4474c812b64..715b67f6c62f 100644 --- a/net/core/flow_offload.c +++ b/net/core/flow_offload.c @@ -381,10 +381,8 @@ static void __flow_block_indr_cleanup(void (*release)(void *cb_priv), list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) { if (this->release == release && - this->indr.cb_priv == cb_priv) { + this->indr.cb_priv == cb_priv) list_move(&this->indr.list, cleanup_list); - return; - } } } diff --git a/net/core/lwt_bpf.c b/net/core/lwt_bpf.c index 7d3438215f32..2f7940bcf715 100644 --- a/net/core/lwt_bpf.c +++ b/net/core/lwt_bpf.c @@ -39,12 +39,11 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, { int ret; - /* Preempt disable is needed to protect per-cpu redirect_info between - * BPF prog and skb_do_redirect(). The call_rcu in bpf_prog_put() and - * access to maps strictly require a rcu_read_lock() for protection, - * mixing with BH RCU lock doesn't work. + /* Migration disable and BH disable are needed to protect per-cpu + * redirect_info between BPF prog and skb_do_redirect(). */ - preempt_disable(); + migrate_disable(); + local_bh_disable(); bpf_compute_data_pointers(skb); ret = bpf_prog_run_save_cb(lwt->prog, skb); @@ -78,7 +77,8 @@ static int run_lwt_bpf(struct sk_buff *skb, struct bpf_lwt_prog *lwt, break; } - preempt_enable(); + local_bh_enable(); + migrate_enable(); return ret; } diff --git a/net/core/xdp.c b/net/core/xdp.c index 48aba933a5a8..d900cebc0acd 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -335,11 +335,10 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model); * scenarios (e.g. queue full), it is possible to return the xdp_frame * while still leveraging this protection. The @napi_direct boolean * is used for those calls sites. Thus, allowing for faster recycling - * of xdp_frames/pages in those cases. This path is never used by the - * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of - * the switch-statement. + * of xdp_frames/pages in those cases. */ -static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) +static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct, + struct xdp_buff *xdp) { struct xdp_mem_allocator *xa; struct page *page; @@ -361,6 +360,10 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) page = virt_to_page(data); /* Assumes order0 page*/ put_page(page); break; + case MEM_TYPE_XSK_BUFF_POOL: + /* NB! Only valid from an xdp_buff! */ + xsk_buff_free(xdp); + break; default: /* Not possible, checked in xdp_rxq_info_reg_mem_model() */ WARN(1, "Incorrect XDP memory type (%d) usage", mem->type); @@ -370,19 +373,19 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct) void xdp_return_frame(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, false); + __xdp_return(xdpf->data, &xdpf->mem, false, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf) { - __xdp_return(xdpf->data, &xdpf->mem, true); + __xdp_return(xdpf->data, &xdpf->mem, true, NULL); } EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi); void xdp_return_buff(struct xdp_buff *xdp) { - __xdp_return(xdp->data, &xdp->rxq->mem, true); + __xdp_return(xdp->data, &xdp->rxq->mem, true, xdp); } /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */ @@ -400,18 +403,6 @@ void __xdp_release_frame(void *data, struct xdp_mem_info *mem) } EXPORT_SYMBOL_GPL(__xdp_release_frame); -bool xdp_attachment_flags_ok(struct xdp_attachment_info *info, - struct netdev_bpf *bpf) -{ - if (info->prog && (bpf->flags ^ info->flags) & XDP_FLAGS_MODES) { - NL_SET_ERR_MSG(bpf->extack, - "program loaded with different flags"); - return false; - } - return true; -} -EXPORT_SYMBOL_GPL(xdp_attachment_flags_ok); - void xdp_attachment_setup(struct xdp_attachment_info *info, struct netdev_bpf *bpf) { diff --git a/net/ethtool/bitset.c b/net/ethtool/bitset.c index 1fb3603d92ad..0515d6604b3b 100644 --- a/net/ethtool/bitset.c +++ b/net/ethtool/bitset.c @@ -628,6 +628,8 @@ int ethnl_parse_bitset(unsigned long *val, unsigned long *mask, return ret; change_bits = nla_get_u32(tb[ETHTOOL_A_BITSET_SIZE]); + if (change_bits > nbits) + change_bits = nbits; bitmap_from_arr32(val, nla_data(tb[ETHTOOL_A_BITSET_VALUE]), change_bits); if (change_bits < nbits) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b87140a1fa28..cdf6ec5aa45d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -825,7 +825,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, if (has_gw && has_via) { NL_SET_ERR_MSG(extack, "Nexthop configuration can not contain both GATEWAY and VIA"); - goto errout; + return -EINVAL; } return 0; diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index d1e04d2b5170..563b62b76a5f 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -203,7 +203,7 @@ unsigned int arpt_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct arpt_entry **)private->jumpstack[cpu]; @@ -649,7 +649,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change * (other than comefrom, which userspace doesn't care @@ -673,7 +673,7 @@ static int copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct arpt_entry *e; struct xt_counters *counters; - struct xt_table_info *private = table->private; + struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; void *loc_cpu_entry; @@ -807,7 +807,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, NFPROTO_ARP, name); if (!IS_ERR(t)) { struct arpt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -860,7 +860,7 @@ static int get_entries(struct net *net, struct arpt_get_entries __user *uptr, t = xt_find_table_lock(net, NFPROTO_ARP, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, @@ -1017,7 +1017,7 @@ static int do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1330,7 +1330,7 @@ static int compat_copy_entries_to_user(unsigned int total_size, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index f15bc21d7301..6e2851f8d3a3 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -258,7 +258,7 @@ ipt_do_table(struct sk_buff *skb, WARN_ON(!(table->valid_hooks & (1 << hook))); local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ipt_entry **)private->jumpstack[cpu]; @@ -791,7 +791,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -815,7 +815,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ipt_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -964,7 +964,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET, name); if (!IS_ERR(t)) { struct ipt_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1018,7 +1018,7 @@ get_entries(struct net *net, struct ipt_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET, get.name); if (!IS_ERR(t)) { - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1173,7 +1173,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1543,7 +1543,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 389d1b340248..ef4bdb038a4b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -510,7 +510,6 @@ static void tcp_init_buffer_space(struct sock *sk) if (!(sk->sk_userlocks & SOCK_SNDBUF_LOCK)) tcp_sndbuf_expand(sk); - tp->rcvq_space.space = min_t(u32, tp->rcv_wnd, TCP_INIT_CWND * tp->advmss); tcp_mstamp_refresh(tp); tp->rcvq_space.time = tp->tcp_mstamp; tp->rcvq_space.seq = tp->copied_seq; @@ -534,6 +533,8 @@ static void tcp_init_buffer_space(struct sock *sk) tp->rcv_ssthresh = min(tp->rcv_ssthresh, tp->window_clamp); tp->snd_cwnd_stamp = tcp_jiffies32; + tp->rcvq_space.space = min3(tp->rcv_ssthresh, tp->rcv_wnd, + (u32)TCP_INIT_CWND * tp->advmss); } /* 4. Recalculate window clamp after socket hit its memory bounds. */ diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 8391aa29e7a4..595dcc3afac5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -984,7 +984,8 @@ static int tcp_v4_send_synack(const struct sock *sk, struct dst_entry *dst, __tcp_v4_send_check(skb, ireq->ir_loc_addr, ireq->ir_rmt_addr); tos = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (inet_sk(sk)->tos & INET_ECN_MASK) : inet_sk(sk)->tos; if (!INET_ECN_is_capable(tos) && @@ -1541,7 +1542,9 @@ struct sock *tcp_v4_syn_recv_sock(const struct sock *sk, struct sk_buff *skb, inet_csk(newsk)->icsk_ext_hdr_len = inet_opt->opt.optlen; newinet->inet_id = prandom_u32(); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newinet->tos = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index bf48cd73e967..99011768c264 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1880,7 +1880,8 @@ static void tcp_cwnd_validate(struct sock *sk, bool is_cwnd_limited) * window, and remember whether we were cwnd-limited then. */ if (!before(tp->snd_una, tp->max_packets_seq) || - tp->packets_out > tp->max_packets_out) { + tp->packets_out > tp->max_packets_out || + is_cwnd_limited) { tp->max_packets_out = tp->packets_out; tp->max_packets_seq = tp->snd_nxt; tp->is_cwnd_limited = is_cwnd_limited; @@ -2702,6 +2703,10 @@ repair: else tcp_chrono_stop(sk, TCP_CHRONO_RWND_LIMITED); + is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); + if (likely(sent_pkts || is_cwnd_limited)) + tcp_cwnd_validate(sk, is_cwnd_limited); + if (likely(sent_pkts)) { if (tcp_in_cwnd_reduction(sk)) tp->prr_out += sent_pkts; @@ -2709,8 +2714,6 @@ repair: /* Send one loss probe per tail loss episode. */ if (push_one != 2) tcp_schedule_loss_probe(sk, false); - is_cwnd_limited |= (tcp_packets_in_flight(tp) >= tp->snd_cwnd); - tcp_cwnd_validate(sk, is_cwnd_limited); return false; } return !tp->packets_out && !tcp_write_queue_empty(sk); diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 09f0a23d1a01..9eeebd4a0054 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2173,7 +2173,7 @@ static int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) __skb_pull(skb, skb_transport_offset(skb)); ret = udp_queue_rcv_one_skb(sk, skb); if (ret > 0) - ip_protocol_deliver_rcu(dev_net(skb->dev), skb, -ret); + ip_protocol_deliver_rcu(dev_net(skb->dev), skb, ret); } return 0; } diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 2e2119bfcf13..c4f532f4d311 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -280,7 +280,7 @@ ip6t_do_table(struct sk_buff *skb, local_bh_disable(); addend = xt_write_recseq_begin(); - private = READ_ONCE(table->private); /* Address dependency. */ + private = rcu_access_pointer(table->private); cpu = smp_processor_id(); table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; @@ -807,7 +807,7 @@ static struct xt_counters *alloc_counters(const struct xt_table *table) { unsigned int countersize; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); /* We need atomic snapshot of counters: rest doesn't change (other than comefrom, which userspace doesn't care @@ -831,7 +831,7 @@ copy_entries_to_user(unsigned int total_size, unsigned int off, num; const struct ip6t_entry *e; struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); int ret = 0; const void *loc_cpu_entry; @@ -980,7 +980,7 @@ static int get_info(struct net *net, void __user *user, const int *len) t = xt_request_find_table_lock(net, AF_INET6, name); if (!IS_ERR(t)) { struct ip6t_getinfo info; - const struct xt_table_info *private = t->private; + const struct xt_table_info *private = xt_table_get_private_protected(t); #ifdef CONFIG_COMPAT struct xt_table_info tmp; @@ -1035,7 +1035,7 @@ get_entries(struct net *net, struct ip6t_get_entries __user *uptr, t = xt_find_table_lock(net, AF_INET6, get.name); if (!IS_ERR(t)) { - struct xt_table_info *private = t->private; + struct xt_table_info *private = xt_table_get_private_protected(t); if (get.size == private->size) ret = copy_entries_to_user(private->size, t, uptr->entrytable); @@ -1189,7 +1189,7 @@ do_add_counters(struct net *net, sockptr_t arg, unsigned int len) } local_bh_disable(); - private = t->private; + private = xt_table_get_private_protected(t); if (private->number != tmp.num_counters) { ret = -EINVAL; goto unlock_up_free; @@ -1552,7 +1552,7 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *userptr) { struct xt_counters *counters; - const struct xt_table_info *private = table->private; + const struct xt_table_info *private = xt_table_get_private_protected(table); void __user *pos; unsigned int size; int ret = 0; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 992cbf3eb9e3..991dc36f95ff 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -528,7 +528,8 @@ static int tcp_v6_send_synack(const struct sock *sk, struct dst_entry *dst, fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); tclass = sock_net(sk)->ipv4.sysctl_tcp_reflect_tos ? - tcp_rsk(req)->syn_tos & ~INET_ECN_MASK : + (tcp_rsk(req)->syn_tos & ~INET_ECN_MASK) | + (np->tclass & INET_ECN_MASK) : np->tclass; if (!INET_ECN_is_capable(tclass) && @@ -1320,7 +1321,9 @@ static struct sock *tcp_v6_syn_recv_sock(const struct sock *sk, struct sk_buff * if (np->repflow) newnp->flow_label = ip6_flowlabel(ipv6_hdr(skb)); - /* Set ToS of the new socket based upon the value of incoming SYN. */ + /* Set ToS of the new socket based upon the value of incoming SYN. + * ECT bits are set later in tcp_init_transfer(). + */ if (sock_net(sk)->ipv4.sysctl_tcp_reflect_tos) newnp->tclass = tcp_rsk(req)->syn_tos & ~INET_ECN_MASK; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 1be775979132..44154cc596cd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -948,6 +948,8 @@ int ieee80211_add_virtual_monitor(struct ieee80211_local *local) return ret; } + set_bit(SDATA_STATE_RUNNING, &sdata->state); + ret = ieee80211_check_queues(sdata, NL80211_IFTYPE_MONITOR); if (ret) { kfree(sdata); diff --git a/net/mac80211/mesh_pathtbl.c b/net/mac80211/mesh_pathtbl.c index 48f31ac9233c..620ecf922408 100644 --- a/net/mac80211/mesh_pathtbl.c +++ b/net/mac80211/mesh_pathtbl.c @@ -60,6 +60,7 @@ static struct mesh_table *mesh_table_alloc(void) atomic_set(&newtbl->entries, 0); spin_lock_init(&newtbl->gates_lock); spin_lock_init(&newtbl->walk_lock); + rhashtable_init(&newtbl->rhead, &mesh_rht_params); return newtbl; } @@ -773,9 +774,6 @@ int mesh_pathtbl_init(struct ieee80211_sub_if_data *sdata) goto free_path; } - rhashtable_init(&tbl_path->rhead, &mesh_rht_params); - rhashtable_init(&tbl_mpp->rhead, &mesh_rht_params); - sdata->u.mesh.mesh_paths = tbl_path; sdata->u.mesh.mpp_paths = tbl_mpp; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 49342060490f..94e624e9439b 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -3455,7 +3455,7 @@ bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata, *chandef = he_chandef; - return false; + return true; } bool ieee80211_chandef_s1g_oper(const struct ieee80211_s1g_oper_ie *oper, diff --git a/net/mptcp/mib.c b/net/mptcp/mib.c index 84d119436b22..b921cbdd9aaa 100644 --- a/net/mptcp/mib.c +++ b/net/mptcp/mib.c @@ -67,6 +67,7 @@ void mptcp_seq_show(struct seq_file *seq) for (i = 0; mptcp_snmp_list[i].name; i++) seq_puts(seq, " 0"); + seq_putc(seq, '\n'); return; } diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 23abf1578594..9a080767667b 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -1723,6 +1723,10 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net, } nla_strlcpy(ifname, attr, IFNAMSIZ); + /* nf_tables_netdev_event() is called under rtnl_mutex, this is + * indirectly serializing all the other holders of the commit_mutex with + * the rtnl_mutex. + */ dev = __dev_get_by_name(net, ifname); if (!dev) { err = -ENOENT; @@ -3719,7 +3723,7 @@ cont: return 0; } -static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) +int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) { u64 ms = be64_to_cpu(nla_get_be64(nla)); u64 max = (u64)(~((u64)0)); @@ -3733,7 +3737,7 @@ static int nf_msecs_to_jiffies64(const struct nlattr *nla, u64 *result) return 0; } -static __be64 nf_jiffies64_to_msecs(u64 input) +__be64 nf_jiffies64_to_msecs(u64 input) { return cpu_to_be64(jiffies64_to_msecs(input)); } diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c index 322bd674963e..a1b0aac46e9e 100644 --- a/net/netfilter/nft_ct.c +++ b/net/netfilter/nft_ct.c @@ -177,8 +177,6 @@ static void nft_ct_get_eval(const struct nft_expr *expr, } #endif case NFT_CT_ID: - if (!nf_ct_is_confirmed(ct)) - goto err; *dest = nf_ct_get_id(ct); return; default: diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index 64ca13a1885b..9af4f93c7f0e 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -157,8 +157,10 @@ static int nft_dynset_init(const struct nft_ctx *ctx, if (tb[NFTA_DYNSET_TIMEOUT] != NULL) { if (!(set->flags & NFT_SET_TIMEOUT)) return -EINVAL; - timeout = msecs_to_jiffies(be64_to_cpu(nla_get_be64( - tb[NFTA_DYNSET_TIMEOUT]))); + + err = nf_msecs_to_jiffies64(tb[NFTA_DYNSET_TIMEOUT], &timeout); + if (err) + return err; } priv->sreg_key = nft_parse_register(tb[NFTA_DYNSET_SREG_KEY]); @@ -267,7 +269,7 @@ static int nft_dynset_dump(struct sk_buff *skb, const struct nft_expr *expr) if (nla_put_string(skb, NFTA_DYNSET_SET_NAME, priv->set->name)) goto nla_put_failure; if (nla_put_be64(skb, NFTA_DYNSET_TIMEOUT, - cpu_to_be64(jiffies_to_msecs(priv->timeout)), + nf_jiffies64_to_msecs(priv->timeout), NFTA_DYNSET_PAD)) goto nla_put_failure; if (priv->expr && nft_expr_dump(skb, NFTA_DYNSET_EXPR, priv->expr)) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index af22dbe85e2c..acce622582e3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1349,6 +1349,14 @@ struct xt_counters *xt_counters_alloc(unsigned int counters) } EXPORT_SYMBOL(xt_counters_alloc); +struct xt_table_info +*xt_table_get_private_protected(const struct xt_table *table) +{ + return rcu_dereference_protected(table->private, + mutex_is_locked(&xt[table->af].mutex)); +} +EXPORT_SYMBOL(xt_table_get_private_protected); + struct xt_table_info * xt_replace_table(struct xt_table *table, unsigned int num_counters, @@ -1356,7 +1364,6 @@ xt_replace_table(struct xt_table *table, int *error) { struct xt_table_info *private; - unsigned int cpu; int ret; ret = xt_jumpstack_alloc(newinfo); @@ -1366,47 +1373,20 @@ xt_replace_table(struct xt_table *table, } /* Do the substitution. */ - local_bh_disable(); - private = table->private; + private = xt_table_get_private_protected(table); /* Check inside lock: is the old number correct? */ if (num_counters != private->number) { pr_debug("num_counters != table->private->number (%u/%u)\n", num_counters, private->number); - local_bh_enable(); *error = -EAGAIN; return NULL; } newinfo->initial_entries = private->initial_entries; - /* - * Ensure contents of newinfo are visible before assigning to - * private. - */ - smp_wmb(); - table->private = newinfo; - - /* make sure all cpus see new ->private value */ - smp_wmb(); - /* - * Even though table entries have now been swapped, other CPU's - * may still be using the old entries... - */ - local_bh_enable(); - - /* ... so wait for even xt_recseq on all cpus */ - for_each_possible_cpu(cpu) { - seqcount_t *s = &per_cpu(xt_recseq, cpu); - u32 seq = raw_read_seqcount(s); - - if (seq & 1) { - do { - cond_resched(); - cpu_relax(); - } while (seq == raw_read_seqcount(s)); - } - } + rcu_assign_pointer(table->private, newinfo); + synchronize_rcu(); audit_log_nfcfg(table->name, table->af, private->number, !private->number ? AUDIT_XT_OP_REGISTER : @@ -1442,12 +1422,12 @@ struct xt_table *xt_register_table(struct net *net, } /* Simplifies replace_table code. */ - table->private = bootstrap; + rcu_assign_pointer(table->private, bootstrap); if (!xt_replace_table(table, 0, newinfo, &ret)) goto unlock; - private = table->private; + private = xt_table_get_private_protected(table); pr_debug("table->private->number = %u\n", private->number); /* save number of initial entries */ @@ -1470,7 +1450,8 @@ void *xt_unregister_table(struct xt_table *table) struct xt_table_info *private; mutex_lock(&xt[table->af].mutex); - private = table->private; + private = xt_table_get_private_protected(table); + RCU_INIT_POINTER(table->private, NULL); list_del(&table->list); mutex_unlock(&xt[table->af].mutex); audit_log_nfcfg(table->name, table->af, private->number, diff --git a/net/openvswitch/flow_netlink.c b/net/openvswitch/flow_netlink.c index ec0689ddc635..4c5c2331e764 100644 --- a/net/openvswitch/flow_netlink.c +++ b/net/openvswitch/flow_netlink.c @@ -2531,7 +2531,7 @@ static int validate_and_copy_dec_ttl(struct net *net, action_start = add_nested_action_start(sfa, OVS_DEC_TTL_ATTR_ACTION, log); if (action_start < 0) - return start; + return action_start; err = __ovs_nla_copy_actions(net, actions, key, sfa, eth_type, vlan_tci, mpls_label_count, log); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index fed18fd2c50b..1319986693fc 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -2424,8 +2424,8 @@ static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb, return err; } if (lse_mask->mpls_label) { - err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, - lse_key->mpls_label); + err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL, + lse_key->mpls_label); if (err) return err; } diff --git a/net/sched/sch_fq_pie.c b/net/sched/sch_fq_pie.c index 4dda15588cf4..949163fe68af 100644 --- a/net/sched/sch_fq_pie.c +++ b/net/sched/sch_fq_pie.c @@ -401,6 +401,7 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, INIT_LIST_HEAD(&q->new_flows); INIT_LIST_HEAD(&q->old_flows); + timer_setup(&q->adapt_timer, fq_pie_timer, 0); if (opt) { err = fq_pie_change(sch, opt, extack); @@ -426,7 +427,6 @@ static int fq_pie_init(struct Qdisc *sch, struct nlattr *opt, pie_vars_init(&flow->vars); } - timer_setup(&q->adapt_timer, fq_pie_timer, 0); mod_timer(&q->adapt_timer, jiffies + HZ / 2); return 0; diff --git a/net/tipc/node.c b/net/tipc/node.c index c95d037fde51..83978d5dae59 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -2181,9 +2181,11 @@ void tipc_node_apply_property(struct net *net, struct tipc_bearer *b, &xmitq); else if (prop == TIPC_NLA_PROP_MTU) tipc_link_set_mtu(e->link, b->mtu); + + /* Update MTU for node link entry */ + e->mtu = tipc_link_mss(e->link); } - /* Update MTU for node link entry */ - e->mtu = tipc_link_mss(e->link); + tipc_node_write_unlock(n); tipc_bearer_xmit(net, bearer_id, &xmitq, &e->maddr, NULL); } diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index a77174b99b07..f67ddf2cebcb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -12634,7 +12634,7 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info) struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; struct nlattr *tb[NUM_NL80211_REKEY_DATA]; - struct cfg80211_gtk_rekey_data rekey_data; + struct cfg80211_gtk_rekey_data rekey_data = {}; int err; if (!info->attrs[NL80211_ATTR_REKEY_DATA]) diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index b7b039bd9d03..62504471fd20 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -211,6 +211,14 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len, return 0; } +static bool xsk_tx_writeable(struct xdp_sock *xs) +{ + if (xskq_cons_present_entries(xs->tx) > xs->tx->nentries / 2) + return false; + + return true; +} + static bool xsk_is_bound(struct xdp_sock *xs) { if (READ_ONCE(xs->state) == XSK_BOUND) { @@ -296,7 +304,8 @@ void xsk_tx_release(struct xsk_buff_pool *pool) rcu_read_lock(); list_for_each_entry_rcu(xs, &pool->xsk_tx_list, tx_list) { __xskq_cons_release(xs->tx); - xs->sk.sk_write_space(&xs->sk); + if (xsk_tx_writeable(xs)) + xs->sk.sk_write_space(&xs->sk); } rcu_read_unlock(); } @@ -436,7 +445,8 @@ static int xsk_generic_xmit(struct sock *sk) out: if (sent_frame) - sk->sk_write_space(sk); + if (xsk_tx_writeable(xs)) + sk->sk_write_space(sk); mutex_unlock(&xs->mutex); return err; @@ -471,11 +481,13 @@ static int xsk_sendmsg(struct socket *sock, struct msghdr *m, size_t total_len) static __poll_t xsk_poll(struct file *file, struct socket *sock, struct poll_table_struct *wait) { - __poll_t mask = datagram_poll(file, sock, wait); + __poll_t mask = 0; struct sock *sk = sock->sk; struct xdp_sock *xs = xdp_sk(sk); struct xsk_buff_pool *pool; + sock_poll_wait(file, sock, wait); + if (unlikely(!xsk_is_bound(xs))) return mask; @@ -491,7 +503,7 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; - if (xs->tx && !xskq_cons_is_full(xs->tx)) + if (xs->tx && xsk_tx_writeable(xs)) mask |= EPOLLOUT | EPOLLWRNORM; return mask; diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c index 9287eddec52c..d5adeee9d5d9 100644 --- a/net/xdp/xsk_buff_pool.c +++ b/net/xdp/xsk_buff_pool.c @@ -175,6 +175,7 @@ static int __xp_assign_dev(struct xsk_buff_pool *pool, if (!pool->dma_pages) { WARN(1, "Driver did not DMA map zero-copy buffers"); + err = -EINVAL; goto err_unreg_xsk; } pool->umem->zc = true; diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index cdb9cf3cd136..9e71b9f27679 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -264,6 +264,12 @@ static inline bool xskq_cons_is_full(struct xsk_queue *q) q->nentries; } +static inline u32 xskq_cons_present_entries(struct xsk_queue *q) +{ + /* No barriers needed since data is not accessed */ + return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer); +} + /* Functions for producers */ static inline bool xskq_prod_is_full(struct xsk_queue *q) diff --git a/net/xfrm/xfrm_compat.c b/net/xfrm/xfrm_compat.c index e28f0c9ecd6a..d8e8a11ca845 100644 --- a/net/xfrm/xfrm_compat.c +++ b/net/xfrm/xfrm_compat.c @@ -234,6 +234,7 @@ static int xfrm_xlate64_attr(struct sk_buff *dst, const struct nlattr *src) case XFRMA_PAD: /* Ignore */ return 0; + case XFRMA_UNSPEC: case XFRMA_ALG_AUTH: case XFRMA_ALG_CRYPT: case XFRMA_ALG_COMP: @@ -387,7 +388,7 @@ static int xfrm_attr_cpy32(void *dst, size_t *pos, const struct nlattr *src, memcpy(nla, src, nla_attr_size(copy_len)); nla->nla_len = nla_attr_size(payload); - *pos += nla_attr_size(payload); + *pos += nla_attr_size(copy_len); nlmsg->nlmsg_len += nla->nla_len; memset(dst + *pos, 0, payload - copy_len); @@ -563,7 +564,7 @@ static struct nlmsghdr *xfrm_user_rcv_msg_compat(const struct nlmsghdr *h32, return NULL; len += NLMSG_HDRLEN; - h64 = kvmalloc(len, GFP_KERNEL | __GFP_ZERO); + h64 = kvmalloc(len, GFP_KERNEL); if (!h64) return ERR_PTR(-ENOMEM); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a77da7aae6fe..2f1517827995 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -2382,8 +2382,10 @@ int xfrm_user_policy(struct sock *sk, int optname, sockptr_t optval, int optlen) if (in_compat_syscall()) { struct xfrm_translator *xtr = xfrm_get_translator(); - if (!xtr) + if (!xtr) { + kfree(data); return -EOPNOTSUPP; + } err = xtr->xlate_user_policy_sockptr(&data, optlen); xfrm_put_translator(xtr); diff --git a/tools/bpf/bpftool/pids.c b/tools/bpf/bpftool/pids.c index df7d8ec76036..477e55d59c34 100644 --- a/tools/bpf/bpftool/pids.c +++ b/tools/bpf/bpftool/pids.c @@ -89,9 +89,9 @@ libbpf_print_none(__maybe_unused enum libbpf_print_level level, int build_obj_refs_table(struct obj_refs_table *table, enum bpf_obj_type type) { - char buf[4096]; - struct pid_iter_bpf *skel; struct pid_iter_entry *e; + char buf[4096 / sizeof(*e) * sizeof(*e)]; + struct pid_iter_bpf *skel; int err, ret, fd = -1, i; libbpf_print_fn_t default_print; diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c index 5c6522c89af1..98537ff2679e 100644 --- a/tools/lib/bpf/ringbuf.c +++ b/tools/lib/bpf/ringbuf.c @@ -278,7 +278,7 @@ int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms) err = ringbuf_process_ring(ring); if (err < 0) return err; - res += cnt; + res += err; } return cnt < 0 ? -errno : res; } diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c index 52414058a627..5861446d0777 100644 --- a/tools/testing/selftests/bpf/prog_tests/align.c +++ b/tools/testing/selftests/bpf/prog_tests/align.c @@ -456,10 +456,10 @@ static struct bpf_align_test tests[] = { */ {7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"}, /* Checked s>=0 */ - {9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* packet pointer + nonnegative (4n+2) */ - {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, - {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, + {13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able * to overflow if the packet pointer started in the @@ -467,7 +467,7 @@ static struct bpf_align_test tests[] = { * So we did not get a 'range' on R6, and the access * attempt will fail. */ - {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"}, + {15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc)"}, } }, { diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c index c1650548433c..fddbc5db5d6a 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c @@ -217,9 +217,15 @@ void test_ringbuf(void) if (CHECK(err, "join_bg", "err %d\n", err)) goto cleanup; - if (CHECK(bg_ret != 1, "bg_ret", "epoll_wait result: %ld", bg_ret)) + if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld", bg_ret)) goto cleanup; + /* due to timing variations, there could still be non-notified + * samples, so consume them here to collect all the samples + */ + err = ring_buffer__consume(ringbuf); + CHECK(err < 0, "rb_consume", "failed: %d\b", err); + /* 3 rounds, 2 samples each */ cnt = atomic_xchg(&sample_cnt, 0); CHECK(cnt != 6, "cnt", "exp %d samples, got %d\n", 6, cnt); diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c index 78e450609803..d37161e59bb2 100644 --- a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c +++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c @@ -81,7 +81,7 @@ void test_ringbuf_multi(void) /* poll for samples, should get 2 ringbufs back */ err = ring_buffer__poll(ringbuf, -1); - if (CHECK(err != 4, "poll_res", "expected 4 records, got %d\n", err)) + if (CHECK(err != 2, "poll_res", "expected 2 records, got %d\n", err)) goto cleanup; /* expect extra polling to return nothing */ diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py index 43c9cda199b8..b99bb8ed3ed4 100755 --- a/tools/testing/selftests/bpf/test_offload.py +++ b/tools/testing/selftests/bpf/test_offload.py @@ -184,9 +184,7 @@ def bpftool_prog_list(expected=None, ns=""): def bpftool_map_list(expected=None, ns=""): _, maps = bpftool("map show", JSON=True, ns=ns, fail=True) # Remove the base maps - for m in base_maps: - if m in maps: - maps.remove(m) + maps = [m for m in maps if m not in base_maps and m.get('name') not in base_map_names] if expected is not None: if len(maps) != expected: fail(True, "%d BPF maps loaded, expected %d" % @@ -716,13 +714,11 @@ def test_multi_prog(simdev, sim, obj, modename, modeid): fail(ret == 0, "Replaced one of programs without -force") check_extack(err, "XDP program already attached.", args) - if modename == "" or modename == "drv": - othermode = "" if modename == "drv" else "drv" - start_test("Test multi-attachment XDP - detach...") - ret, _, err = sim.unset_xdp(othermode, force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) + start_test("Test multi-attachment XDP - remove without mode...") + ret, _, err = sim.unset_xdp("", force=True, + fail=False, include_stderr=True) + fail(ret == 0, "Removed program without a mode flag") + check_extack(err, "More than one program loaded, unset mode is ambiguous.", args) sim.unset_xdp("offload") xdp = sim.ip_link_show(xdp=True)["xdp"] @@ -772,6 +768,9 @@ ret, progs = bpftool("prog", fail=False) skip(ret != 0, "bpftool not installed") base_progs = progs _, base_maps = bpftool("map") +base_map_names = [ + 'pid_iter.rodata' # created on each bpftool invocation +] # Check netdevsim ret, out = cmd("modprobe netdevsim", fail=False) @@ -913,11 +912,18 @@ try: sim.tc_flush_filters() + start_test("Test TC offloads failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, + fail=False, include_stderr=True) + fail(ret == 0, "TC filter did not reject with TC offloads enabled") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + start_test("Test TC offloads work...") ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True, fail=False, include_stderr=True) fail(ret != 0, "TC filter did not load with TC offloads enabled") - check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test TC offload basics...") dfs = simdev.dfs_get_bound_progs(expected=1) @@ -941,6 +947,7 @@ try: start_test("Test disabling TC offloads is rejected while filters installed...") ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + sim.set_ethtool_tc_offloads(True) start_test("Test qdisc removal frees things...") sim.tc_flush_filters() @@ -999,18 +1006,8 @@ try: fail=False, include_stderr=True) fail(ret == 0, "Replaced XDP program with a program in different mode") check_extack(err, - "native and generic XDP can't be active at the same time.", + "Native and generic XDP can't be active at the same time.", args) - ret, _, err = sim.set_xdp(obj, "", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Replaced XDP program with a program in different mode") - check_extack(err, "program loaded with different flags.", args) - - start_test("Test XDP prog remove with bad flags...") - ret, _, err = sim.unset_xdp("", force=True, - fail=False, include_stderr=True) - fail(ret == 0, "Removed program with a bad mode") - check_extack(err, "program loaded with different flags.", args) start_test("Test MTU restrictions...") ret, _ = sim.set_mtu(9000, fail=False) @@ -1040,10 +1037,19 @@ try: offload = bpf_pinned("/sys/fs/bpf/offload") ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True) fail(ret == 0, "attached offloaded XDP program to drv") - check_extack(err, "using device-bound program without HW_MODE flag is not supported.", args) + check_extack(err, "Using device-bound program without HW_MODE flag is not supported.", args) rm("/sys/fs/bpf/offload") sim.wait_for_flush() + start_test("Test XDP load failure...") + sim.dfs["dev/bpf_bind_verifier_accept"] = 0 + ret, _, err = bpftool_prog_load("sample_ret0.o", "/sys/fs/bpf/offload", + dev=sim['ifname'], fail=False, include_stderr=True) + fail(ret == 0, "verifier should fail on load") + check_verifier_log(err, "[netdevsim] Hello from netdevsim!") + sim.dfs["dev/bpf_bind_verifier_accept"] = 1 + sim.wait_for_flush() + start_test("Test XDP offload...") _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True) ipl = sim.ip_link_show(xdp=True) @@ -1051,7 +1057,6 @@ try: progs = bpftool_prog_list(expected=1) prog = progs[0] fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") - check_verifier_log(err, "[netdevsim] Hello from netdevsim!") start_test("Test XDP offload is device bound...") dfs = simdev.dfs_get_bound_progs(expected=1) diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c index 1c4b1939f5a8..bed53b561e04 100644 --- a/tools/testing/selftests/bpf/verifier/array_access.c +++ b/tools/testing/selftests/bpf/verifier/array_access.c @@ -68,7 +68,7 @@ BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 9), BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), - BPF_JMP_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), + BPF_JMP32_IMM(BPF_JSGT, BPF_REG_1, 0xffffffff, 1), BPF_MOV32_IMM(BPF_REG_1, 0), BPF_MOV32_IMM(BPF_REG_2, MAX_ENTRIES), BPF_JMP_REG(BPF_JSGT, BPF_REG_2, BPF_REG_1, 1), diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c index dac40de3f868..57ed67b86074 100644 --- a/tools/testing/selftests/bpf/verifier/bounds.c +++ b/tools/testing/selftests/bpf/verifier/bounds.c @@ -703,3 +703,44 @@ .fixup_map_hash_8b = { 3 }, .result = ACCEPT, }, +{ + "bounds checks after 32-bit truncation. test 1", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + /* This used to reduce the max bound to 0x7fffffff */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 1), + BPF_JMP_IMM(BPF_JGT, BPF_REG_1, 0x7fffffff, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, +{ + "bounds checks after 32-bit truncation. test 2", + .insns = { + BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8), + BPF_LD_MAP_FD(BPF_REG_1, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 4), + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_0, 0), + BPF_JMP_IMM(BPF_JSLT, BPF_REG_1, 1, 1), + BPF_JMP32_IMM(BPF_JSLT, BPF_REG_1, 0, 1), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_map_hash_8b = { 3 }, + .errstr_unpriv = "R0 leaks addr", + .result_unpriv = REJECT, + .result = ACCEPT, +}, diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index fb5c55dd6df8..02b0b9ead40b 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -256,6 +256,28 @@ setup_cmd_nsb() fi } +setup_cmd_nsc() +{ + local cmd="$*" + local rc + + run_cmd_nsc ${cmd} + rc=$? + if [ $rc -ne 0 ]; then + # show user the command if not done so already + if [ "$VERBOSE" = "0" ]; then + echo "setup command: $cmd" + fi + echo "failed. stopping tests" + if [ "${PAUSE_ON_FAIL}" = "yes" ]; then + echo + echo "hit enter to continue" + read a + fi + exit $rc + fi +} + # set sysctl values in NS-A set_sysctl() { @@ -471,6 +493,36 @@ setup() sleep 1 } +setup_lla_only() +{ + # make sure we are starting with a clean slate + kill_procs + cleanup 2>/dev/null + + log_debug "Configuring network namespaces" + set -e + + create_ns ${NSA} "-" "-" + create_ns ${NSB} "-" "-" + create_ns ${NSC} "-" "-" + connect_ns ${NSA} ${NSA_DEV} "-" "-" \ + ${NSB} ${NSB_DEV} "-" "-" + connect_ns ${NSA} ${NSA_DEV2} "-" "-" \ + ${NSC} ${NSC_DEV} "-" "-" + + NSA_LINKIP6=$(get_linklocal ${NSA} ${NSA_DEV}) + NSB_LINKIP6=$(get_linklocal ${NSB} ${NSB_DEV}) + NSC_LINKIP6=$(get_linklocal ${NSC} ${NSC_DEV}) + + create_vrf ${NSA} ${VRF} ${VRF_TABLE} "-" "-" + ip -netns ${NSA} link set dev ${NSA_DEV} vrf ${VRF} + ip -netns ${NSA} link set dev ${NSA_DEV2} vrf ${VRF} + + set +e + + sleep 1 +} + ################################################################################ # IPv4 @@ -3787,10 +3839,53 @@ use_case_br() setup_cmd_nsb ip li del vlan100 2>/dev/null } +# VRF only. +# ns-A device is connected to both ns-B and ns-C on a single VRF but only has +# LLA on the interfaces +use_case_ping_lla_multi() +{ + setup_lla_only + # only want reply from ns-A + setup_cmd_nsb sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + setup_cmd_nsc sysctl -qw net.ipv6.icmp.echo_ignore_multicast=1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Pre cycle, ping out ns-B" + + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Pre cycle, ping out ns-C" + + # cycle/flap the first ns-A interface + setup_cmd ip link set ${NSA_DEV} down + setup_cmd ip link set ${NSA_DEV} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV}, ping out ns-C" + + # cycle/flap the second ns-A interface + setup_cmd ip link set ${NSA_DEV2} down + setup_cmd ip link set ${NSA_DEV2} up + sleep 1 + + log_start + run_cmd_nsb ping -c1 -w1 ${MCAST}%${NSB_DEV} + log_test_addr ${MCAST}%${NSB_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-B" + run_cmd_nsc ping -c1 -w1 ${MCAST}%${NSC_DEV} + log_test_addr ${MCAST}%${NSC_DEV} $? 0 "Post cycle ${NSA} ${NSA_DEV2}, ping out ns-C" +} + use_cases() { log_section "Use cases" + log_subsection "Device enslaved to bridge" use_case_br + log_subsection "Ping LLA with multiple interfaces" + use_case_ping_lla_multi } ################################################################################ diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c index db3d4a8b5a4c..76a24052f4b4 100644 --- a/tools/testing/selftests/net/udpgso_bench_rx.c +++ b/tools/testing/selftests/net/udpgso_bench_rx.c @@ -113,6 +113,9 @@ static void do_poll(int fd, int timeout_ms) interrupted = true; break; } + + /* no events and more time to wait, do poll again */ + continue; } if (pfd.revents != POLLIN) error(1, errno, "poll: 0x%x expected 0x%x\n", |