From 8698cb92eeece1e326a4d6a051bcf143037c4d31 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Tue, 8 Aug 2023 14:21:30 +0300 Subject: net/mlx5: Perform DMA operations in the right locations The cited patch changed the mlx5 driver so that during probe DMA operations were performed before pci_enable_device(), and during teardown DMA operations were performed after pci_disable_device(). DMA operations require PCI to be enabled. Hence, the above leads to the following oops on PPC systems[1]. On s390x systems, as reported by Niklas Schnelle, this is a problem because mlx5_pci_init() is where the DMA and coherent masks are set, but mlx5_cmd_init() already does a dma_alloc_coherent(). Thus a DMA allocation is done during probe before the correct mask is set. This causes probe to fail initialization of the cmdif SW structs on s390x after that is converted to the common dma-iommu code. This is because on s390x DMA addresses below 4 GiB are reserved on current machines and, unlike the old s390x-specific DMA API implementation, common code enforces DMA masks. Fix it by performing the DMA operations during probe after pci_enable_device() and after the DMA mask is set, and during teardown before pci_disable_device(). 
[1] Oops: Kernel access of bad area, sig: 11 [#1] LE PAGE_SIZE=64K MMU=Radix SMP NR_CPUS=2048 NUMA pSeries Modules linked in: xt_MASQUERADE nf_conntrack_netlink nfnetlink xfrm_user iptable_nat xt_addrtype xt_conntrack nf_nat nf_conntrack nf_defrag_ipv6 nf_defrag_ipv4 netconsole rpcsec_gss_krb5 auth_rpcgss oid_registry overlay rpcrdma rdma_ucm ib_iser ib_umad rdma_cm ib_ipoib iw_cm libiscsi scsi_transport_iscsi ib_cm ib_uverbs ib_core mlx5_core(-) ptp pps_core fuse vmx_crypto crc32c_vpmsum [last unloaded: mlx5_ib] CPU: 1 PID: 8937 Comm: modprobe Not tainted 6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02 #1 Hardware name: IBM pSeries (emulated by qemu) POWER9 (raw) 0x4e1202 0xf000005 of:SLOF,HEAD hv:linux,kvm pSeries NIP: c000000000423388 LR: c0000000001e733c CTR: c0000000001e4720 REGS: c0000000055636d0 TRAP: 0380 Not tainted (6.5.0-rc3_for_upstream_min_debug_2023_07_31_16_02) MSR: 8000000000009033 CR: 24008884 XER: 20040000 CFAR: c0000000001e7338 IRQMASK: 0 NIP [c000000000423388] __free_pages+0x28/0x160 LR [c0000000001e733c] dma_direct_free+0xac/0x190 Call Trace: [c000000005563970] [5deadbeef0000100] 0x5deadbeef0000100 (unreliable) [c0000000055639b0] [c0000000003d46cc] kfree+0x7c/0x150 [c000000005563a40] [c0000000001e47c8] dma_free_attrs+0xa8/0x1a0 [c000000005563aa0] [c008000000d0064c] mlx5_cmd_cleanup+0xa4/0x100 [mlx5_core] [c000000005563ad0] [c008000000cf629c] mlx5_mdev_uninit+0xf4/0x140 [mlx5_core] [c000000005563b00] [c008000000cf6448] remove_one+0x160/0x1d0 [mlx5_core] [c000000005563b40] [c000000000958540] pci_device_remove+0x60/0x110 [c000000005563b80] [c000000000a35e80] device_remove+0x70/0xd0 [c000000005563bb0] [c000000000a37a38] device_release_driver_internal+0x2a8/0x330 [c000000005563c00] [c000000000a37b8c] driver_detach+0x8c/0x160 [c000000005563c40] [c000000000a35350] bus_remove_driver+0x90/0x110 [c000000005563c80] [c000000000a38948] driver_unregister+0x48/0x90 [c000000005563cf0] [c000000000957e38] pci_unregister_driver+0x38/0x150 [c000000005563d40] 
[c008000000eb6140] mlx5_cleanup+0x38/0x90 [mlx5_core] Fixes: 06cd555f73ca ("net/mlx5: split mlx5_cmd_init() to probe and reload routines") Signed-off-by: Shay Drory Reviewed-by: Moshe Shemesh Reviewed-by: Tariq Toukan Reviewed-by: Leon Romanovsky Reviewed-by: Niklas Schnelle Tested-by: Niklas Schnelle Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 64 ++++++++++++--------------- 1 file changed, 28 insertions(+), 36 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index afb348579577..c22b0ad0c870 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -2186,52 +2186,23 @@ static u16 cmdif_rev(struct mlx5_core_dev *dev) int mlx5_cmd_init(struct mlx5_core_dev *dev) { - int size = sizeof(struct mlx5_cmd_prot_block); - int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; - u32 cmd_l; - int err; - - cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); - if (!cmd->pool) - return -ENOMEM; - err = alloc_cmd_page(dev, cmd); - if (err) - goto err_free_pool; - - cmd_l = (u32)(cmd->dma); - if (cmd_l & 0xfff) { - mlx5_core_err(dev, "invalid command queue address\n"); - err = -ENOMEM; - goto err_cmd_page; - } cmd->checksum_disabled = 1; spin_lock_init(&cmd->alloc_lock); spin_lock_init(&cmd->token_lock); - create_msg_cache(dev); - set_wqname(dev); cmd->wq = create_singlethread_workqueue(cmd->wq_name); if (!cmd->wq) { mlx5_core_err(dev, "failed to create command workqueue\n"); - err = -ENOMEM; - goto err_cache; + return -ENOMEM; } mlx5_cmdif_debugfs_init(dev); return 0; - -err_cache: - destroy_msg_cache(dev); -err_cmd_page: - free_cmd_page(dev, cmd); -err_free_pool: - dma_pool_destroy(cmd->pool); - return err; } void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) @@ -2240,15 +2211,15 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev) 
mlx5_cmdif_debugfs_cleanup(dev); destroy_workqueue(cmd->wq); - destroy_msg_cache(dev); - free_cmd_page(dev, cmd); - dma_pool_destroy(cmd->pool); } int mlx5_cmd_enable(struct mlx5_core_dev *dev) { + int size = sizeof(struct mlx5_cmd_prot_block); + int align = roundup_pow_of_two(size); struct mlx5_cmd *cmd = &dev->cmd; u32 cmd_h, cmd_l; + int err; memset(&cmd->vars, 0, sizeof(cmd->vars)); cmd->vars.cmdif_rev = cmdif_rev(dev); @@ -2281,10 +2252,21 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) sema_init(&cmd->vars.pages_sem, 1); sema_init(&cmd->vars.throttle_sem, DIV_ROUND_UP(cmd->vars.max_reg_cmds, 2)); + cmd->pool = dma_pool_create("mlx5_cmd", mlx5_core_dma_dev(dev), size, align, 0); + if (!cmd->pool) + return -ENOMEM; + + err = alloc_cmd_page(dev, cmd); + if (err) + goto err_free_pool; + cmd_h = (u32)((u64)(cmd->dma) >> 32); cmd_l = (u32)(cmd->dma); - if (WARN_ON(cmd_l & 0xfff)) - return -EINVAL; + if (cmd_l & 0xfff) { + mlx5_core_err(dev, "invalid command queue address\n"); + err = -ENOMEM; + goto err_cmd_page; + } iowrite32be(cmd_h, &dev->iseg->cmdq_addr_h); iowrite32be(cmd_l, &dev->iseg->cmdq_addr_l_sz); @@ -2297,17 +2279,27 @@ int mlx5_cmd_enable(struct mlx5_core_dev *dev) cmd->mode = CMD_MODE_POLLING; cmd->allowed_opcode = CMD_ALLOWED_OPCODE_ALL; + create_msg_cache(dev); create_debugfs_files(dev); return 0; + +err_cmd_page: + free_cmd_page(dev, cmd); +err_free_pool: + dma_pool_destroy(cmd->pool); + return err; } void mlx5_cmd_disable(struct mlx5_core_dev *dev) { struct mlx5_cmd *cmd = &dev->cmd; - clean_debug_files(dev); flush_workqueue(cmd->wq); + clean_debug_files(dev); + destroy_msg_cache(dev); + free_cmd_page(dev, cmd); + dma_pool_destroy(cmd->pool); } void mlx5_cmd_set_state(struct mlx5_core_dev *dev, -- cgit v1.3.1 From 7624e58a8b3a251e3e5108b32f2183b34453db32 Mon Sep 17 00:00:00 2001 From: Shay Drory Date: Sun, 27 Aug 2023 13:31:53 +0300 Subject: net/mlx5: E-switch, register event handler before arming the event Currently, mlx5 is registering event 
handler for vport context change event some time after arming the event. This can lead to missing an event, which will result in wrong rules in the FDB. Hence, register the event handler before arming the event. This solution is valid since FW sends vport context change events only on vports which SW armed, and SW arms the vport when enabling it, which is done after the FDB has been created. Fixes: 6933a9379559 ("net/mlx5: E-Switch, Use async events chain") Signed-off-by: Shay Drory Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d4cde6555063..8d0b915a3121 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1038,11 +1038,8 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev) return ERR_PTR(err); } -static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_register(struct mlx5_eswitch *esw) { - MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); - mlx5_eq_notifier_register(esw->dev, &esw->nb); - if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) { MLX5_NB_INIT(&esw->esw_funcs.nb, mlx5_esw_funcs_changed_handler, ESW_FUNCTIONS_CHANGED); @@ -1050,13 +1047,11 @@ static void mlx5_eswitch_event_handlers_register(struct mlx5_eswitch *esw) } } -static void mlx5_eswitch_event_handlers_unregister(struct mlx5_eswitch *esw) +static void mlx5_eswitch_event_handler_unregister(struct mlx5_eswitch *esw) { if (esw->mode == MLX5_ESWITCH_OFFLOADS && mlx5_eswitch_is_funcs_handler(esw->dev)) mlx5_eq_notifier_unregister(esw->dev, &esw->esw_funcs.nb); - mlx5_eq_notifier_unregister(esw->dev, &esw->nb); - 
flush_workqueue(esw->work_queue); } @@ -1483,6 +1478,9 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) mlx5_eswitch_update_num_of_vfs(esw, num_vfs); + MLX5_NB_INIT(&esw->nb, eswitch_vport_event, NIC_VPORT_CHANGE); + mlx5_eq_notifier_register(esw->dev, &esw->nb); + if (esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); } else { @@ -1495,7 +1493,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; - mlx5_eswitch_event_handlers_register(esw); + mlx5_eswitch_event_handler_register(esw); esw_info(esw->dev, "Enable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", @@ -1622,7 +1620,8 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) */ mlx5_esw_mode_change_notify(esw, MLX5_ESWITCH_LEGACY); - mlx5_eswitch_event_handlers_unregister(esw); + mlx5_eq_notifier_unregister(esw->dev, &esw->nb); + mlx5_eswitch_event_handler_unregister(esw); esw_info(esw->dev, "Disable: mode(%s), nvfs(%d), necvfs(%d), active vports(%d)\n", esw->mode == MLX5_ESWITCH_LEGACY ? "LEGACY" : "OFFLOADS", -- cgit v1.3.1 From 7a3ce8074878a68a75ceacec93d9ae05906eec86 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Wed, 9 Aug 2023 11:10:57 +0200 Subject: net/mlx5: Bridge, fix peer entry ageing in LAG mode With the current implementation, in single FDB LAG mode all packets are processed by eswitch 0 rules. As such, 'peer' FDB entries receive the packets for rules of other eswitches and are responsible for updating the main entry by sending a SWITCHDEV_FDB_ADD_TO_BRIDGE notification from their background update wq task. However, this introduces a race condition when a non-zero eswitch instance decides to delete an FDB entry, sends a SWITCHDEV_FDB_DEL_TO_BRIDGE notification, but another eswitch's update task refreshes the same entry concurrently while its async delete work is still pending on the workqueue. 
In such case another SWITCHDEV_FDB_ADD_TO_BRIDGE event may be generated and entry will remain stuck in FDB marked as 'offloaded' since no more SWITCHDEV_FDB_DEL_TO_BRIDGE notifications are sent for deleting the peer entries. Fix the issue by synchronously marking deleted entries with MLX5_ESW_BRIDGE_FLAG_DELETED flag and skipping them in background update job. Signed-off-by: Vlad Buslov Reviewed-by: Jianbo Liu Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/core/en/rep/bridge.c | 11 ++++++++++ .../net/ethernet/mellanox/mlx5/core/esw/bridge.c | 25 +++++++++++++++++++++- .../net/ethernet/mellanox/mlx5/core/esw/bridge.h | 3 +++ .../ethernet/mellanox/mlx5/core/esw/bridge_priv.h | 1 + 4 files changed, 39 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c index 0fef853eab62..5d128c5b4529 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bridge.c @@ -467,6 +467,17 @@ static int mlx5_esw_bridge_switchdev_event(struct notifier_block *nb, /* only handle the event on peers */ if (mlx5_esw_bridge_is_local(dev, rep, esw)) break; + + fdb_info = container_of(info, + struct switchdev_notifier_fdb_info, + info); + /* Mark for deletion to prevent the update wq task from + * spuriously refreshing the entry which would mark it again as + * offloaded in SW bridge. After this fallthrough to regular + * async delete code. 
+ */ + mlx5_esw_bridge_fdb_mark_deleted(dev, vport_num, esw_owner_vhca_id, br_offloads, + fdb_info); fallthrough; case SWITCHDEV_FDB_ADD_TO_DEVICE: case SWITCHDEV_FDB_DEL_TO_DEVICE: diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c index e36294b7ade2..1b9bc32efd6f 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.c @@ -1748,6 +1748,28 @@ void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 entry->lastuse = jiffies; } +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info) +{ + struct mlx5_esw_bridge_fdb_entry *entry; + struct mlx5_esw_bridge *bridge; + + bridge = mlx5_esw_bridge_from_port_lookup(vport_num, esw_owner_vhca_id, br_offloads); + if (!bridge) + return; + + entry = mlx5_esw_bridge_fdb_lookup(bridge, fdb_info->addr, fdb_info->vid); + if (!entry) { + esw_debug(br_offloads->esw->dev, + "FDB mark deleted entry with specified key not found (MAC=%pM,vid=%u,vport=%u)\n", + fdb_info->addr, fdb_info->vid, vport_num); + return; + } + + entry->flags |= MLX5_ESW_BRIDGE_FLAG_DELETED; +} + void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info) @@ -1810,7 +1832,8 @@ void mlx5_esw_bridge_update(struct mlx5_esw_bridge_offloads *br_offloads) unsigned long lastuse = (unsigned long)mlx5_fc_query_lastuse(entry->ingress_counter); - if (entry->flags & MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER) + if (entry->flags & (MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER | + MLX5_ESW_BRIDGE_FLAG_DELETED)) continue; if (time_after(lastuse, entry->lastuse)) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h 
b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h index c2c7c70d99eb..d6f539161993 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge.h @@ -62,6 +62,9 @@ int mlx5_esw_bridge_vport_peer_unlink(struct net_device *br_netdev, u16 vport_nu void mlx5_esw_bridge_fdb_update_used(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); +void mlx5_esw_bridge_fdb_mark_deleted(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, + struct mlx5_esw_bridge_offloads *br_offloads, + struct switchdev_notifier_fdb_info *fdb_info); void mlx5_esw_bridge_fdb_create(struct net_device *dev, u16 vport_num, u16 esw_owner_vhca_id, struct mlx5_esw_bridge_offloads *br_offloads, struct switchdev_notifier_fdb_info *fdb_info); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h index 4911cc32161b..7c251af566c6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/bridge_priv.h @@ -133,6 +133,7 @@ struct mlx5_esw_bridge_mdb_key { enum { MLX5_ESW_BRIDGE_FLAG_ADDED_BY_USER = BIT(0), MLX5_ESW_BRIDGE_FLAG_PEER = BIT(1), + MLX5_ESW_BRIDGE_FLAG_DELETED = BIT(2), }; enum { -- cgit v1.3.1 From 92fd39634541eb0a11bf1bafbc8ba92d6ddb8dba Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Wed, 6 Sep 2023 21:48:30 +0300 Subject: net/mlx5: Handle fw tracer change ownership event based on MTRC Currently, whenever fw issues a change ownership event, the PF that owns the fw tracer drops its ownership directly and the other PFs try to pick up the ownership via what MTRC register suggests. In some cases, driver releases the ownership of the tracer and reacquires it later on. Whenever the driver releases ownership of the tracer, fw issues a change ownership event. 
This event can be delayed and come after driver has reacquired ownership of the tracer. Thus the late event will trigger the tracer owner PF to release the ownership again and lead to a scenario where no PF is owning the tracer. To prevent the scenario described above, when handling a change ownership event, do not drop ownership of the tracer directly, instead read the fw MTRC register to retrieve the up-to-date owner of the tracer and set it accordingly in driver level. Fixes: f53aaa31cce7 ("net/mlx5: FW tracer, implement tracer logic") Signed-off-by: Maher Sanalla Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c index 7c0f2adbea00..ad789349c06e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c @@ -848,7 +848,7 @@ static void mlx5_fw_tracer_ownership_change(struct work_struct *work) mlx5_core_dbg(tracer->dev, "FWTracer: ownership changed, current=(%d)\n", tracer->owner); if (tracer->owner) { - tracer->owner = false; + mlx5_fw_tracer_ownership_acquire(tracer); return; } -- cgit v1.3.1 From be43b7489a3c4702799e50179da69c3df7d6899b Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 2 Oct 2023 14:05:29 +0300 Subject: net/mlx5e: RX, Fix page_pool allocation failure recovery for striding rq When a page allocation fails during refill in mlx5e_post_rx_mpwqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 2436.447717] WARNING: CPU: 1 PID: 2419 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... 
[ 2436.447895] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.447991] Call Trace: [ 2436.447975] mlx5e_post_rx_mpwqes+0x1d5/0xcf0 [mlx5_core] [ 2436.447994] [ 2436.447996] ? __warn+0x7d/0x120 [ 2436.448009] ? mlx5e_handle_rx_cqe_mpwrq+0x109/0x1d0 [mlx5_core] [ 2436.448002] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448044] ? mlx5e_poll_rx_cq+0x87/0x6e0 [mlx5_core] [ 2436.448061] ? report_bug+0x155/0x180 [ 2436.448065] ? handle_bug+0x36/0x70 [ 2436.448067] ? exc_invalid_op+0x13/0x60 [ 2436.448070] ? asm_exc_invalid_op+0x16/0x20 [ 2436.448079] mlx5e_napi_poll+0x122/0x6b0 [mlx5_core] [ 2436.448077] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 2436.448113] ? generic_exec_single+0x35/0x100 [ 2436.448117] __napi_poll+0x25/0x1a0 [ 2436.448120] net_rx_action+0x28a/0x300 [ 2436.448122] __do_softirq+0xcd/0x279 [ 2436.448126] irq_exit_rcu+0x6a/0x90 [ 2436.448128] sysvec_apic_timer_interrupt+0x6e/0x90 [ 2436.448130] This patch fixes the striding rq case by setting the skip flag on all the wqe pages that were expected to have new pages allocated. 
Fixes: 4c2a13236807 ("net/mlx5e: RX, Defer page release in striding rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 3fd11b0761e0..7988b3a9598c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -816,6 +816,8 @@ err_unmap: mlx5e_page_release_fragmented(rq, frag_page); } + bitmap_fill(wi->skip_release_bitmap, rq->mpwqe.pages_per_wqe); + err: rq->stats->buff_alloc_err++; -- cgit v1.3.1 From ef9369e9c30846f5e052a11ccc70e1f6b8dc557a Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Fri, 29 Sep 2023 17:31:49 +0300 Subject: net/mlx5e: RX, Fix page_pool allocation failure recovery for legacy rq When a page allocation fails during refill in mlx5e_refill_rx_wqes, the page will be released again on the next refill call. This triggers the page_pool negative page fragment count warning below: [ 338.326070] WARNING: CPU: 4 PID: 0 at include/net/page_pool/helpers.h:130 mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] ... [ 338.328993] RIP: 0010:mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329094] Call Trace: [ 338.329097] [ 338.329100] ? __warn+0x7d/0x120 [ 338.329105] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329173] ? report_bug+0x155/0x180 [ 338.329179] ? handle_bug+0x3c/0x60 [ 338.329183] ? exc_invalid_op+0x13/0x60 [ 338.329187] ? asm_exc_invalid_op+0x16/0x20 [ 338.329192] ? mlx5e_page_release_fragmented.isra.0+0x42/0x50 [mlx5_core] [ 338.329259] mlx5e_post_rx_wqes+0x210/0x5a0 [mlx5_core] [ 338.329327] ? 
mlx5e_poll_rx_cq+0x88/0x6f0 [mlx5_core] [ 338.329394] mlx5e_napi_poll+0x127/0x6b0 [mlx5_core] [ 338.329461] __napi_poll+0x25/0x1a0 [ 338.329465] net_rx_action+0x28a/0x300 [ 338.329468] __do_softirq+0xcd/0x279 [ 338.329473] irq_exit_rcu+0x6a/0x90 [ 338.329477] common_interrupt+0x82/0xa0 [ 338.329482] This patch fixes the legacy rq case by releasing all allocated fragments and then setting the skip flag on all released fragments. It is important to note that the number of released fragments will be higher than the number of allocated fragments when an allocation error occurs. Fixes: 3f93f82988bc ("net/mlx5e: RX, Defer page release in legacy rq for better recycling") Tested-by: Chris Mason Reported-by: Chris Mason Closes: https://lore.kernel.org/netdev/117FF31A-7BE0-4050-B2BB-E41F224FF72F@meta.com Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 33 ++++++++++++++++++------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 7988b3a9598c..8d9743a5e42c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -457,26 +457,41 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) static int mlx5e_refill_rx_wqes(struct mlx5e_rq *rq, u16 ix, int wqe_bulk) { int remaining = wqe_bulk; - int i = 0; + int total_alloc = 0; + int refill_alloc; + int refill; /* The WQE bulk is split into smaller bulks that are sized * according to the page pool cache refill size to avoid overflowing * the page pool cache due to too many page releases at once. 
*/ do { - int refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - int alloc_count; + refill = min_t(u16, rq->wqe.info.refill_unit, remaining); - mlx5e_free_rx_wqes(rq, ix + i, refill); - alloc_count = mlx5e_alloc_rx_wqes(rq, ix + i, refill); - i += alloc_count; - if (unlikely(alloc_count != refill)) - break; + mlx5e_free_rx_wqes(rq, ix + total_alloc, refill); + refill_alloc = mlx5e_alloc_rx_wqes(rq, ix + total_alloc, refill); + if (unlikely(refill_alloc != refill)) + goto err_free; + total_alloc += refill_alloc; remaining -= refill; } while (remaining); - return i; + return total_alloc; + +err_free: + mlx5e_free_rx_wqes(rq, ix, total_alloc + refill_alloc); + + for (int i = 0; i < total_alloc + refill; i++) { + int j = mlx5_wq_cyc_ctr2ix(&rq->wqe.wq, ix + i); + struct mlx5e_wqe_frag_info *frag; + + frag = get_frag(rq, j); + for (int k = 0; k < rq->wqe.info.num_frags; k++, frag++) + frag->flags |= BIT(MLX5E_WQE_FRAG_SKIP_RELEASE); + } + + return 0; } static void -- cgit v1.3.1 From aaab619ccd07a32e5b29aa7e59b20de1dcc7a29e Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Mon, 25 Sep 2023 17:50:18 +0300 Subject: net/mlx5e: XDP, Fix XDP_REDIRECT mpwqe page fragment leaks on shutdown When mlx5e_xdp_xmit is called without the XDP_XMIT_FLUSH set it is possible that it leaves a mpwqe session open. That is ok during runtime: the session will be closed on the next call to mlx5e_xdp_xmit. But having a mpwqe session still open at XDP sq close time is problematic: the pc counter is not updated before flushing the contents of the xdpi_fifo. This results in leaking page fragments. The fix is to always close the mpwqe session at the end of mlx5e_xdp_xmit, regardless of the XDP_XMIT_FLUSH flag being set or not. 
Fixes: 5e0d2eef771e ("net/mlx5e: XDP, Support Enhanced Multi-Packet TX WQE") Signed-off-by: Dragos Tatulea Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c index 12f56d0db0af..8bed17d8fe56 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c @@ -874,11 +874,11 @@ int mlx5e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames, } out: - if (flags & XDP_XMIT_FLUSH) { - if (sq->mpwqe.wqe) - mlx5e_xdp_mpwqe_complete(sq); + if (sq->mpwqe.wqe) + mlx5e_xdp_mpwqe_complete(sq); + + if (flags & XDP_XMIT_FLUSH) mlx5e_xmit_xdp_doorbell(sq); - } return nxmit; } -- cgit v1.3.1 From c51c673462a266fb813cf189f8190798a12d3124 Mon Sep 17 00:00:00 2001 From: Lama Kayal Date: Tue, 12 Sep 2023 10:06:24 +0300 Subject: net/mlx5e: Take RTNL lock before triggering netdev notifiers Hold RTNL lock when calling xdp_set_features() with a registered netdev, as the call triggers the netdev notifiers. This could happen when switching from nic profile to uplink representor for example. Similar logic which fixed a similar scenario was previously introduced in the following commit: commit 72cc65497065 net/mlx5e: Take RTNL lock when needed before calling xdp_set_features(). 
This fixes the following assertion and warning call trace: RTNL: assertion failed at net/core/dev.c (1961) WARNING: CPU: 13 PID: 2529 at net/core/dev.c:1961 call_netdevice_notifiers_info+0x7c/0x80 Modules linked in: rpcrdma rdma_ucm ib_iser libiscsi scsi_transport_iscsi ib_umad rdma_cm ib_ipoib iw_cm ib_cm mlx5_ib ib_uverbs ib_core xt_conntrack xt_MASQUERADE nf_conntrack_netlink nfnetlink xt_addrtype iptable_nat nf_nat br_netfilter rpcsec_gss_krb5 auth_rpcgss oid_registry overlay mlx5_core zram zsmalloc fuse CPU: 13 PID: 2529 Comm: devlink Not tainted 6.5.0_for_upstream_min_debug_2023_09_07_20_04 #1 Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 RIP: 0010:call_netdevice_notifiers_info+0x7c/0x80 Code: 8f ff 80 3d 77 0d 16 01 00 75 c5 ba a9 07 00 00 48 c7 c6 c4 bb 0d 82 48 c7 c7 18 c8 06 82 c6 05 5b 0d 16 01 01 e8 44 f6 8c ff <0f> 0b eb a2 0f 1f 44 00 00 55 48 89 e5 41 54 48 83 e4 f0 48 83 ec RSP: 0018:ffff88819930f7f0 EFLAGS: 00010282 RAX: 0000000000000000 RBX: ffffffff8309f740 RCX: 0000000000000027 RDX: ffff88885fb5b5c8 RSI: 0000000000000001 RDI: ffff88885fb5b5c0 RBP: 0000000000000028 R08: ffff88887ffabaa8 R09: 0000000000000003 R10: ffff88887fecbac0 R11: ffff88887ff7bac0 R12: ffff88819930f810 R13: ffff88810b7fea40 R14: ffff8881154e8fd8 R15: ffff888107e881a0 FS: 00007f3ad248f800(0000) GS:ffff88885fb40000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000563b85f164e0 CR3: 0000000113b5c006 CR4: 0000000000370ea0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: ? __warn+0x79/0x120 ? call_netdevice_notifiers_info+0x7c/0x80 ? report_bug+0x17c/0x190 ? handle_bug+0x3c/0x60 ? exc_invalid_op+0x14/0x70 ? asm_exc_invalid_op+0x16/0x20 ? 
call_netdevice_notifiers_info+0x7c/0x80 call_netdevice_notifiers+0x2e/0x50 mlx5e_set_xdp_feature+0x21/0x50 [mlx5_core] mlx5e_build_rep_params+0x97/0x130 [mlx5_core] mlx5e_init_ul_rep+0x9f/0x100 [mlx5_core] mlx5e_netdev_init_profile+0x76/0x110 [mlx5_core] mlx5e_netdev_attach_profile+0x1f/0x90 [mlx5_core] mlx5e_netdev_change_profile+0x92/0x160 [mlx5_core] mlx5e_vport_rep_load+0x329/0x4a0 [mlx5_core] mlx5_esw_offloads_rep_load+0x9e/0xf0 [mlx5_core] esw_offloads_enable+0x4bc/0xe90 [mlx5_core] mlx5_eswitch_enable_locked+0x3c8/0x570 [mlx5_core] ? kmalloc_trace+0x25/0x80 mlx5_devlink_eswitch_mode_set+0x224/0x680 [mlx5_core] ? devlink_get_from_attrs_lock+0x9e/0x110 devlink_nl_cmd_eswitch_set_doit+0x60/0xe0 genl_family_rcv_msg_doit+0xd0/0x120 genl_rcv_msg+0x180/0x2b0 ? devlink_get_from_attrs_lock+0x110/0x110 ? devlink_nl_cmd_eswitch_get_doit+0x290/0x290 ? devlink_pernet_pre_exit+0xf0/0xf0 ? genl_family_rcv_msg_dumpit+0xf0/0xf0 netlink_rcv_skb+0x54/0x100 genl_rcv+0x24/0x40 netlink_unicast+0x1fc/0x2c0 netlink_sendmsg+0x232/0x4a0 sock_sendmsg+0x38/0x60 ? _copy_from_user+0x2a/0x60 __sys_sendto+0x110/0x160 ? handle_mm_fault+0x161/0x260 ? 
do_user_addr_fault+0x276/0x620 __x64_sys_sendto+0x20/0x30 do_syscall_64+0x3d/0x90 entry_SYSCALL_64_after_hwframe+0x46/0xb0 RIP: 0033:0x7f3ad231340a Code: d8 64 89 02 48 c7 c0 ff ff ff ff eb b8 0f 1f 00 f3 0f 1e fa 41 89 ca 64 8b 04 25 18 00 00 00 85 c0 75 15 b8 2c 00 00 00 0f 05 <48> 3d 00 f0 ff ff 77 7e c3 0f 1f 44 00 00 41 54 48 83 ec 30 44 89 RSP: 002b:00007ffd70aad4b8 EFLAGS: 00000246 ORIG_RAX: 000000000000002c RAX: ffffffffffffffda RBX: 0000000000c36b00 RCX:00007f3ad231340a RDX: 0000000000000038 RSI: 0000000000c36b00 RDI: 0000000000000003 RBP: 0000000000c36910 R08: 00007f3ad2625200 R09: 000000000000000c R10: 0000000000000000 R11: 0000000000000246 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000000 R15: 0000000000000001 ---[ end trace 0000000000000000 ]--- ------------[ cut here ]------------ Fixes: 4d5ab0ad964d ("net/mlx5e: take into account device reconfiguration for xdp_features flag") Signed-off-by: Lama Kayal Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 2fdb8895aecd..5ca9bc337dc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -769,6 +769,7 @@ static int mlx5e_rep_max_nch_limit(struct mlx5_core_dev *mdev) static void mlx5e_build_rep_params(struct net_device *netdev) { + const bool take_rtnl = netdev->reg_state == NETREG_REGISTERED; struct mlx5e_priv *priv = netdev_priv(netdev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; @@ -794,8 +795,15 @@ static void mlx5e_build_rep_params(struct net_device *netdev) /* RQ */ mlx5e_build_rq_params(mdev, params); + /* If netdev is already registered (e.g. 
move from nic profile to uplink, + * RTNL lock must be held before triggering netdev notifiers. + */ + if (take_rtnl) + rtnl_lock(); /* update XDP supported features */ mlx5e_set_xdp_feature(netdev); + if (take_rtnl) + rtnl_unlock(); /* CQ moderation params */ params->rx_dim_enabled = MLX5_CAP_GEN(mdev, cq_moderation); -- cgit v1.3.1 From 06b4eac9c4beda520b8a4dbbb8e33dba9d1c8fba Mon Sep 17 00:00:00 2001 From: Jianbo Liu Date: Tue, 12 Sep 2023 02:28:47 +0000 Subject: net/mlx5e: Don't offload internal port if filter device is out device In the cited commit, if the routing device is ovs internal port, the out device is set to uplink, and packets go out after encapsulation. If filter device is uplink, it can trigger the following syndrome: mlx5_core 0000:08:00.0: mlx5_cmd_out_err:803:(pid 3966): SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0xcdb051), err(-22) Fix this issue by not offloading internal port if filter device is out device. In this case, packets are not forwarded to the root table to be processed, the termination table is used instead to forward them from uplink to uplink. 
Fixes: 100ad4e2d758 ("net/mlx5e: Offload internal port as encap route device") Signed-off-by: Jianbo Liu Reviewed-by: Ariel Levkovich Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c index 1730f6a716ee..b10e40e1a9c1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_encap.c @@ -24,7 +24,8 @@ static int mlx5e_set_int_port_tunnel(struct mlx5e_priv *priv, route_dev = dev_get_by_index(dev_net(e->out_dev), e->route_dev_ifindex); - if (!route_dev || !netif_is_ovs_master(route_dev)) + if (!route_dev || !netif_is_ovs_master(route_dev) || + attr->parse_attr->filter_dev == e->out_dev) goto out; err = mlx5e_set_fwd_to_int_port_actions(priv, attr, e->route_dev_ifindex, -- cgit v1.3.1 From 80f1241484dd1b1d4eab1a0211d52ec2bd83e2f1 Mon Sep 17 00:00:00 2001 From: Amir Tzin Date: Mon, 4 Sep 2023 18:26:47 +0300 Subject: net/mlx5e: Fix VF representors reporting zero counters to "ip -s" command Although vf_vport entry of struct mlx5e_stats is never updated, its values are mistakenly copied to the caller structure in the VF representor .ndo_get_stat_64 callback mlx5e_rep_get_stats(). Remove redundant entry and use the updated one, rep_stats, instead. 
Fixes: 64b68e369649 ("net/mlx5: Refactor and expand rep vport stat group") Reviewed-by: Patrisious Haddad Signed-off-by: Amir Tzin Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_stats.h | 11 ++++++++++- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 5 +++-- 3 files changed, 14 insertions(+), 4 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index 5ca9bc337dc6..fd1cce542b68 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -701,7 +701,7 @@ mlx5e_rep_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats) /* update HW stats in background for next time */ mlx5e_queue_update_stats(priv); - memcpy(stats, &priv->stats.vf_vport, sizeof(*stats)); + mlx5e_stats_copy_rep_stats(stats, &priv->stats.rep_stats); } static int mlx5e_rep_change_mtu(struct net_device *netdev, int new_mtu) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h index 176fa5976259..477c547dcc04 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h @@ -484,11 +484,20 @@ struct mlx5e_stats { struct mlx5e_vnic_env_stats vnic; struct mlx5e_vport_stats vport; struct mlx5e_pport_stats pport; - struct rtnl_link_stats64 vf_vport; struct mlx5e_pcie_stats pcie; struct mlx5e_rep_stats rep_stats; }; +static inline void mlx5e_stats_copy_rep_stats(struct rtnl_link_stats64 *vf_vport, + struct mlx5e_rep_stats *rep_stats) +{ + memset(vf_vport, 0, sizeof(*vf_vport)); + vf_vport->rx_packets = rep_stats->vport_rx_packets; + vf_vport->tx_packets = rep_stats->vport_tx_packets; + vf_vport->rx_bytes = rep_stats->vport_rx_bytes; + vf_vport->tx_bytes = rep_stats->vport_tx_bytes; +} + extern mlx5e_stats_grp_t mlx5e_nic_stats_grps[]; 
unsigned int mlx5e_nic_stats_grps_num(struct mlx5e_priv *priv); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index c24828b688ac..c8590483ddc6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -4972,7 +4972,8 @@ static int scan_tc_matchall_fdb_actions(struct mlx5e_priv *priv, if (err) return err; - rpriv->prev_vf_vport_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&rpriv->prev_vf_vport_stats, + &priv->stats.rep_stats); break; default: NL_SET_ERR_MSG_MOD(extack, "mlx5 supports only police action for matchall"); @@ -5012,7 +5013,7 @@ void mlx5e_tc_stats_matchall(struct mlx5e_priv *priv, u64 dbytes; u64 dpkts; - cur_stats = priv->stats.vf_vport; + mlx5e_stats_copy_rep_stats(&cur_stats, &priv->stats.rep_stats); dpkts = cur_stats.rx_packets - rpriv->prev_vf_vport_stats.rx_packets; dbytes = cur_stats.rx_bytes - rpriv->prev_vf_vport_stats.rx_bytes; rpriv->prev_vf_vport_stats = cur_stats; -- cgit v1.3.1 From d93f3f992780af4a21e6c1ab86946b7c5602f1b9 Mon Sep 17 00:00:00 2001 From: Jiri Wiesner Date: Tue, 10 Oct 2023 18:39:33 +0200 Subject: bonding: Return pointer to data after pull on skb Since 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers"), header offsets used to compute a hash in bond_xmit_hash() are relative to skb->data and not skb->head. If the tail of the header buffer of an skb really needs to be advanced and the operation is successful, the pointer to the data must be returned (and not a pointer to the head of the buffer). Fixes: 429e3d123d9a ("bonding: Fix extraction of ports from the packet headers") Signed-off-by: Jiri Wiesner Acked-by: Jay Vosburgh Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- drivers/net/bonding/bond_main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c index ed7212e61c54..51d47eda1c87 100644 --- a/drivers/net/bonding/bond_main.c +++ b/drivers/net/bonding/bond_main.c @@ -4023,7 +4023,7 @@ static inline const void *bond_pull_data(struct sk_buff *skb, if (likely(n <= hlen)) return data; else if (skb && likely(pskb_may_pull(skb, n))) - return skb->head; + return skb->data; return NULL; } -- cgit v1.3.1 From 61b40cefe51af005c72dbdcf975a3d166c6e6406 Mon Sep 17 00:00:00 2001 From: Jinjie Ruan Date: Wed, 11 Oct 2023 11:24:19 +0800 Subject: net: dsa: bcm_sf2: Fix possible memory leak in bcm_sf2_mdio_register() In bcm_sf2_mdio_register(), the class_find_device() will call get_device() to increment reference count for priv->master_mii_bus->dev if of_mdio_find_bus() succeeds. If mdiobus_alloc() or mdiobus_register() fails, it will call get_device() twice without decrement reference count for the device. And it is the same if bcm_sf2_mdio_register() succeeds but fails in bcm_sf2_sw_probe(), or if bcm_sf2_sw_probe() succeeds. If the reference count has not decremented to zero, the dev related resource will not be freed. So remove the get_device() in bcm_sf2_mdio_register(), and call put_device() if mdiobus_alloc() or mdiobus_register() fails and in bcm_sf2_mdio_unregister() to solve the issue. And as Simon suggested, unwind from errors for bcm_sf2_mdio_register() and just return 0 if it succeeds to make it cleaner. 
Fixes: 461cd1b03e32 ("net: dsa: bcm_sf2: Register our slave MDIO bus") Signed-off-by: Jinjie Ruan Suggested-by: Simon Horman Reviewed-by: Simon Horman Reviewed-by: Florian Fainelli Link: https://lore.kernel.org/r/20231011032419.2423290-1-ruanjinjie@huawei.com Signed-off-by: Jakub Kicinski --- drivers/net/dsa/bcm_sf2.c | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 72374b066f64..cd1f240c90f3 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -617,17 +617,16 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) dn = of_find_compatible_node(NULL, NULL, "brcm,unimac-mdio"); priv->master_mii_bus = of_mdio_find_bus(dn); if (!priv->master_mii_bus) { - of_node_put(dn); - return -EPROBE_DEFER; + err = -EPROBE_DEFER; + goto err_of_node_put; } - get_device(&priv->master_mii_bus->dev); priv->master_mii_dn = dn; priv->slave_mii_bus = mdiobus_alloc(); if (!priv->slave_mii_bus) { - of_node_put(dn); - return -ENOMEM; + err = -ENOMEM; + goto err_put_master_mii_bus_dev; } priv->slave_mii_bus->priv = priv; @@ -684,11 +683,17 @@ static int bcm_sf2_mdio_register(struct dsa_switch *ds) } err = mdiobus_register(priv->slave_mii_bus); - if (err && dn) { - mdiobus_free(priv->slave_mii_bus); - of_node_put(dn); - } + if (err && dn) + goto err_free_slave_mii_bus; + return 0; + +err_free_slave_mii_bus: + mdiobus_free(priv->slave_mii_bus); +err_put_master_mii_bus_dev: + put_device(&priv->master_mii_bus->dev); +err_of_node_put: + of_node_put(dn); return err; } @@ -696,6 +701,7 @@ static void bcm_sf2_mdio_unregister(struct bcm_sf2_priv *priv) { mdiobus_unregister(priv->slave_mii_bus); mdiobus_free(priv->slave_mii_bus); + put_device(&priv->master_mii_bus->dev); of_node_put(priv->master_mii_dn); } -- cgit v1.3.1 From 242e34500a32631f85c2b4eb6cb42a368a39e54f Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Tue, 10 Oct 2023 13:30:59 -0700 
Subject: ice: fix over-shifted variable Since the introduction of the ice driver the code has been double-shifting the RSS enabling field, because the define already has shifts in it and can't have the regular pattern of "a << shiftval & mask" applied. Most places in the code got it right, but one line was still wrong. Fix this one location for easy backports to stable. An in-progress patch fixes the defines to "standard" and will be applied as part of the regular -next process sometime after this one. Fixes: d76a60ba7afb ("ice: Add support for VLANs and offloads") Reviewed-by: Przemek Kitszel CC: stable@vger.kernel.org Signed-off-by: Jesse Brandeburg Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Signed-off-by: Jacob Keller Link: https://lore.kernel.org/r/20231010203101.406248-1-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_lib.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 7bf9b7069754..73bbf06a76db 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -1201,8 +1201,7 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi) ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) & ICE_AQ_VSI_Q_OPT_RSS_LUT_M) | - ((hash_type << ICE_AQ_VSI_Q_OPT_RSS_HASH_S) & - ICE_AQ_VSI_Q_OPT_RSS_HASH_M); + (hash_type & ICE_AQ_VSI_Q_OPT_RSS_HASH_M); } static void -- cgit v1.3.1 From 419ce133ab928ab5efd7b50b2ef36ddfd4eadbd2 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 11 Oct 2023 09:20:55 +0200 Subject: tcp: allow again tcp_disconnect() when threads are waiting As reported by Tom, .NET and applications build on top of it rely on connect(AF_UNSPEC) to async cancel pending I/O operations on TCP socket. 
The blamed commit below caused a regression, as such cancellation can now fail. As suggested by Eric, this change addresses the problem explicitly causing blocking I/O operation to terminate immediately (with an error) when a concurrent disconnect() is executed. Instead of tracking the number of threads blocked on a given socket, track the number of disconnect() issued on such socket. If such counter changes after a blocking operation releasing and re-acquiring the socket lock, error out the current operation. Fixes: 4faeee0cf8a5 ("tcp: deny tcp_disconnect() when threads are waiting") Reported-by: Tom Deseyn Closes: https://bugzilla.redhat.com/show_bug.cgi?id=1886305 Suggested-by: Eric Dumazet Signed-off-by: Paolo Abeni Reviewed-by: Eric Dumazet Link: https://lore.kernel.org/r/f3b95e47e3dbed840960548aebaa8d954372db41.1697008693.git.pabeni@redhat.com Signed-off-by: Jakub Kicinski --- .../chelsio/inline_crypto/chtls/chtls_io.c | 36 +++++++++++++++++----- include/net/sock.h | 10 +++--- net/core/stream.c | 12 +++++--- net/ipv4/af_inet.c | 10 ++++-- net/ipv4/inet_connection_sock.c | 1 - net/ipv4/tcp.c | 16 +++++----- net/ipv4/tcp_bpf.c | 4 +++ net/mptcp/protocol.c | 7 ----- net/tls/tls_main.c | 10 ++++-- net/tls/tls_sw.c | 19 ++++++++---- 10 files changed, 80 insertions(+), 45 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 5fc64e47568a..d567e42e1760 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -911,7 +911,7 @@ static int csk_wait_memory(struct chtls_dev *cdev, struct sock *sk, long *timeo_p) { DEFINE_WAIT_FUNC(wait, woken_wake_function); - int err = 0; + int ret, err = 0; long current_timeo; long vm_wait = 0; bool noblock; @@ -942,10 +942,13 @@ static int csk_wait_memory(struct chtls_dev *cdev, set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); 
sk->sk_write_pending++; - sk_wait_event(sk, &current_timeo, sk->sk_err || - (sk->sk_shutdown & SEND_SHUTDOWN) || - (csk_mem_free(cdev, sk) && !vm_wait), &wait); + ret = sk_wait_event(sk, &current_timeo, sk->sk_err || + (sk->sk_shutdown & SEND_SHUTDOWN) || + (csk_mem_free(cdev, sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; @@ -1348,6 +1351,7 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; int target; long timeo; + int ret; buffers_freed = 0; @@ -1423,7 +1427,11 @@ static int chtls_pt_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: if (!skb->len) { @@ -1518,6 +1526,8 @@ skip_copy: if (buffers_freed) chtls_cleanup_rbuf(sk, copied); + +unlock: release_sock(sk); return copied; } @@ -1534,6 +1544,7 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, int copied = 0; size_t avail; /* amount of available data in current skb */ long timeo; + int ret; lock_sock(sk); timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); @@ -1585,7 +1596,12 @@ static int peekmsg(struct sock *sk, struct msghdr *msg, release_sock(sk); lock_sock(sk); } else { - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + /* here 'copied' is 0 due to previous checks */ + copied = ret; + break; + } } if (unlikely(peek_seq != tp->copied_seq)) { @@ -1656,6 +1672,7 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int copied = 0; long timeo; int target; /* Read at least this many bytes */ + int ret; buffers_freed = 0; @@ -1747,7 +1764,11 @@ int chtls_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (copied >= target) break; chtls_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, NULL); + ret = 
sk_wait_data(sk, &timeo, NULL); + if (ret < 0) { + copied = copied ? : ret; + goto unlock; + } continue; found_ok_skb: @@ -1816,6 +1837,7 @@ skip_copy: if (buffers_freed) chtls_cleanup_rbuf(sk, copied); +unlock: release_sock(sk); return copied; } diff --git a/include/net/sock.h b/include/net/sock.h index b770261fbdaf..92f7ea62a915 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -336,7 +336,7 @@ struct sk_filter; * @sk_cgrp_data: cgroup data for this cgroup * @sk_memcg: this socket's memory cgroup association * @sk_write_pending: a write to stream socket waits to start - * @sk_wait_pending: number of threads blocked on this socket + * @sk_disconnects: number of disconnect operations performed on this sock * @sk_state_change: callback to indicate change in the state of the sock * @sk_data_ready: callback to indicate there is data to be processed * @sk_write_space: callback to indicate there is bf sending space available @@ -429,7 +429,7 @@ struct sock { unsigned int sk_napi_id; #endif int sk_rcvbuf; - int sk_wait_pending; + int sk_disconnects; struct sk_filter __rcu *sk_filter; union { @@ -1189,8 +1189,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } #define sk_wait_event(__sk, __timeo, __condition, __wait) \ - ({ int __rc; \ - __sk->sk_wait_pending++; \ + ({ int __rc, __dis = __sk->sk_disconnects; \ release_sock(__sk); \ __rc = __condition; \ if (!__rc) { \ @@ -1200,8 +1199,7 @@ static inline void sock_rps_reset_rxhash(struct sock *sk) } \ sched_annotate_sleep(); \ lock_sock(__sk); \ - __sk->sk_wait_pending--; \ - __rc = __condition; \ + __rc = __dis == __sk->sk_disconnects ? 
__condition : -EPIPE; \ __rc; \ }) diff --git a/net/core/stream.c b/net/core/stream.c index f5c4e47df165..96fbcb9bbb30 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -117,7 +117,7 @@ EXPORT_SYMBOL(sk_stream_wait_close); */ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) { - int err = 0; + int ret, err = 0; long vm_wait = 0; long current_timeo = *timeo_p; DEFINE_WAIT_FUNC(wait, woken_wake_function); @@ -142,11 +142,13 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); sk->sk_write_pending++; - sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) || - (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || - (sk_stream_memory_free(sk) && - !vm_wait), &wait); + ret = sk_wait_event(sk, &current_timeo, READ_ONCE(sk->sk_err) || + (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) || + (sk_stream_memory_free(sk) && !vm_wait), + &wait); sk->sk_write_pending--; + if (ret < 0) + goto do_error; if (vm_wait) { vm_wait -= current_timeo; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 3d2e30e20473..2713c9b06c4c 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -597,7 +597,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) add_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending += writebias; - sk->sk_wait_pending++; /* Basic assumption: if someone sets sk->sk_err, he _must_ * change state of the socket from TCP_SYN_*. @@ -613,7 +612,6 @@ static long inet_wait_for_connect(struct sock *sk, long timeo, int writebias) } remove_wait_queue(sk_sleep(sk), &wait); sk->sk_write_pending -= writebias; - sk->sk_wait_pending--; return timeo; } @@ -642,6 +640,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) { + sk->sk_disconnects++; err = sk->sk_prot->disconnect(sk, flags); sock->state = err ? 
SS_DISCONNECTING : SS_UNCONNECTED; goto out; @@ -696,6 +695,7 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, int writebias = (sk->sk_protocol == IPPROTO_TCP) && tcp_sk(sk)->fastopen_req && tcp_sk(sk)->fastopen_req->data ? 1 : 0; + int dis = sk->sk_disconnects; /* Error code is set above */ if (!timeo || !inet_wait_for_connect(sk, timeo, writebias)) @@ -704,6 +704,11 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = sock_intr_errno(timeo); if (signal_pending(current)) goto out; + + if (dis != sk->sk_disconnects) { + err = -EPIPE; + goto out; + } } /* Connection was closed by RST, timeout, ICMP error @@ -725,6 +730,7 @@ out: sock_error: err = sock_error(sk) ? : -ECONNABORTED; sock->state = SS_UNCONNECTED; + sk->sk_disconnects++; if (sk->sk_prot->disconnect(sk, flags)) sock->state = SS_DISCONNECTING; goto out; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index aeebe8816689..394a498c2823 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1145,7 +1145,6 @@ struct sock *inet_csk_clone_lock(const struct sock *sk, if (newsk) { struct inet_connection_sock *newicsk = inet_csk(newsk); - newsk->sk_wait_pending = 0; inet_sk_set_state(newsk, TCP_SYN_RECV); newicsk->icsk_bind_hash = NULL; newicsk->icsk_bind2_hash = NULL; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 3f66cdeef7de..d3456cf840de 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -831,7 +831,9 @@ ssize_t tcp_splice_read(struct socket *sock, loff_t *ppos, */ if (!skb_queue_empty(&sk->sk_receive_queue)) break; - sk_wait_data(sk, &timeo, NULL); + ret = sk_wait_data(sk, &timeo, NULL); + if (ret < 0) + break; if (signal_pending(current)) { ret = sock_intr_errno(timeo); break; @@ -2442,7 +2444,11 @@ static int tcp_recvmsg_locked(struct sock *sk, struct msghdr *msg, size_t len, __sk_flush_backlog(sk); } else { tcp_cleanup_rbuf(sk, copied); - sk_wait_data(sk, &timeo, last); + err = 
sk_wait_data(sk, &timeo, last); + if (err < 0) { + err = copied ? : err; + goto out; + } } if ((flags & MSG_PEEK) && @@ -2966,12 +2972,6 @@ int tcp_disconnect(struct sock *sk, int flags) int old_state = sk->sk_state; u32 seq; - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - if (old_state != TCP_CLOSE) tcp_set_state(sk, TCP_CLOSE); diff --git a/net/ipv4/tcp_bpf.c b/net/ipv4/tcp_bpf.c index 327268203001..ba2e92188124 100644 --- a/net/ipv4/tcp_bpf.c +++ b/net/ipv4/tcp_bpf.c @@ -307,6 +307,8 @@ msg_bytes_ready: } data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data && !sk_psock_queue_empty(psock)) goto msg_bytes_ready; copied = -EAGAIN; @@ -351,6 +353,8 @@ msg_bytes_ready: timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); data = tcp_msg_wait_data(sk, psock, timeo); + if (data < 0) + return data; if (data) { if (!sk_psock_queue_empty(psock)) goto msg_bytes_ready; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index c3b83cb390d9..d1902373c974 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -3098,12 +3098,6 @@ static int mptcp_disconnect(struct sock *sk, int flags) { struct mptcp_sock *msk = mptcp_sk(sk); - /* Deny disconnect if other threads are blocked in sk_wait_event() - * or inet_wait_for_connect(). - */ - if (sk->sk_wait_pending) - return -EBUSY; - /* We are on the fastopen error path. We can't call straight into the * subflows cleanup code due to lock nesting (we are already under * msk->firstsocket lock). 
@@ -3173,7 +3167,6 @@ struct sock *mptcp_sk_clone_init(const struct sock *sk, inet_sk(nsk)->pinet6 = mptcp_inet6_sk(nsk); #endif - nsk->sk_wait_pending = 0; __mptcp_init_sock(nsk); msk = mptcp_sk(nsk); diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 02f583ff9239..002483e60c19 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -139,8 +139,8 @@ void update_sk_prot(struct sock *sk, struct tls_context *ctx) int wait_on_pending_writer(struct sock *sk, long *timeo) { - int rc = 0; DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret, rc = 0; add_wait_queue(sk_sleep(sk), &wait); while (1) { @@ -154,9 +154,13 @@ int wait_on_pending_writer(struct sock *sk, long *timeo) break; } - if (sk_wait_event(sk, timeo, - !READ_ONCE(sk->sk_write_pending), &wait)) + ret = sk_wait_event(sk, timeo, + !READ_ONCE(sk->sk_write_pending), &wait); + if (ret) { + if (ret < 0) + rc = ret; break; + } } remove_wait_queue(sk_sleep(sk), &wait); return rc; diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c index d1fc295b83b5..e9d1e83a859d 100644 --- a/net/tls/tls_sw.c +++ b/net/tls/tls_sw.c @@ -1291,6 +1291,7 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, struct tls_context *tls_ctx = tls_get_ctx(sk); struct tls_sw_context_rx *ctx = tls_sw_ctx_rx(tls_ctx); DEFINE_WAIT_FUNC(wait, woken_wake_function); + int ret = 0; long timeo; timeo = sock_rcvtimeo(sk, nonblock); @@ -1302,6 +1303,9 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, if (sk->sk_err) return sock_error(sk); + if (ret < 0) + return ret; + if (!skb_queue_empty(&sk->sk_receive_queue)) { tls_strp_check_rcv(&ctx->strp); if (tls_strp_msg_ready(ctx)) @@ -1320,10 +1324,10 @@ tls_rx_rec_wait(struct sock *sk, struct sk_psock *psock, bool nonblock, released = true; add_wait_queue(sk_sleep(sk), &wait); sk_set_bit(SOCKWQ_ASYNC_WAITDATA, sk); - sk_wait_event(sk, &timeo, - tls_strp_msg_ready(ctx) || - !sk_psock_queue_empty(psock), - &wait); + ret = sk_wait_event(sk, &timeo, + 
tls_strp_msg_ready(ctx) || + !sk_psock_queue_empty(psock), + &wait); sk_clear_bit(SOCKWQ_ASYNC_WAITDATA, sk); remove_wait_queue(sk_sleep(sk), &wait); @@ -1852,6 +1856,7 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, bool nonblock) { long timeo; + int ret; timeo = sock_rcvtimeo(sk, nonblock); @@ -1861,14 +1866,16 @@ static int tls_rx_reader_acquire(struct sock *sk, struct tls_sw_context_rx *ctx, ctx->reader_contended = 1; add_wait_queue(&ctx->wq, &wait); - sk_wait_event(sk, &timeo, - !READ_ONCE(ctx->reader_present), &wait); + ret = sk_wait_event(sk, &timeo, + !READ_ONCE(ctx->reader_present), &wait); remove_wait_queue(&ctx->wq, &wait); if (timeo <= 0) return -EAGAIN; if (signal_pending(current)) return sock_intr_errno(timeo); + if (ret < 0) + return ret; } WRITE_ONCE(ctx->reader_present, 1); -- cgit v1.3.1 From 2c0d808f36cc6e0617f9dda055a6651c777a9d64 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Thu, 12 Oct 2023 12:16:26 +0530 Subject: net: ti: icssg-prueth: Fix tx_total_bytes count ICSSG HW stats on TX side considers 8 preamble bytes as data bytes. Due to this the tx_bytes of ICSSG interface doesn't match the rx_bytes of the link partner. There is no public errata available yet. As a workaround to fix this, decrease tx_bytes by 8 bytes for every tx frame. 
Fixes: c1e10d5dc7a1 ("net: ti: icssg-prueth: Add ICSSG Stats") Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20231012064626.977466-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_stats.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/ti/icssg/icssg_stats.c b/drivers/net/ethernet/ti/icssg/icssg_stats.c index bb0b33927e3b..3dbadddd7e35 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_stats.c +++ b/drivers/net/ethernet/ti/icssg/icssg_stats.c @@ -9,6 +9,9 @@ #include "icssg_stats.h" #include +#define ICSSG_TX_PACKET_OFFSET 0xA0 +#define ICSSG_TX_BYTE_OFFSET 0xEC + static u32 stats_base[] = { 0x54c, /* Slice 0 stats start */ 0xb18, /* Slice 1 stats start */ }; @@ -18,6 +21,7 @@ void emac_update_hardware_stats(struct prueth_emac *emac) struct prueth *prueth = emac->prueth; int slice = prueth_emac_slice(emac); u32 base = stats_base[slice]; + u32 tx_pkt_cnt = 0; u32 val; int i; @@ -29,7 +33,12 @@ void emac_update_hardware_stats(struct prueth_emac *emac) base + icssg_all_stats[i].offset, val); + if (icssg_all_stats[i].offset == ICSSG_TX_PACKET_OFFSET) + tx_pkt_cnt = val; + emac->stats[i] += val; + if (icssg_all_stats[i].offset == ICSSG_TX_BYTE_OFFSET) + emac->stats[i] -= tx_pkt_cnt * 8; } } -- cgit v1.3.1 From fc6f716a5069180c40a8c9b63631e97da34f64a3 Mon Sep 17 00:00:00 2001 From: Michal Schmidt Date: Wed, 11 Oct 2023 16:33:32 -0700 Subject: i40e: prevent crash on probe if hw registers have invalid values The hardware provides the indexes of the first and the last available queue and VF. From the indexes, the driver calculates the numbers of queues and VFs. In theory, a faulty device might say the last index is smaller than the first index. In that case, the driver's calculation would underflow, it would attempt to write to non-existent registers outside of the ioremapped range and crash. 
I ran into this not by having a faulty device, but by an operator error. I accidentally ran a QE test meant for i40e devices on an ice device. The test used 'echo i40e > /sys/...ice PCI device.../driver_override', bound the driver to the device and crashed in one of the wr32 calls in i40e_clear_hw. Add checks to prevent underflows in the calculations of num_queues and num_vfs. With this fix, the wrong device probing reports errors and returns a failure without crashing. Fixes: 838d41d92a90 ("i40e: clear all queues and interrupts") Signed-off-by: Michal Schmidt Reviewed-by: Simon Horman Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-2-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/i40e/i40e_common.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/i40e/i40e_common.c b/drivers/net/ethernet/intel/i40e/i40e_common.c index eeef20f77106..1b493854f522 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_common.c +++ b/drivers/net/ethernet/intel/i40e/i40e_common.c @@ -1082,7 +1082,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PFLAN_QALLOC_FIRSTQ_SHIFT; j = (val & I40E_PFLAN_QALLOC_LASTQ_MASK) >> I40E_PFLAN_QALLOC_LASTQ_SHIFT; - if (val & I40E_PFLAN_QALLOC_VALID_MASK) + if (val & I40E_PFLAN_QALLOC_VALID_MASK && j >= base_queue) num_queues = (j - base_queue) + 1; else num_queues = 0; @@ -1092,7 +1092,7 @@ void i40e_clear_hw(struct i40e_hw *hw) I40E_PF_VT_PFALLOC_FIRSTVF_SHIFT; j = (val & I40E_PF_VT_PFALLOC_LASTVF_MASK) >> I40E_PF_VT_PFALLOC_LASTVF_SHIFT; - if (val & I40E_PF_VT_PFALLOC_VALID_MASK) + if (val & I40E_PF_VT_PFALLOC_VALID_MASK && j >= i) num_vfs = (j - i) + 1; else num_vfs = 0; -- cgit v1.3.1 From 0288c3e709e5fabd51e84715c5c798a02f43061a Mon Sep 17 00:00:00 2001 From: Jesse Brandeburg Date: Wed, 11 Oct 2023 16:33:33 -0700 Subject: ice: reset first in crash dump kernels When the 
system boots into the crash dump kernel after a panic, the ice networking device may still have pending transactions that can cause errors or machine checks when the device is re-enabled. This can prevent the crash dump kernel from loading the driver or collecting the crash data. To avoid this issue, perform a function level reset (FLR) on the ice device via PCIe config space before enabling it on the crash kernel. This will clear any outstanding transactions and stop all queues and interrupts. Restore the config space after the FLR, otherwise it was found in testing that the driver wouldn't load successfully. The following sequence causes the original issue: - Load the ice driver with modprobe ice - Enable SR-IOV with 2 VFs: echo 2 > /sys/class/net/eth0/device/sriov_num_vfs - Trigger a crash with echo c > /proc/sysrq-trigger - Load the ice driver again (or let it load automatically) with modprobe ice - The system crashes again during pcim_enable_device() Fixes: 837f08fdecbe ("ice: Add basic driver framework for Intel(R) E800 Series") Reported-by: Vishal Agrawal Reviewed-by: Jay Vosburgh Reviewed-by: Przemek Kitszel Signed-off-by: Jesse Brandeburg Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-3-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index c8286adae946..6550c46e4e36 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -6,6 +6,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include "ice.h" #include "ice_base.h" #include "ice_lib.h" @@ -5014,6 +5015,20 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) return -EINVAL; } + /* when under a kdump kernel 
initiate a reset before enabling the + * device in order to clear out any pending DMA transactions. These + * transactions can cause some systems to machine check when doing + * the pcim_enable_device() below. + */ + if (is_kdump_kernel()) { + pci_save_state(pdev); + pci_clear_master(pdev); + err = pcie_flr(pdev); + if (err) + return err; + pci_restore_state(pdev); + } + /* this driver uses devres, see * Documentation/driver-api/driver-model/devres.rst */ -- cgit v1.3.1 From 42066c4d5d344cdf8564556cdbe0aa36854fefa4 Mon Sep 17 00:00:00 2001 From: Mateusz Pacuszka Date: Wed, 11 Oct 2023 16:33:34 -0700 Subject: ice: Fix safe mode when DDP is missing One thing is broken in safe mode: ice_deinit_features() is executed even though ice_init_features() was not, causing a stack trace during pci_unregister_driver(). Add a check at the top of the function. Fixes: 5b246e533d01 ("ice: split probe into smaller functions") Signed-off-by: Mateusz Pacuszka Signed-off-by: Jan Sokolowski Reviewed-by: Przemek Kitszel Tested-by: Pucha Himasekhar Reddy (A Contingent worker at Intel) Link: https://lore.kernel.org/r/20231011233334.336092-4-jacob.e.keller@intel.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/intel/ice/ice_main.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index 6550c46e4e36..7784135160fd 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4684,6 +4684,9 @@ static void ice_init_features(struct ice_pf *pf) static void ice_deinit_features(struct ice_pf *pf) { + if (ice_is_safe_mode(pf)) + return; + ice_deinit_lag(pf); if (test_bit(ICE_FLAG_DCB_CAPABLE, pf->flags)) ice_cfg_lldp_mib_change(&pf->hw, false); -- cgit v1.3.1 From 2f3389c73832ad90b63208c0fc281ad080114c7a Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Fri, 13 Oct 2023 18:48:12 +0530 Subject: qed: fix LL2 RX buffer 
allocation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Driver allocates the LL2 rx buffers from kmalloc() area to construct the skb using slab_build_skb(). The required size allocation seems to have overlooked accounting for both skb_shared_info size and device placement padding bytes, which results in the below panic when doing skb_put() for a standard MTU sized frame. skbuff: skb_over_panic: text:ffffffffc0b0225f len:1514 put:1514 head:ff3dabceaf39c000 data:ff3dabceaf39c042 tail:0x62c end:0x566 dev: … skb_panic+0x48/0x4a skb_put.cold+0x10/0x10 qed_ll2b_complete_rx_packet+0x14f/0x260 [qed] qed_ll2_rxq_handle_completion.constprop.0+0x169/0x200 [qed] qed_ll2_rxq_completion+0xba/0x320 [qed] qed_int_sp_dpc+0x1a7/0x1e0 [qed] This patch fixes this by accounting for skb_shared_info and device placement padding size bytes when allocating the buffers. Cc: David S. Miller Fixes: 0a7fb11c23c0 ("qed: Add Light L2 support") Signed-off-by: Manish Chopra Signed-off-by: David S.
Miller --- drivers/net/ethernet/qlogic/qed/qed_ll2.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c index 717a0b3f89bd..ab5ef254a748 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c +++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c @@ -113,7 +113,10 @@ static void qed_ll2b_complete_tx_packet(void *cxt, static int qed_ll2_alloc_buffer(struct qed_dev *cdev, u8 **data, dma_addr_t *phys_addr) { - *data = kmalloc(cdev->ll2->rx_size, GFP_ATOMIC); + size_t size = cdev->ll2->rx_size + NET_SKB_PAD + + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); + + *data = kmalloc(size, GFP_ATOMIC); if (!(*data)) { DP_INFO(cdev, "Failed to allocate LL2 buffer data\n"); return -ENOMEM; @@ -2589,7 +2592,7 @@ static int qed_ll2_start(struct qed_dev *cdev, struct qed_ll2_params *params) INIT_LIST_HEAD(&cdev->ll2->list); spin_lock_init(&cdev->ll2->lock); - cdev->ll2->rx_size = NET_SKB_PAD + ETH_HLEN + + cdev->ll2->rx_size = PRM_DMA_PAD_BYTES_NUM + ETH_HLEN + L1_CACHE_BYTES + params->mtu; /* Allocate memory for LL2. -- cgit v1.3.1 From 5720c43d5216b5dbd9ab25595f7c61e55d36d4fc Mon Sep 17 00:00:00 2001 From: Xuan Zhuo Date: Wed, 27 Sep 2023 13:52:46 +0800 Subject: virtio_net: fix the missing of the dma cpu sync Commit 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") unmaps the buffer with DMA_ATTR_SKIP_CPU_SYNC when the dma->ref is zero. We do that with DMA_ATTR_SKIP_CPU_SYNC, because we do not want to do the sync for the entire page_frag. But that misses the sync for the current area. This patch does cpu sync regardless of whether the ref is zero or not. 
Fixes: 295525e29a5b ("virtio_net: merge dma operations when filling mergeable buffers") Reported-by: Michael Roth Closes: http://lore.kernel.org/all/20230926130451.axgodaa6tvwqs3ut@amd.com Signed-off-by: Xuan Zhuo Acked-by: Jason Wang Signed-off-by: Linus Torvalds --- drivers/net/virtio_net.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index fe7f314d65c9..d67f742fbd4c 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -607,16 +607,16 @@ static void virtnet_rq_unmap(struct receive_queue *rq, void *buf, u32 len) --dma->ref; - if (dma->ref) { - if (dma->need_sync && len) { - offset = buf - (head + sizeof(*dma)); + if (dma->need_sync && len) { + offset = buf - (head + sizeof(*dma)); - virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, offset, - len, DMA_FROM_DEVICE); - } + virtqueue_dma_sync_single_range_for_cpu(rq->vq, dma->addr, + offset, len, + DMA_FROM_DEVICE); + } + if (dma->ref) return; - } virtqueue_dma_unmap_single_attrs(rq->vq, dma->addr, dma->len, DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC); -- cgit v1.3.1 From 95535e37e8959f50e7aee365a5bdc9e5ed720443 Mon Sep 17 00:00:00 2001 From: Shailend Chand Date: Sat, 14 Oct 2023 01:41:21 +0000 Subject: gve: Do not fully free QPL pages on prefill errors The prefill function should have only removed the page count bias it added. Fully freeing the page will cause gve_free_queue_page_list to free a page the driver no longer owns. 
Fixes: 82fd151d38d9 ("gve: Reduce alloc and copy costs in the GQ rx path") Signed-off-by: Shailend Chand Link: https://lore.kernel.org/r/20231014014121.2843922-1-shailend@google.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/google/gve/gve_rx.c | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/google/gve/gve_rx.c b/drivers/net/ethernet/google/gve/gve_rx.c index d1da7413dc4d..e84a066aa1a4 100644 --- a/drivers/net/ethernet/google/gve/gve_rx.c +++ b/drivers/net/ethernet/google/gve/gve_rx.c @@ -146,7 +146,7 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) err = gve_rx_alloc_buffer(priv, &priv->pdev->dev, &rx->data.page_info[i], &rx->data.data_ring[i]); if (err) - goto alloc_err; + goto alloc_err_rda; } if (!rx->data.raw_addressing) { @@ -171,12 +171,26 @@ static int gve_prefill_rx_pages(struct gve_rx_ring *rx) return slots; alloc_err_qpl: + /* Fully free the copy pool pages. */ while (j--) { page_ref_sub(rx->qpl_copy_pool[j].page, rx->qpl_copy_pool[j].pagecnt_bias - 1); put_page(rx->qpl_copy_pool[j].page); } -alloc_err: + + /* Do not fully free QPL pages - only remove the bias added in this + * function with gve_setup_rx_buffer. + */ + while (i--) + page_ref_sub(rx->data.page_info[i].page, + rx->data.page_info[i].pagecnt_bias - 1); + + gve_unassign_qpl(priv, rx->data.qpl->id); + rx->data.qpl = NULL; + + return err; + +alloc_err_rda: while (i--) gve_rx_free_buffer(&priv->pdev->dev, &rx->data.page_info[i], -- cgit v1.3.1 From cbfbfe3aee718dc4c3c837f5d2463170ee59d78c Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 16 Oct 2023 18:08:51 +0000 Subject: tun: prevent negative ifindex After commit 956db0a13b47 ("net: warn about attempts to register negative ifindex") syzbot is able to trigger the following splat. Negative ifindex are not supported. 
WARNING: CPU: 1 PID: 6003 at net/core/dev.c:9596 dev_index_reserve+0x104/0x210 Modules linked in: CPU: 1 PID: 6003 Comm: syz-executor926 Not tainted 6.6.0-rc4-syzkaller-g19af4a4ed414 #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 09/06/2023 pstate: 80400005 (Nzcv daif +PAN -UAO -TCO -DIT -SSBS BTYPE=--) pc : dev_index_reserve+0x104/0x210 lr : dev_index_reserve+0x100/0x210 sp : ffff800096a878e0 x29: ffff800096a87930 x28: ffff0000d04380d0 x27: ffff0000d04380f8 x26: ffff0000d04380f0 x25: 1ffff00012d50f20 x24: 1ffff00012d50f1c x23: dfff800000000000 x22: ffff8000929c21c0 x21: 00000000ffffffea x20: ffff0000d04380e0 x19: ffff800096a87900 x18: ffff800096a874c0 x17: ffff800084df5008 x16: ffff80008051f9c4 x15: 0000000000000001 x14: 1fffe0001a087198 x13: 0000000000000000 x12: 0000000000000000 x11: 0000000000000000 x10: 0000000000000000 x9 : 0000000000000000 x8 : ffff0000d41c9bc0 x7 : 0000000000000000 x6 : 0000000000000000 x5 : ffff800091763d88 x4 : 0000000000000000 x3 : ffff800084e04748 x2 : 0000000000000001 x1 : 00000000fead71c7 x0 : 0000000000000000 Call trace: dev_index_reserve+0x104/0x210 register_netdevice+0x598/0x1074 net/core/dev.c:10084 tun_set_iff+0x630/0xb0c drivers/net/tun.c:2850 __tun_chr_ioctl+0x788/0x2af8 drivers/net/tun.c:3118 tun_chr_ioctl+0x38/0x4c drivers/net/tun.c:3403 vfs_ioctl fs/ioctl.c:51 [inline] __do_sys_ioctl fs/ioctl.c:871 [inline] __se_sys_ioctl fs/ioctl.c:857 [inline] __arm64_sys_ioctl+0x14c/0x1c8 fs/ioctl.c:857 __invoke_syscall arch/arm64/kernel/syscall.c:37 [inline] invoke_syscall+0x98/0x2b8 arch/arm64/kernel/syscall.c:51 el0_svc_common+0x130/0x23c arch/arm64/kernel/syscall.c:136 do_el0_svc+0x48/0x58 arch/arm64/kernel/syscall.c:155 el0_svc+0x58/0x16c arch/arm64/kernel/entry-common.c:678 el0t_64_sync_handler+0x84/0xfc arch/arm64/kernel/entry-common.c:696 el0t_64_sync+0x190/0x194 arch/arm64/kernel/entry.S:595 irq event stamp: 11348 hardirqs last enabled at (11347): [] __raw_spin_unlock_irqrestore 
include/linux/spinlock_api_smp.h:151 [inline] hardirqs last enabled at (11347): [] _raw_spin_unlock_irqrestore+0x38/0x98 kernel/locking/spinlock.c:194 hardirqs last disabled at (11348): [] el1_dbg+0x24/0x80 arch/arm64/kernel/entry-common.c:436 softirqs last enabled at (11138): [] spin_unlock_bh include/linux/spinlock.h:396 [inline] softirqs last enabled at (11138): [] release_sock+0x15c/0x1b0 net/core/sock.c:3531 softirqs last disabled at (11136): [] spin_lock_bh include/linux/spinlock.h:356 [inline] softirqs last disabled at (11136): [] release_sock+0x3c/0x1b0 net/core/sock.c:3518 Fixes: fb7589a16216 ("tun: Add ability to create tun device with given index") Reported-by: syzbot Signed-off-by: Eric Dumazet Reviewed-by: Willem de Bruijn Acked-by: Jason Wang Link: https://lore.kernel.org/r/20231016180851.3560092-1-edumazet@google.com Signed-off-by: Jakub Kicinski --- drivers/net/tun.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 89ab9efe522c..afa5497f7c35 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -3073,10 +3073,11 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, struct net *net = sock_net(&tfile->sk); struct tun_struct *tun; void __user* argp = (void __user*)arg; - unsigned int ifindex, carrier; + unsigned int carrier; struct ifreq ifr; kuid_t owner; kgid_t group; + int ifindex; int sndbuf; int vnet_hdr_sz; int le; @@ -3132,7 +3133,9 @@ static long __tun_chr_ioctl(struct file *file, unsigned int cmd, ret = -EFAULT; if (copy_from_user(&ifindex, argp, sizeof(ifindex))) goto unlock; - + ret = -EINVAL; + if (ifindex < 0) + goto unlock; ret = 0; tfile->ifindex = ifindex; goto unlock; -- cgit v1.3.1 From c53647a5df9e66dd9fedf240198e1fe50d88c286 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 16 Oct 2023 20:28:10 +0300 Subject: net: usb: smsc95xx: Fix an error code in smsc95xx_reset() Return a negative error code instead of success. 
Fixes: 2f7ca802bdae ("net: Add SMSC LAN9500 USB2.0 10/100 ethernet adapter driver") Signed-off-by: Dan Carpenter Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/147927f0-9ada-45cc-81ff-75a19dd30b76@moroto.mountain Signed-off-by: Jakub Kicinski --- drivers/net/usb/smsc95xx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/net') diff --git a/drivers/net/usb/smsc95xx.c b/drivers/net/usb/smsc95xx.c index 563ecd27b93e..17da42fe605c 100644 --- a/drivers/net/usb/smsc95xx.c +++ b/drivers/net/usb/smsc95xx.c @@ -897,7 +897,7 @@ static int smsc95xx_reset(struct usbnet *dev) if (timeout >= 100) { netdev_warn(dev->net, "timeout waiting for completion of Lite Reset\n"); - return ret; + return -ETIMEDOUT; } ret = smsc95xx_set_mac_address(dev); -- cgit v1.3.1 From 1db34aa58d80988f5ee99d2fd9d8f7489c3b0681 Mon Sep 17 00:00:00 2001 From: Bagas Sanjaya Date: Tue, 17 Oct 2023 15:08:12 +0700 Subject: Revert "net: wwan: iosm: enable runtime pm support for 7560" Runtime power management support breaks Intel LTE modem where dmesg dump shows timeout errors: ``` [ 72.027442] iosm 0000:01:00.0: msg timeout [ 72.531638] iosm 0000:01:00.0: msg timeout [ 73.035414] iosm 0000:01:00.0: msg timeout [ 73.540359] iosm 0000:01:00.0: msg timeout ``` Furthermore, when shutting down with `poweroff` and modem attached, the system rebooted instead of powering down as expected. The modem works again only after power cycling. Revert runtime power management support for IOSM driver as introduced by commit e4f5073d53be6c ("net: wwan: iosm: enable runtime pm support for 7560"). Fixes: e4f5073d53be ("net: wwan: iosm: enable runtime pm support for 7560") Reported-by: Martin Closes: https://bugzilla.kernel.org/show_bug.cgi?id=217996 Link: https://lore.kernel.org/r/267abf02-4b60-4a2e-92cd-709e3da6f7d3@gmail.com/ Signed-off-by: Bagas Sanjaya Reviewed-by: Loic Poulain Signed-off-by: David S.
Miller --- drivers/net/wwan/iosm/iosm_ipc_imem.c | 17 ----------------- drivers/net/wwan/iosm/iosm_ipc_imem.h | 2 -- drivers/net/wwan/iosm/iosm_ipc_pcie.c | 4 +--- drivers/net/wwan/iosm/iosm_ipc_port.c | 17 +---------------- drivers/net/wwan/iosm/iosm_ipc_trace.c | 8 -------- drivers/net/wwan/iosm/iosm_ipc_wwan.c | 21 ++------------------- 6 files changed, 4 insertions(+), 65 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.c b/drivers/net/wwan/iosm/iosm_ipc_imem.c index 635301d677e1..829515a601b3 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.c +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.c @@ -4,7 +4,6 @@ */ #include -#include #include "iosm_ipc_chnl_cfg.h" #include "iosm_ipc_devlink.h" @@ -632,11 +631,6 @@ static void ipc_imem_run_state_worker(struct work_struct *instance) /* Complete all memory stores after setting bit */ smp_mb__after_atomic(); - if (ipc_imem->pcie->pci->device == INTEL_CP_DEVICE_7560_ID) { - pm_runtime_mark_last_busy(ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_imem->dev); - } - return; err_ipc_mux_deinit: @@ -1240,7 +1234,6 @@ void ipc_imem_cleanup(struct iosm_imem *ipc_imem) /* forward MDM_NOT_READY to listeners */ ipc_uevent_send(ipc_imem->dev, UEVENT_MDM_NOT_READY); - pm_runtime_get_sync(ipc_imem->dev); hrtimer_cancel(&ipc_imem->td_alloc_timer); hrtimer_cancel(&ipc_imem->tdupdate_timer); @@ -1426,16 +1419,6 @@ struct iosm_imem *ipc_imem_init(struct iosm_pcie *pcie, unsigned int device_id, set_bit(IOSM_DEVLINK_INIT, &ipc_imem->flag); } - - if (!pm_runtime_enabled(ipc_imem->dev)) - pm_runtime_enable(ipc_imem->dev); - - pm_runtime_set_autosuspend_delay(ipc_imem->dev, - IPC_MEM_AUTO_SUSPEND_DELAY_MS); - pm_runtime_use_autosuspend(ipc_imem->dev); - pm_runtime_allow(ipc_imem->dev); - pm_runtime_mark_last_busy(ipc_imem->dev); - return ipc_imem; devlink_channel_fail: ipc_devlink_deinit(ipc_imem->ipc_devlink); diff --git a/drivers/net/wwan/iosm/iosm_ipc_imem.h b/drivers/net/wwan/iosm/iosm_ipc_imem.h index 
0144b45e2afb..5664ac507c90 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_imem.h +++ b/drivers/net/wwan/iosm/iosm_ipc_imem.h @@ -103,8 +103,6 @@ struct ipc_chnl_cfg; #define FULLY_FUNCTIONAL 0 #define IOSM_DEVLINK_INIT 1 -#define IPC_MEM_AUTO_SUSPEND_DELAY_MS 5000 - /* List of the supported UL/DL pipes. */ enum ipc_mem_pipes { IPC_MEM_PIPE_0 = 0, diff --git a/drivers/net/wwan/iosm/iosm_ipc_pcie.c b/drivers/net/wwan/iosm/iosm_ipc_pcie.c index 3a259c9abefd..04517bd3325a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_pcie.c +++ b/drivers/net/wwan/iosm/iosm_ipc_pcie.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include "iosm_ipc_imem.h" @@ -438,8 +437,7 @@ static int __maybe_unused ipc_pcie_resume_cb(struct device *dev) return 0; } -static DEFINE_RUNTIME_DEV_PM_OPS(iosm_ipc_pm, ipc_pcie_suspend_cb, - ipc_pcie_resume_cb, NULL); +static SIMPLE_DEV_PM_OPS(iosm_ipc_pm, ipc_pcie_suspend_cb, ipc_pcie_resume_cb); static struct pci_driver iosm_ipc_driver = { .name = KBUILD_MODNAME, diff --git a/drivers/net/wwan/iosm/iosm_ipc_port.c b/drivers/net/wwan/iosm/iosm_ipc_port.c index 2ba1ddca3945..5d5b4183e14a 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_port.c +++ b/drivers/net/wwan/iosm/iosm_ipc_port.c @@ -3,8 +3,6 @@ * Copyright (C) 2020-21 Intel Corporation. 
*/ -#include - #include "iosm_ipc_chnl_cfg.h" #include "iosm_ipc_imem_ops.h" #include "iosm_ipc_port.h" @@ -15,16 +13,12 @@ static int ipc_port_ctrl_start(struct wwan_port *port) struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port); int ret = 0; - pm_runtime_get_sync(ipc_port->ipc_imem->dev); ipc_port->channel = ipc_imem_sys_port_open(ipc_port->ipc_imem, ipc_port->chl_id, IPC_HP_CDEV_OPEN); if (!ipc_port->channel) ret = -EIO; - pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev); - return ret; } @@ -33,24 +27,15 @@ static void ipc_port_ctrl_stop(struct wwan_port *port) { struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port); - pm_runtime_get_sync(ipc_port->ipc_imem->dev); ipc_imem_sys_port_close(ipc_port->ipc_imem, ipc_port->channel); - pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev); } /* transfer control data to modem */ static int ipc_port_ctrl_tx(struct wwan_port *port, struct sk_buff *skb) { struct iosm_cdev *ipc_port = wwan_port_get_drvdata(port); - int ret; - pm_runtime_get_sync(ipc_port->ipc_imem->dev); - ret = ipc_imem_sys_cdev_write(ipc_port, skb); - pm_runtime_mark_last_busy(ipc_port->ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_port->ipc_imem->dev); - - return ret; + return ipc_imem_sys_cdev_write(ipc_port, skb); } static const struct wwan_port_ops ipc_wwan_ctrl_ops = { diff --git a/drivers/net/wwan/iosm/iosm_ipc_trace.c b/drivers/net/wwan/iosm/iosm_ipc_trace.c index 4368373797b6..eeecfa3d10c5 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_trace.c +++ b/drivers/net/wwan/iosm/iosm_ipc_trace.c @@ -3,9 +3,7 @@ * Copyright (C) 2020-2021 Intel Corporation. 
*/ -#include #include - #include "iosm_ipc_trace.h" /* sub buffer size and number of sub buffer */ @@ -99,8 +97,6 @@ static ssize_t ipc_trace_ctrl_file_write(struct file *filp, if (ret) return ret; - pm_runtime_get_sync(ipc_trace->ipc_imem->dev); - mutex_lock(&ipc_trace->trc_mutex); if (val == TRACE_ENABLE && ipc_trace->mode != TRACE_ENABLE) { ipc_trace->channel = ipc_imem_sys_port_open(ipc_trace->ipc_imem, @@ -121,10 +117,6 @@ static ssize_t ipc_trace_ctrl_file_write(struct file *filp, ret = count; unlock: mutex_unlock(&ipc_trace->trc_mutex); - - pm_runtime_mark_last_busy(ipc_trace->ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_trace->ipc_imem->dev); - return ret; } diff --git a/drivers/net/wwan/iosm/iosm_ipc_wwan.c b/drivers/net/wwan/iosm/iosm_ipc_wwan.c index 93d17de08786..ff747fc79aaf 100644 --- a/drivers/net/wwan/iosm/iosm_ipc_wwan.c +++ b/drivers/net/wwan/iosm/iosm_ipc_wwan.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include #include @@ -52,13 +51,11 @@ static int ipc_wwan_link_open(struct net_device *netdev) struct iosm_netdev_priv *priv = wwan_netdev_drvpriv(netdev); struct iosm_wwan *ipc_wwan = priv->ipc_wwan; int if_id = priv->if_id; - int ret = 0; if (if_id < IP_MUX_SESSION_START || if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist)) return -EINVAL; - pm_runtime_get_sync(ipc_wwan->ipc_imem->dev); /* get channel id */ priv->ch_id = ipc_imem_sys_wwan_open(ipc_wwan->ipc_imem, if_id); @@ -66,8 +63,7 @@ static int ipc_wwan_link_open(struct net_device *netdev) dev_err(ipc_wwan->dev, "cannot connect wwan0 & id %d to the IPC mem layer", if_id); - ret = -ENODEV; - goto err_out; + return -ENODEV; } /* enable tx path, DL data may follow */ @@ -76,11 +72,7 @@ static int ipc_wwan_link_open(struct net_device *netdev) dev_dbg(ipc_wwan->dev, "Channel id %d allocated to if_id %d", priv->ch_id, priv->if_id); -err_out: - pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev); - - return ret; + return 0; } 
/* Bring-down the wwan net link */ @@ -90,12 +82,9 @@ static int ipc_wwan_link_stop(struct net_device *netdev) netif_stop_queue(netdev); - pm_runtime_get_sync(priv->ipc_wwan->ipc_imem->dev); ipc_imem_sys_wwan_close(priv->ipc_wwan->ipc_imem, priv->if_id, priv->ch_id); priv->ch_id = -1; - pm_runtime_mark_last_busy(priv->ipc_wwan->ipc_imem->dev); - pm_runtime_put_autosuspend(priv->ipc_wwan->ipc_imem->dev); return 0; } @@ -117,7 +106,6 @@ static netdev_tx_t ipc_wwan_link_transmit(struct sk_buff *skb, if_id >= ARRAY_SIZE(ipc_wwan->sub_netlist)) return -EINVAL; - pm_runtime_get(ipc_wwan->ipc_imem->dev); /* Send the SKB to device for transmission */ ret = ipc_imem_sys_wwan_transmit(ipc_wwan->ipc_imem, if_id, priv->ch_id, skb); @@ -131,14 +119,9 @@ static netdev_tx_t ipc_wwan_link_transmit(struct sk_buff *skb, ret = NETDEV_TX_BUSY; dev_err(ipc_wwan->dev, "unable to push packets"); } else { - pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev); goto exit; } - pm_runtime_mark_last_busy(ipc_wwan->ipc_imem->dev); - pm_runtime_put_autosuspend(ipc_wwan->ipc_imem->dev); - return ret; exit: -- cgit v1.3.1 From a0ca6b9dfef0b3cc83aa8bb485ed61a018f84982 Mon Sep 17 00:00:00 2001 From: Shinas Rasheed Date: Tue, 17 Oct 2023 03:50:30 -0700 Subject: octeon_ep: update BQL sent bytes before ringing doorbell Sometimes Tx is completed immediately after doorbell is updated, which causes Tx completion routing to update completion bytes before the same packet bytes are updated in sent bytes in transmit function, hence hitting BUG_ON() in dql_completed(). To avoid this, update BQL sent bytes before ringing doorbell. 
Fixes: 37d79d059606 ("octeon_ep: add Tx/Rx processing and interrupt support") Signed-off-by: Shinas Rasheed Link: https://lore.kernel.org/r/20231017105030.2310966-1-srasheed@marvell.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/marvell/octeon_ep/octep_main.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c index dbc518ff8276..5b46ca47c8e5 100644 --- a/drivers/net/ethernet/marvell/octeon_ep/octep_main.c +++ b/drivers/net/ethernet/marvell/octeon_ep/octep_main.c @@ -715,20 +715,19 @@ static netdev_tx_t octep_start_xmit(struct sk_buff *skb, hw_desc->dptr = tx_buffer->sglist_dma; } - /* Flush the hw descriptor before writing to doorbell */ - wmb(); - - /* Ring Doorbell to notify the NIC there is a new packet */ - writel(1, iq->doorbell_reg); + netdev_tx_sent_queue(iq->netdev_q, skb->len); + skb_tx_timestamp(skb); atomic_inc(&iq->instr_pending); wi++; if (wi == iq->max_count) wi = 0; iq->host_write_index = wi; + /* Flush the hw descriptor before writing to doorbell */ + wmb(); - netdev_tx_sent_queue(iq->netdev_q, skb->len); + /* Ring Doorbell to notify the NIC there is a new packet */ + writel(1, iq->doorbell_reg); iq->stats.instr_posted++; - skb_tx_timestamp(skb); return NETDEV_TX_OK; dma_map_sg_err: -- cgit v1.3.1 From 1f9f2143f24e224a8582a5d54918c43b9121eccc Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Tue, 17 Oct 2023 17:31:44 +0300 Subject: net: mdio-mux: fix C45 access returning -EIO after API change The mii_bus API conversion to read_c45() and write_c45() did not cover the mdio-mux driver before read() and write() were made C22-only. This broke arch/arm64/boot/dts/freescale/fsl-ls1028a-qds-13bb.dtso. 
The -EOPNOTSUPP from mdiobus_c45_read() is transformed by get_phy_c45_devs_in_pkg() into -EIO, is further propagated to of_mdiobus_register() and this makes the mdio-mux driver fail to probe the entire child buses, not just the PHYs that cause access errors. Fix the regression by introducing special c45 read and write accessors to mdio-mux which forward the operation to the parent MDIO bus. Fixes: db1a63aed89c ("net: phy: Remove fallback to old C45 method") Signed-off-by: Vladimir Oltean Reviewed-by: Florian Fainelli Reviewed-by: Andrew Lunn Reviewed-by: Russell King (Oracle) Link: https://lore.kernel.org/r/20231017143144.3212657-1-vladimir.oltean@nxp.com Signed-off-by: Jakub Kicinski --- drivers/net/mdio/mdio-mux.c | 47 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/mdio/mdio-mux.c b/drivers/net/mdio/mdio-mux.c index a881e3523328..bef4cce71287 100644 --- a/drivers/net/mdio/mdio-mux.c +++ b/drivers/net/mdio/mdio-mux.c @@ -55,6 +55,27 @@ out: return r; } +static int mdio_mux_read_c45(struct mii_bus *bus, int phy_id, int dev_addr, + int regnum) +{ + struct mdio_mux_child_bus *cb = bus->priv; + struct mdio_mux_parent_bus *pb = cb->parent; + int r; + + mutex_lock_nested(&pb->mii_bus->mdio_lock, MDIO_MUTEX_MUX); + r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); + if (r) + goto out; + + pb->current_child = cb->bus_number; + + r = pb->mii_bus->read_c45(pb->mii_bus, phy_id, dev_addr, regnum); +out: + mutex_unlock(&pb->mii_bus->mdio_lock); + + return r; +} + /* * The parent bus' lock is used to order access to the switch_fn. 
*/ @@ -80,6 +101,28 @@ out: return r; } +static int mdio_mux_write_c45(struct mii_bus *bus, int phy_id, int dev_addr, + int regnum, u16 val) +{ + struct mdio_mux_child_bus *cb = bus->priv; + struct mdio_mux_parent_bus *pb = cb->parent; + + int r; + + mutex_lock_nested(&pb->mii_bus->mdio_lock, MDIO_MUTEX_MUX); + r = pb->switch_fn(pb->current_child, cb->bus_number, pb->switch_data); + if (r) + goto out; + + pb->current_child = cb->bus_number; + + r = pb->mii_bus->write_c45(pb->mii_bus, phy_id, dev_addr, regnum, val); +out: + mutex_unlock(&pb->mii_bus->mdio_lock); + + return r; +} + static int parent_count; static void mdio_mux_uninit_children(struct mdio_mux_parent_bus *pb) @@ -173,6 +216,10 @@ int mdio_mux_init(struct device *dev, cb->mii_bus->parent = dev; cb->mii_bus->read = mdio_mux_read; cb->mii_bus->write = mdio_mux_write; + if (parent_bus->read_c45) + cb->mii_bus->read_c45 = mdio_mux_read_c45; + if (parent_bus->write_c45) + cb->mii_bus->write_c45 = mdio_mux_write_c45; r = of_mdiobus_register(cb->mii_bus, child_bus_node); if (r) { mdiobus_free(cb->mii_bus); -- cgit v1.3.1 From 6200e00e112ce2d17b066a20dd2476d9aecbefa6 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 17 Oct 2023 13:51:19 -0700 Subject: net: phy: bcm7xxx: Add missing 16nm EPHY statistics The .probe() function would allocate the necessary space and ensure that the library call sizes the number of statistics but the callbacks necessary to fetch the name and values were not wired up. 
Reported-by: Justin Chen Fixes: f68d08c437f9 ("net: phy: bcm7xxx: Add EPHY entry for 72165") Reviewed-by: Andrew Lunn Signed-off-by: Florian Fainelli Reviewed-by: Simon Horman Link: https://lore.kernel.org/r/20231017205119.416392-1-florian.fainelli@broadcom.com Signed-off-by: Jakub Kicinski --- drivers/net/phy/bcm7xxx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers/net') diff --git a/drivers/net/phy/bcm7xxx.c b/drivers/net/phy/bcm7xxx.c index 8478b081c058..97638ba7ae85 100644 --- a/drivers/net/phy/bcm7xxx.c +++ b/drivers/net/phy/bcm7xxx.c @@ -894,6 +894,9 @@ static int bcm7xxx_28nm_probe(struct phy_device *phydev) .name = _name, \ /* PHY_BASIC_FEATURES */ \ .flags = PHY_IS_INTERNAL, \ + .get_sset_count = bcm_phy_get_sset_count, \ + .get_strings = bcm_phy_get_strings, \ + .get_stats = bcm7xxx_28nm_get_phy_stats, \ .probe = bcm7xxx_28nm_probe, \ .config_init = bcm7xxx_16nm_ephy_config_init, \ .config_aneg = genphy_config_aneg, \ -- cgit v1.3.1 From a602ee3176a81280b829c9f0cf259450f7982168 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Wed, 18 Oct 2023 12:19:36 +0530 Subject: net: ethernet: ti: Fix mixed module-builtin object With CONFIG_TI_K3_AM65_CPSW_NUSS=y and CONFIG_TI_ICSSG_PRUETH=m, k3-cppi-desc-pool.o is linked to a module and also to vmlinux even though the expected CFLAGS are different between builtins and modules. The build system is complaining about the following: k3-cppi-desc-pool.o is added to multiple modules: icssg-prueth ti-am65-cpsw-nuss Introduce the new module, k3-cppi-desc-pool, to provide the common functions to ti-am65-cpsw-nuss and icssg-prueth. 
Fixes: 128d5874c082 ("net: ti: icssg-prueth: Add ICSSG ethernet driver") Signed-off-by: MD Danish Anwar Link: https://lore.kernel.org/r/20231018064936.3146846-1-danishanwar@ti.com Signed-off-by: Paolo Abeni --- drivers/net/ethernet/ti/Kconfig | 5 +++++ drivers/net/ethernet/ti/Makefile | 7 ++++--- drivers/net/ethernet/ti/k3-cppi-desc-pool.c | 10 ++++++++++ 3 files changed, 19 insertions(+), 3 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig index 0a3346650e03..cac61f5d3fd4 100644 --- a/drivers/net/ethernet/ti/Kconfig +++ b/drivers/net/ethernet/ti/Kconfig @@ -90,12 +90,16 @@ config TI_CPTS The unit can time stamp PTP UDP/IPv4 and Layer 2 packets, and the driver offers a PTP Hardware Clock. +config TI_K3_CPPI_DESC_POOL + tristate + config TI_K3_AM65_CPSW_NUSS tristate "TI K3 AM654x/J721E CPSW Ethernet driver" depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER select NET_DEVLINK select TI_DAVINCI_MDIO select PHYLINK + select TI_K3_CPPI_DESC_POOL imply PHY_TI_GMII_SEL depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS help @@ -187,6 +191,7 @@ config TI_ICSSG_PRUETH tristate "TI Gigabit PRU Ethernet driver" select PHYLIB select TI_ICSS_IEP + select TI_K3_CPPI_DESC_POOL depends on PRU_REMOTEPROC depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER help diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile index 34fd7a716ba6..67bed861f31d 100644 --- a/drivers/net/ethernet/ti/Makefile +++ b/drivers/net/ethernet/ti/Makefile @@ -24,14 +24,15 @@ keystone_netcp-y := netcp_core.o cpsw_ale.o obj-$(CONFIG_TI_KEYSTONE_NETCP_ETHSS) += keystone_netcp_ethss.o keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale.o +obj-$(CONFIG_TI_K3_CPPI_DESC_POOL) += k3-cppi-desc-pool.o + obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o -ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o k3-cppi-desc-pool.o am65-cpsw-qos.o +ti-am65-cpsw-nuss-y := 
am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o am65-cpsw-qos.o ti-am65-cpsw-nuss-$(CONFIG_TI_K3_AM65_CPSW_SWITCHDEV) += am65-cpsw-switchdev.o obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o obj-$(CONFIG_TI_ICSSG_PRUETH) += icssg-prueth.o -icssg-prueth-y := k3-cppi-desc-pool.o \ - icssg/icssg_prueth.o \ +icssg-prueth-y := icssg/icssg_prueth.o \ icssg/icssg_classifier.o \ icssg/icssg_queues.o \ icssg/icssg_config.o \ diff --git a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c index 38cc12f9f133..05cc7aab1ec8 100644 --- a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c +++ b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c @@ -39,6 +39,7 @@ void k3_cppi_desc_pool_destroy(struct k3_cppi_desc_pool *pool) gen_pool_destroy(pool->gen_pool); /* frees pool->name */ } +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_destroy); struct k3_cppi_desc_pool * k3_cppi_desc_pool_create_name(struct device *dev, size_t size, @@ -98,29 +99,38 @@ gen_pool_create_fail: devm_kfree(pool->dev, pool); return ERR_PTR(ret); } +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_create_name); dma_addr_t k3_cppi_desc_pool_virt2dma(struct k3_cppi_desc_pool *pool, void *addr) { return addr ? pool->dma_addr + (addr - pool->cpumem) : 0; } +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_virt2dma); void *k3_cppi_desc_pool_dma2virt(struct k3_cppi_desc_pool *pool, dma_addr_t dma) { return dma ? 
pool->cpumem + (dma - pool->dma_addr) : NULL; } +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_dma2virt); void *k3_cppi_desc_pool_alloc(struct k3_cppi_desc_pool *pool) { return (void *)gen_pool_alloc(pool->gen_pool, pool->desc_size); } +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_alloc); void k3_cppi_desc_pool_free(struct k3_cppi_desc_pool *pool, void *addr) { gen_pool_free(pool->gen_pool, (unsigned long)addr, pool->desc_size); } +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_free); size_t k3_cppi_desc_pool_avail(struct k3_cppi_desc_pool *pool) { return gen_pool_avail(pool->gen_pool) / pool->desc_size; } +EXPORT_SYMBOL_GPL(k3_cppi_desc_pool_avail); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("TI K3 CPPI5 descriptors pool API"); -- cgit v1.3.1 From 389db4fd673e5b122393a69b2973dd294a125573 Mon Sep 17 00:00:00 2001 From: MD Danish Anwar Date: Wed, 18 Oct 2023 20:37:15 +0530 Subject: net: ti: icssg-prueth: Fix r30 CMDs bitmasks The bitmasks for EMAC_PORT_DISABLE and EMAC_PORT_FORWARD r30 commands are wrong in the driver. Update the bitmasks of these commands to the correct ones as used by the ICSSG firmware. These bitmasks are backwards compatible and work with any ICSSG firmware version. 
Fixes: e9b4ece7d74b ("net: ti: icssg-prueth: Add Firmware config and classification APIs.") Signed-off-by: MD Danish Anwar Reviewed-by: Andrew Lunn Link: https://lore.kernel.org/r/20231018150715.3085380-1-danishanwar@ti.com Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/ti/icssg/icssg_config.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/net') diff --git a/drivers/net/ethernet/ti/icssg/icssg_config.c b/drivers/net/ethernet/ti/icssg/icssg_config.c index 933b84666574..b272361e378f 100644 --- a/drivers/net/ethernet/ti/icssg/icssg_config.c +++ b/drivers/net/ethernet/ti/icssg/icssg_config.c @@ -379,9 +379,9 @@ int icssg_config(struct prueth *prueth, struct prueth_emac *emac, int slice) /* Bitmask for ICSSG r30 commands */ static const struct icssg_r30_cmd emac_r32_bitmask[] = { - {{0xffff0004, 0xffff0100, 0xffff0100, EMAC_NONE}}, /* EMAC_PORT_DISABLE */ + {{0xffff0004, 0xffff0100, 0xffff0004, EMAC_NONE}}, /* EMAC_PORT_DISABLE */ {{0xfffb0040, 0xfeff0200, 0xfeff0200, EMAC_NONE}}, /* EMAC_PORT_BLOCK */ - {{0xffbb0000, 0xfcff0000, 0xdcff0000, EMAC_NONE}}, /* EMAC_PORT_FORWARD */ + {{0xffbb0000, 0xfcff0000, 0xdcfb0000, EMAC_NONE}}, /* EMAC_PORT_FORWARD */ {{0xffbb0000, 0xfcff0000, 0xfcff2000, EMAC_NONE}}, /* EMAC_PORT_FORWARD_WO_LEARNING */ {{0xffff0001, EMAC_NONE, EMAC_NONE, EMAC_NONE}}, /* ACCEPT ALL */ {{0xfffe0002, EMAC_NONE, EMAC_NONE, EMAC_NONE}}, /* ACCEPT TAGGED */ -- cgit v1.3.1