From c1fef618d611b31964ab397aa0bf0611da94bade Mon Sep 17 00:00:00 2001 From: Sandipan Patra Date: Mon, 13 Mar 2023 22:42:23 -0700 Subject: net/mlx5: Implement thermal zone Implement thermal zone support for mlx5-based HW. The NIC uses a temperature sensor provided by the ASIC to report the current temperature to the thermal core. Signed-off-by: Sandipan Patra Reviewed-by: Gal Pressman Signed-off-by: Saeed Mahameed Link: https://lore.kernel.org/r/20230314054234.267365-5-saeed@kernel.org Signed-off-by: Jakub Kicinski --- include/linux/mlx5/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux/mlx5/driver.h') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f33389b42209..7a898113b6b7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -134,6 +134,7 @@ enum { MLX5_REG_PCAM = 0x507f, MLX5_REG_NODE_DESC = 0x6001, MLX5_REG_HOST_ENDIANNESS = 0x7004, + MLX5_REG_MTMP = 0x900A, MLX5_REG_MCIA = 0x9014, MLX5_REG_MFRL = 0x9028, MLX5_REG_MLCR = 0x902b, @@ -731,6 +732,7 @@ struct mlx5_fw_tracer; struct mlx5_vxlan; struct mlx5_geneve; struct mlx5_hv_vhca; +struct mlx5_thermal; #define MLX5_LOG_SW_ICM_BLOCK_SIZE(dev) (MLX5_CAP_DEV_MEM(dev, log_sw_icm_alloc_granularity)) #define MLX5_SW_ICM_BLOCK_SIZE(dev) (1 << MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) @@ -808,6 +810,7 @@ struct mlx5_core_dev { struct mlx5_rsc_dump *rsc_dump; u32 vsc_addr; struct mlx5_hv_vhca *hv_vhca; + struct mlx5_thermal *thermal; }; struct mlx5_db { -- cgit v1.2.3-70-g09d2 From fb0a6a268dcd6fe144c99d60a1166e34c6991d5f Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 5 Jan 2023 11:31:46 +0200 Subject: net/mlx5: Provide external API for allocating vectors Provide an external API, to be used by other drivers relying on mlx5_core, for allocating MSIX vectors. An example of such a driver would be mlx5_vdpa. Signed-off-by: Eli Cohen Reviewed-by: Shay Drory Signed-off-by: Saeed Mahameed Reviewed-by: Jacob Keller --- drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c | 52 +++++++++++++++++++++++ include/linux/mlx5/driver.h | 6 +++ 2 files changed, 58 insertions(+) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c index 7fa63d31ae5b..e12e528c09f5 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/pci_irq.c @@ -483,6 +483,58 @@ struct mlx5_irq *mlx5_irq_request(struct mlx5_core_dev *dev, u16 vecidx, return irq; } +/** + * mlx5_msix_alloc - allocate msix interrupt + * @dev: mlx5 device from which to request + * @handler: interrupt handler + * @affdesc: affinity descriptor + * @name: interrupt name + * + * Returns: struct msi_map with result encoded. + * Note: the caller must make sure to release the irq by calling + * mlx5_msix_free() if shutdown was initiated.
+ */ +struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, + irqreturn_t (*handler)(int, void *), + const struct irq_affinity_desc *affdesc, + const char *name) +{ + struct msi_map map; + int err; + + if (!dev->pdev) { + map.virq = 0; + map.index = -EINVAL; + return map; + } + + map = pci_msix_alloc_irq_at(dev->pdev, MSI_ANY_INDEX, affdesc); + if (!map.virq) + return map; + + err = request_irq(map.virq, handler, 0, name, NULL); + if (err) { + mlx5_core_warn(dev, "err %d\n", err); + pci_msix_free_irq(dev->pdev, map); + map.virq = 0; + map.index = -ENOMEM; + } + return map; +} +EXPORT_SYMBOL(mlx5_msix_alloc); + +/** + * mlx5_msix_free - free a previously allocated msix interrupt + * @dev: mlx5 device associated with interrupt + * @map: map previously returned by mlx5_msix_alloc() + */ +void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map) +{ + free_irq(map.virq, NULL); + pci_msix_free_irq(dev->pdev, map); +} +EXPORT_SYMBOL(mlx5_msix_free); + /** * mlx5_irqs_release_vectors - release one or more IRQs back to the system. * @irqs: IRQs to be released. diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f33389b42209..df0f82110249 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1308,4 +1308,10 @@ enum { MLX5_OCTWORD = 16, }; +struct msi_map mlx5_msix_alloc(struct mlx5_core_dev *dev, + irqreturn_t (*handler)(int, void *), + const struct irq_affinity_desc *affdesc, + const char *name); +void mlx5_msix_free(struct mlx5_core_dev *dev, struct msi_map map); + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3-70-g09d2 From 9df839a711aee437390b16ee39cf0b5c1620be6a Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Thu, 23 Apr 2020 08:27:59 -0500 Subject: net/mlx5: Create a new profile for SFs Create a new profile for SFs in order to disable the command cache. Each function's command cache consumes ~500KB of memory; when using a large number of SFs, these savings are notable on memory-constrained systems. Use a new profile to provide for future differences between SFs and PFs. The mr_cache is not used for non-PF functions, so it is excluded from the new profile.
Signed-off-by: Parav Pandit Reviewed-by: Bodong Wang Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/cmd.c | 6 +++--- drivers/net/ethernet/mellanox/mlx5/core/main.c | 9 +++++++++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c | 2 +- include/linux/mlx5/driver.h | 1 + 5 files changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c index b00e33ed05e9..d53de39539a8 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c @@ -1802,7 +1802,7 @@ static struct mlx5_cmd_msg *alloc_msg(struct mlx5_core_dev *dev, int in_size, if (in_size <= 16) goto cache_miss; - for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + for (i = 0; i < dev->profile.num_cmd_caches; i++) { ch = &cmd->cache[i]; if (in_size > ch->max_inbox_size) continue; @@ -2097,7 +2097,7 @@ static void destroy_msg_cache(struct mlx5_core_dev *dev) struct mlx5_cmd_msg *n; int i; - for (i = 0; i < MLX5_NUM_COMMAND_CACHES; i++) { + for (i = 0; i < dev->profile.num_cmd_caches; i++) { ch = &dev->cmd.cache[i]; list_for_each_entry_safe(msg, n, &ch->head, list) { list_del(&msg->list); @@ -2127,7 +2127,7 @@ static void create_msg_cache(struct mlx5_core_dev *dev) int k; /* Initialize and fill the caches with initial entries */ - for (k = 0; k < MLX5_NUM_COMMAND_CACHES; k++) { + for (k = 0; k < dev->profile.num_cmd_caches; k++) { ch = &cmd->cache[k]; spin_lock_init(&ch->lock); INIT_LIST_HEAD(&ch->head); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index f95df73d1089..a95d1218def9 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -100,15 +100,19 @@ enum { static struct mlx5_profile profile[] = { [0] = { .mask = 0, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, }, [1] = { .mask = MLX5_PROF_MASK_QP_SIZE, .log_max_qp = 12, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, + }, [2] = { .mask = MLX5_PROF_MASK_QP_SIZE | MLX5_PROF_MASK_MR_CACHE, .log_max_qp = LOG_MAX_SUPPORTED_QPS, + .num_cmd_caches = MLX5_NUM_COMMAND_CACHES, .mr_cache[0] = { .size = 500, .limit = 250 @@ -174,6 +178,11 @@ static struct mlx5_profile profile[] = { .limit = 4 }, }, + [3] = { + .mask = MLX5_PROF_MASK_QP_SIZE, + .log_max_qp = LOG_MAX_SUPPORTED_QPS, + .num_cmd_caches = 0, + }, }; static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index be0785f83083..5eaab99678ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -142,6 +142,7 @@ enum mlx5_semaphore_space_address { }; #define MLX5_DEFAULT_PROF 2 +#define MLX5_SF_PROF 3 static inline int mlx5_flexible_inlen(struct mlx5_core_dev *dev, size_t fixed, size_t item_size, size_t num_items, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c index a7377619ba6f..e2f26d0bc615 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/sf/dev/driver.c @@ -28,7 +28,7 @@ static int mlx5_sf_dev_probe(struct auxiliary_device *adev, const struct auxilia mdev->priv.adev_idx = adev->id; sf_dev->mdev = mdev; - err = 
mlx5_mdev_init(mdev, MLX5_DEFAULT_PROF); + err = mlx5_mdev_init(mdev, MLX5_SF_PROF); if (err) { mlx5_core_warn(mdev, "mlx5_mdev_init on err=%d\n", err); goto mdev_err; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f243bd10a5e1..135a3c8d8237 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -751,6 +751,7 @@ enum { struct mlx5_profile { u64 mask; u8 log_max_qp; + u8 num_cmd_caches; struct { int size; int limit; -- cgit v1.2.3-70-g09d2 From f52cc627b832e08a7bcf1b7e81e650ec308fe1d8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 13 Apr 2023 15:25:47 -0700 Subject: Revert "net/mlx5: Enable management PF initialization" This reverts commit fe998a3c77b9f989a30a2a01fb00d3729a6d53a4. Paul reports that it causes a regression with IB on CX4 and FW 12.18.1000. In addition I think that the concept of "management PF" is not fully accepted and requires a discussion. Fixes: fe998a3c77b9 ("net/mlx5: Enable management PF initialization") Reported-by: Paul Moore Link: https://lore.kernel.org/all/CAHC9VhQ7A4+msL38WpbOMYjAqLp0EtOjeLh4Dc6SQtD6OUvCQg@mail.gmail.com/ Link: https://lore.kernel.org/r/20230413222547.56901-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- drivers/net/ethernet/mellanox/mlx5/core/dev.c | 6 ------ drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 8 -------- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 2 +- include/linux/mlx5/driver.h | 5 ----- 4 files changed, 1 insertion(+), 20 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/dev.c b/drivers/net/ethernet/mellanox/mlx5/core/dev.c index 445fe30c3d0b..2e7806001fdc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/dev.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/dev.c @@ -59,9 +59,6 @@ bool mlx5_eth_supported(struct mlx5_core_dev *dev) if (!IS_ENABLED(CONFIG_MLX5_CORE_EN)) return false; - if (mlx5_core_is_management_pf(dev)) - return false; - if (MLX5_CAP_GEN(dev, port_type) != MLX5_CAP_PORT_TYPE_ETH) return false; @@ -201,9 +198,6 @@ bool mlx5_rdma_supported(struct mlx5_core_dev *dev) if (!IS_ENABLED(CONFIG_MLX5_INFINIBAND)) return false; - if (mlx5_core_is_management_pf(dev)) - return false; - if (dev->priv.flags & MLX5_PRIV_FLAGS_DISABLE_IB_ADEV) return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c index 7c9c4e40c019..d000236ddbac 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c @@ -75,10 +75,6 @@ int mlx5_ec_init(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return 0; - /* Management PF don't have a peer PF */ - if (mlx5_core_is_management_pf(dev)) - return 0; - return mlx5_host_pf_init(dev); } @@ -89,10 +85,6 @@ void mlx5_ec_cleanup(struct mlx5_core_dev *dev) if (!mlx5_core_is_ecpf(dev)) return; - /* Management PF don't have a peer PF */ - if (mlx5_core_is_management_pf(dev)) - return; - mlx5_host_pf_cleanup(dev); err = mlx5_wait_for_pages(dev, &dev->priv.page_counters[MLX5_HOST_PF]); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 8bdf28762f41..19fed514fc17 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1488,7 +1488,7 @@ int mlx5_esw_sf_max_hpf_functions(struct mlx5_core_dev *dev, u16 *max_sfs, u16 * void *hca_caps; int err; - if (!mlx5_core_is_ecpf(dev) || mlx5_core_is_management_pf(dev)) { + if 
(!mlx5_core_is_ecpf(dev)) { *max_sfs = 0; return 0; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f33389b42209..7e225e41d55b 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1211,11 +1211,6 @@ static inline bool mlx5_core_is_vf(const struct mlx5_core_dev *dev) return dev->coredev_type == MLX5_COREDEV_VF; } -static inline bool mlx5_core_is_management_pf(const struct mlx5_core_dev *dev) -{ - return MLX5_CAP_GEN(dev, num_ports) == 1 && !MLX5_CAP_GEN(dev, native_port_num); -} - static inline bool mlx5_core_is_ecpf(const struct mlx5_core_dev *dev) { return dev->caps.embedded_cpu; -- cgit v1.2.3-70-g09d2 From b0bc615df488abd0e95107e4a9ecefb9bf8c250a Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Tue, 21 Mar 2023 00:10:16 +0200 Subject: net/mlx5: Add vnic devlink health reporter to PFs/VFs Create a vnic devlink health reporter for PFs/VFs interfaces. The reporter's diagnose callback displays the values of vNIC/vport transport debug counters of PFs/VFs, as follows: $ devlink health diagnose pci/0000:08:00.0 reporter vnic vNIC env counters: total_error_queues: 0 send_queue_priority_update_flow: 0 comp_eq_overrun: 0 async_eq_overrun: 0 cq_overrun: 0 invalid_command: 0 quota_exceeded_command: 0 nic_receive_steering_discard: 0 Moreover, add documentation on the reporter functionality and the counters description. While at it, expose the vNIC counters diagnose function to be used by the downstream patch, which will reveal the counters for representor interfaces. Signed-off-by: Maher Sanalla Reviewed-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- .../ethernet/mellanox/mlx5/devlink.rst | 30 +++++ drivers/net/ethernet/mellanox/mlx5/core/Makefile | 2 +- .../mellanox/mlx5/core/diag/reporter_vnic.c | 125 +++++++++++++++++++++ .../mellanox/mlx5/core/diag/reporter_vnic.h | 16 +++ drivers/net/ethernet/mellanox/mlx5/core/health.c | 4 + include/linux/mlx5/driver.h | 1 + 6 files changed, 177 insertions(+), 1 deletion(-) create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h (limited to 'include/linux/mlx5/driver.h') diff --git a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst index 0995e4e5acd7..ceab18e46456 100644 --- a/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst +++ b/Documentation/networking/device_drivers/ethernet/mellanox/mlx5/devlink.rst @@ -257,3 +257,33 @@ User commands examples: $ devlink health dump show pci/0000:82:00.1 reporter fw_fatal NOTE: This command can run only on PF. + +vnic reporter +------------- +The vnic reporter implements only the `diagnose` callback. +It is responsible for querying the vnic diagnostic counters from fw and displaying +them in realtime. + +Description of the vnic counters: +total_q_under_processor_handle: number of queues in an error state due to +an async error or errored command. +send_queue_priority_update_flow: number of QP/SQ priority/SL update +events. +cq_overrun: number of times CQ entered an error state due to an +overflow. +async_eq_overrun: number of times an EQ mapped to async events was +overrun. +comp_eq_overrun: number of times an EQ mapped to completion events was +overrun. +quota_exceeded_command: number of commands issued and failed due to quota +exceeded. 
+invalid_command: number of commands issued and failed dues to any reason +other than quota exceeded. +nic_receive_steering_discard: number of packets that completed RX flow +steering but were discarded due to a mismatch in flow table. + +User commands examples: +- Diagnose PF/VF vnic counters + $ devlink health diagnose pci/0000:82:00.1 reporter vnic + +NOTE: This command can run only on PF/VF ports. diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile index 68f6a4544f7e..ddf1e352f51d 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile +++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile @@ -16,7 +16,7 @@ mlx5_core-y := main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \ transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \ fs_counters.o fs_ft_pool.o rl.o lag/debugfs.o lag/lag.o dev.o events.o wq.o lib/gid.o \ lib/devcom.o lib/pci_vsc.o lib/dm.o lib/fs_ttc.o diag/fs_tracepoint.o \ - diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o \ + diag/fw_tracer.o diag/crdump.o devlink.o diag/rsc_dump.o diag/reporter_vnic.o \ fw_reset.o qos.o lib/tout.o lib/aso.o # diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c new file mode 100644 index 000000000000..9114661cd967 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.c @@ -0,0 +1,125 @@ +// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB +/* Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. */ + +#include "reporter_vnic.h" +#include "devlink.h" + +#define VNIC_ENV_GET64(vnic_env_stats, c) \ + MLX5_GET64(query_vnic_env_out, (vnic_env_stats)->query_vnic_env_out, \ + vport_env.c) + +struct mlx5_vnic_diag_stats { + __be64 query_vnic_env_out[MLX5_ST_SZ_QW(query_vnic_env_out)]; +}; + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport) +{ + u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {}; + struct mlx5_vnic_diag_stats vnic; + int err; + + MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV); + MLX5_SET(query_vnic_env_in, in, vport_number, vport_num); + MLX5_SET(query_vnic_env_in, in, other_vport, !!other_vport); + + err = mlx5_cmd_exec_inout(dev, query_vnic_env, in, &vnic.query_vnic_env_out); + if (err) + return err; + + err = devlink_fmsg_pair_nest_start(fmsg, "vNIC env counters"); + if (err) + return err; + + err = devlink_fmsg_obj_nest_start(fmsg); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "total_error_queues", + VNIC_ENV_GET64(&vnic, total_error_queues)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "send_queue_priority_update_flow", + VNIC_ENV_GET64(&vnic, send_queue_priority_update_flow)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "comp_eq_overrun", + VNIC_ENV_GET64(&vnic, comp_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "async_eq_overrun", + VNIC_ENV_GET64(&vnic, async_eq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "cq_overrun", + VNIC_ENV_GET64(&vnic, cq_overrun)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "invalid_command", + VNIC_ENV_GET64(&vnic, invalid_command)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, "quota_exceeded_command", + VNIC_ENV_GET64(&vnic, quota_exceeded_command)); + if (err) + return err; + + err = devlink_fmsg_u64_pair_put(fmsg, 
"nic_receive_steering_discard", + VNIC_ENV_GET64(&vnic, nic_receive_steering_discard)); + if (err) + return err; + + err = devlink_fmsg_obj_nest_end(fmsg); + if (err) + return err; + + err = devlink_fmsg_pair_nest_end(fmsg); + if (err) + return err; + + return 0; +} + +static int mlx5_reporter_vnic_diagnose(struct devlink_health_reporter *reporter, + struct devlink_fmsg *fmsg, + struct netlink_ext_ack *extack) +{ + struct mlx5_core_dev *dev = devlink_health_reporter_priv(reporter); + + return mlx5_reporter_vnic_diagnose_counters(dev, fmsg, 0, false); +} + +static const struct devlink_health_reporter_ops mlx5_reporter_vnic_ops = { + .name = "vnic", + .diagnose = mlx5_reporter_vnic_diagnose, +}; + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + struct devlink *devlink = priv_to_devlink(dev); + + health->vnic_reporter = + devlink_health_reporter_create(devlink, + &mlx5_reporter_vnic_ops, + 0, dev); + if (IS_ERR(health->vnic_reporter)) + mlx5_core_warn(dev, + "Failed to create vnic reporter, err = %ld\n", + PTR_ERR(health->vnic_reporter)); +} + +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev) +{ + struct mlx5_core_health *health = &dev->priv.health; + + if (!IS_ERR_OR_NULL(health->vnic_reporter)) + devlink_health_reporter_destroy(health->vnic_reporter); +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h new file mode 100644 index 000000000000..eba87a39e9b1 --- /dev/null +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/reporter_vnic.h @@ -0,0 +1,16 @@ +/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB + * Copyright (c) 2023, NVIDIA CORPORATION & AFFILIATES. + */ +#ifndef __MLX5_REPORTER_VNIC_H +#define __MLX5_REPORTER_VNIC_H + +#include "mlx5_core.h" + +void mlx5_reporter_vnic_create(struct mlx5_core_dev *dev); +void mlx5_reporter_vnic_destroy(struct mlx5_core_dev *dev); + +int mlx5_reporter_vnic_diagnose_counters(struct mlx5_core_dev *dev, + struct devlink_fmsg *fmsg, + u16 vport_num, bool other_vport); + +#endif /* __MLX5_REPORTER_VNIC_H */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c index 016c5f99c470..871c32dda66e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/health.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c @@ -42,6 +42,7 @@ #include "lib/pci_vsc.h" #include "lib/tout.h" #include "diag/fw_tracer.h" +#include "diag/reporter_vnic.h" enum { MAX_MISSES = 3, @@ -898,6 +899,7 @@ void mlx5_health_cleanup(struct mlx5_core_dev *dev) cancel_delayed_work_sync(&health->update_fw_log_ts_work); destroy_workqueue(health->wq); + mlx5_reporter_vnic_destroy(dev); mlx5_fw_reporters_destroy(dev); } @@ -907,6 +909,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) char *name; mlx5_fw_reporters_create(dev); + mlx5_reporter_vnic_create(dev); health = &dev->priv.health; name = kmalloc(64, GFP_KERNEL); @@ -926,6 +929,7 @@ int mlx5_health_init(struct mlx5_core_dev *dev) return 0; out_err: + mlx5_reporter_vnic_destroy(dev); mlx5_fw_reporters_destroy(dev); return -ENOMEM; } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 135a3c8d8237..5d25c4c73046 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -439,6 +439,7 @@ struct mlx5_core_health { struct work_struct report_work; struct devlink_health_reporter *fw_reporter; struct devlink_health_reporter *fw_fatal_reporter; + struct devlink_health_reporter 
*vnic_reporter; struct delayed_work update_fw_log_ts_work; }; -- cgit v1.2.3-70-g09d2 From 1db1f21caebbb1b6e9b1e7657df613616be3fb49 Mon Sep 17 00:00:00 2001 From: Dragos Tatulea Date: Thu, 13 Apr 2023 15:48:30 +0300 Subject: net/mlx5e: Use query_special_contexts cmd only once per mdev Don't query the firmware so many times (num rqs * num wqes * wqe frags), because it linearly slows down interface creation time as the product grows. Do it only once per mdev and store the result in mlx5e_params. Since the helper function is called from different files, move it to an appropriate location. Rename the function with a proper prefix and add a small cleanup. This fix applies only to the legacy RQ. Fixes: 1b1e4868836a ("net/mlx5e: Use query_special_contexts for mkeys") Signed-off-by: Dragos Tatulea Reviewed-by: Or Har-Toov Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 1 + drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 24 +++-------------------- drivers/net/ethernet/mellanox/mlx5/core/mr.c | 21 ++++++++++++++++++++ include/linux/mlx5/driver.h | 1 + 4 files changed, 26 insertions(+), 21 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index b8987a404d75..8e999f238194 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -327,6 +327,7 @@ struct mlx5e_params { unsigned int sw_mtu; int hard_mtu; bool ptp_rx; + __be32 terminate_lkey_be; }; static inline u8 mlx5e_get_dcb_num_tc(struct mlx5e_params *params) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 2944691f06ad..0235adcbc609 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -727,26 +727,6 @@ static void mlx5e_rq_free_shampo(struct mlx5e_rq *rq) mlx5e_rq_shampo_hd_free(rq); } -static __be32 mlx5e_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) -{ - u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; - u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; - int res; - - if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) - return MLX5_TERMINATE_SCATTER_LIST_LKEY; - - MLX5_SET(query_special_contexts_in, in, opcode, - MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); - res = mlx5_cmd_exec_inout(dev, query_special_contexts, in, out); - if (res) - return MLX5_TERMINATE_SCATTER_LIST_LKEY; - - res = MLX5_GET(query_special_contexts_out, out, - terminate_scatter_list_mkey); - return cpu_to_be32(res); -} - static int mlx5e_alloc_rq(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk, struct mlx5e_rq_param *rqp, @@ -908,7 +888,7 @@ static int mlx5e_alloc_rq(struct mlx5e_params *params, /* check if num_frags is not a pow of two */ if (rq->wqe.info.num_frags < (1 << rq->wqe.info.log_num_frags)) { wqe->data[f].byte_count = 0; - wqe->data[f].lkey = mlx5e_get_terminate_scatter_list_mkey(mdev); + wqe->data[f].lkey = params->terminate_lkey_be; wqe->data[f].addr = 0; } } @@ -5007,6 +4987,8 @@ void mlx5e_build_nic_params(struct mlx5e_priv *priv, struct mlx5e_xsk *xsk, u16 /* RQ */ mlx5e_build_rq_params(mdev, params); + params->terminate_lkey_be = mlx5_core_get_terminate_scatter_list_mkey(mdev); + params->packet_merge.timeout = mlx5e_choose_lro_timeout(mdev, MLX5E_DEFAULT_LRO_TIMEOUT); /* CQ moderation params */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c 
b/drivers/net/ethernet/mellanox/mlx5/core/mr.c index 9d735c343a3b..678f0be81375 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c @@ -32,6 +32,7 @@ #include #include +#include #include "mlx5_core.h" int mlx5_core_create_mkey(struct mlx5_core_dev *dev, u32 *mkey, u32 *in, @@ -122,3 +123,23 @@ int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num) return mlx5_cmd_exec_in(dev, destroy_psv, in); } EXPORT_SYMBOL(mlx5_core_destroy_psv); + +__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev) +{ + u32 out[MLX5_ST_SZ_DW(query_special_contexts_out)] = {}; + u32 in[MLX5_ST_SZ_DW(query_special_contexts_in)] = {}; + u32 mkey; + + if (!MLX5_CAP_GEN(dev, terminate_scatter_list_mkey)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + MLX5_SET(query_special_contexts_in, in, opcode, + MLX5_CMD_OP_QUERY_SPECIAL_CONTEXTS); + if (mlx5_cmd_exec_inout(dev, query_special_contexts, in, out)) + return MLX5_TERMINATE_SCATTER_LIST_LKEY; + + mkey = MLX5_GET(query_special_contexts_out, out, + terminate_scatter_list_mkey); + return cpu_to_be32(mkey); +} +EXPORT_SYMBOL(mlx5_core_get_terminate_scatter_list_mkey); diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4c4f737f9c1..94d2be5848ae 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1093,6 +1093,7 @@ void mlx5_cmdif_debugfs_cleanup(struct mlx5_core_dev *dev); int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn, int npsvs, u32 *sig_index); int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num); +__be32 mlx5_core_get_terminate_scatter_list_mkey(struct mlx5_core_dev *dev); void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); -- cgit v1.2.3-70-g09d2 From 617f5db1a626f18d5cbb7c7faf7bf8f9ea12be78 Mon Sep 17 00:00:00 2001 From: Mark Bloch Date: Mon, 5 Jun 2023 13:33:26 +0300 Subject: RDMA/mlx5: Fix affinity assignment The cited commit aimed to ensure that Virtual Functions (VFs) assign a queue affinity to a Queue Pair (QP) to distribute traffic when the LAG master creates a hardware LAG. If the affinity was set while the hardware was not in LAG, the firmware would ignore the affinity value. However, this commit unintentionally assigned an affinity to QPs on the LAG master's VPORT even if the RDMA device was not marked as LAG-enabled. In most cases, this was not an issue because when the hardware entered hardware LAG configuration, the RDMA device of the LAG master would be destroyed and a new one would be created, marked as LAG-enabled. The problem arises when a user configures Equal-Cost Multipath (ECMP). In ECMP mode, traffic can be directed to different physical ports based on the queue affinity, which is intended for use by VPORTS other than the E-Switch manager. ECMP mode is supported only if both E-Switch managers are in switchdev mode and the appropriate route is configured via IP. In this configuration, the RDMA device is not destroyed, and we retain the RDMA device that is not marked as LAG-enabled. To ensure correct behavior, Send Queues (SQs) opened by the E-Switch manager through verbs should be assigned strict affinity. This means they will only be able to communicate through the native physical port associated with the E-Switch manager. This will prevent the firmware from assigning affinity and will not allow the SQs to be remapped in case of failover. 
Fixes: 802dcc7fc5ec ("RDMA/mlx5: Support TX port affinity for VF drivers in LAG mode") Reviewed-by: Maor Gottlieb Signed-off-by: Mark Bloch Link: https://lore.kernel.org/r/425b05f4da840bc684b0f7e8ebf61aeb5cef09b0.1685960567.git.leon@kernel.org Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 +++ drivers/infiniband/hw/mlx5/qp.c | 3 +++ drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 12 ------------ include/linux/mlx5/driver.h | 12 ++++++++++++ 4 files changed, 18 insertions(+), 12 deletions(-) (limited to 'include/linux/mlx5/driver.h') diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index 91fc0cdf377d..2dfa6f49a6f4 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1598,6 +1598,9 @@ static inline bool mlx5_ib_lag_should_assign_affinity(struct mlx5_ib_dev *dev) MLX5_CAP_PORT_SELECTION(dev->mdev, port_select_flow_table_bypass)) return 0; + if (mlx5_lag_is_lacp_owner(dev->mdev) && !dev->lag_active) + return 0; + return dev->lag_active || (MLX5_CAP_GEN(dev->mdev, num_lag_ports) > 1 && MLX5_CAP_GEN(dev->mdev, lag_tx_port_affinity)); diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 70ca8ffa9256..78b96bfb4e6a 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1237,6 +1237,9 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev, MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid); MLX5_SET(tisc, tisc, transport_domain, tdn); + if (!mlx5_ib_lag_should_assign_affinity(dev) && + mlx5_lag_is_lacp_owner(dev->mdev)) + MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1); if (qp->flags & IB_QP_CREATE_SOURCE_QPN) MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 1d879374acaa..229520405d4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -276,18 +276,6 @@ static inline bool mlx5_sriov_is_enabled(struct mlx5_core_dev *dev) return pci_num_vf(dev->pdev) ? true : false; } -static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) -{ - /* LACP owner conditions: - * 1) Function is physical. - * 2) LAG is supported by FW. - * 3) LAG is managed by driver (currently the only option). - */ - return MLX5_CAP_GEN(dev, vport_group_manager) && - (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && - MLX5_CAP_GEN(dev, lag_master); -} - int mlx5_rescan_drivers_locked(struct mlx5_core_dev *dev); static inline int mlx5_rescan_drivers(struct mlx5_core_dev *dev) { diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a4c4f737f9c1..8ad16b779898 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1237,6 +1237,18 @@ static inline u16 mlx5_core_max_vfs(const struct mlx5_core_dev *dev) return dev->priv.sriov.max_vfs; } +static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) +{ + /* LACP owner conditions: + * 1) Function is physical. + * 2) LAG is supported by FW. + * 3) LAG is managed by driver (currently the only option). + */ + return MLX5_CAP_GEN(dev, vport_group_manager) && + (MLX5_CAP_GEN(dev, num_lag_ports) > 1) && + MLX5_CAP_GEN(dev, lag_master); +} + static inline int mlx5_get_gid_table_len(u16 param) { if (param > 4) { -- cgit v1.2.3-70-g09d2
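A minimal usage sketch for the MSI-X helpers exported in the "Provide external API for allocating vectors" patch above, assuming a consumer in the mlx5_vdpa mold. The handler, function names, and error handling below are illustrative and not taken from any of the commits; only mlx5_msix_alloc(), mlx5_msix_free(), and struct msi_map come from the API added above.

#include <linux/interrupt.h>
#include <linux/msi.h>
#include <linux/mlx5/driver.h>

/* Hypothetical per-vector handler. The helpers register the IRQ with a
 * NULL dev_id (request_irq(..., NULL) above), so data is NULL here and any
 * per-vector context must be tracked by the caller, e.g. keyed by virq.
 */
static irqreturn_t my_vq_handler(int irq, void *data)
{
	return IRQ_HANDLED;
}

/* Allocate one dedicated vector; a NULL affinity descriptor leaves the
 * vector with its default affinity.
 */
static int my_setup_irq(struct mlx5_core_dev *mdev, struct msi_map *map)
{
	*map = mlx5_msix_alloc(mdev, my_vq_handler, NULL, "my_vq");
	if (!map->virq)
		return map->index < 0 ? map->index : -ENOMEM;
	return 0;
}

static void my_teardown_irq(struct mlx5_core_dev *mdev, struct msi_map map)
{
	if (map.virq)
		mlx5_msix_free(mdev, map);
}

On failure mlx5_msix_alloc() returns a map with virq set to 0 and a negative errno in index, which is why the sketch checks virq first; as the kernel-doc above notes, the caller is responsible for releasing the vector with mlx5_msix_free() before shutdown.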