summaryrefslogtreecommitdiff
path: root/drivers/net/ethernet/mellanox
diff options
context:
space:
mode:
author: Cosmin Ratiu <cratiu@nvidia.com> 2024-10-31 14:58:52 +0200
committer: Jakub Kicinski <kuba@kernel.org> 2024-11-03 15:37:14 -0800
commit: cac7356c653d1410838209b6e840a705898d1811 (patch)
tree: 32993b7f552130709a4b202b5afeaae5d4d84c59 /drivers/net/ethernet/mellanox
parent: 9ff75a23dff3622451057b2ccd88c19bbb293841 (diff)
net/mlx5: Rework esw qos domain init and cleanup
The first approach was flawed, because there are situations where the esw mode change fails, leaving the qos domain as NULL. Various calls into the QoS infra then trigger a NULL pointer access and unhappiness.

Improve that by a combination of:
- Allocating the QoS domain on esw init and cleaning it up on teardown.
- Refactoring mode change to only call qos domain init but not cleanup.
- Making qos domain init idempotent - not change anything if nothing needs changing.

Together, these should guarantee that, as long as the memory allocations succeed, there should always be a valid qos domain until the esw cleanup, no matter what mode changes happen (or failures thereof).

Fixes: 107a034d5c1e ("net/mlx5: qos: Store rate groups in a qos domain")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20241031125856.530927-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 3
-rw-r--r-- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 16
2 files changed, 12 insertions, 7 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
index 7e7f99b38a37..940e1c2d1e39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c
@@ -951,6 +951,9 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char *
int mlx5_esw_qos_init(struct mlx5_eswitch *esw)
{
+ if (esw->qos.domain)
+ return 0; /* Nothing to change. */
+
return esw_qos_domain_init(esw);
}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 09719e9b8611..cead41ddbc38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1485,7 +1485,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
err = mlx5_esw_qos_init(esw);
if (err)
- goto err_qos_init;
+ goto err_esw_init;
if (esw->mode == MLX5_ESWITCH_LEGACY) {
err = esw_legacy_enable(esw);
@@ -1495,7 +1495,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
}
if (err)
- goto err_esw_enable;
+ goto err_esw_init;
esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED;
@@ -1509,9 +1509,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs)
return 0;
-err_esw_enable:
- mlx5_esw_qos_cleanup(esw);
-err_qos_init:
+err_esw_init:
mlx5_eq_notifier_unregister(esw->dev, &esw->nb);
mlx5_esw_acls_ns_cleanup(esw);
return err;
@@ -1640,7 +1638,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw)
if (esw->mode == MLX5_ESWITCH_OFFLOADS)
devl_rate_nodes_destroy(devlink);
- mlx5_esw_qos_cleanup(esw);
}
void mlx5_eswitch_disable(struct mlx5_eswitch *esw)
@@ -1884,6 +1881,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
if (err)
goto reps_err;
+ esw->mode = MLX5_ESWITCH_LEGACY;
+ err = mlx5_esw_qos_init(esw);
+ if (err)
+ goto reps_err;
+
mutex_init(&esw->offloads.encap_tbl_lock);
hash_init(esw->offloads.encap_tbl);
mutex_init(&esw->offloads.decap_tbl_lock);
@@ -1897,7 +1899,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
refcount_set(&esw->qos.refcnt, 0);
esw->enabled_vports = 0;
- esw->mode = MLX5_ESWITCH_LEGACY;
esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE;
if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) &&
MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap))
@@ -1934,6 +1935,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
esw_info(esw->dev, "cleanup\n");
+ mlx5_esw_qos_cleanup(esw);
destroy_workqueue(esw->work_queue);
WARN_ON(refcount_read(&esw->qos.refcnt));
mutex_destroy(&esw->state_lock);