diff options
author | Cosmin Ratiu <cratiu@nvidia.com> | 2024-10-31 14:58:52 +0200 |
---|---|---|
committer | Jakub Kicinski <kuba@kernel.org> | 2024-11-03 15:37:14 -0800 |
commit | cac7356c653d1410838209b6e840a705898d1811 (patch) | |
tree | 32993b7f552130709a4b202b5afeaae5d4d84c59 /drivers/net/ethernet | |
parent | 9ff75a23dff3622451057b2ccd88c19bbb293841 (diff) |
net/mlx5: Rework esw qos domain init and cleanup
The first approach was flawed, because there are situations where the
esw mode change fails, leaving the qos domain as NULL. Various calls
into the QoS infra then trigger a NULL pointer access and unhappiness.
Improve that by a combination of:
- Allocating the QoS domain on esw init and cleaning it up on teardown.
- Refactoring mode change to only call qos domain init but not cleanup.
- Making qos domain init idempotent - not change anything if nothing
needs changing.
Together, these should guarantee that, as long as the memory allocations
succeed, there should always be a valid qos domain until the esw
cleanup, no matter what mode changes happen (or failures thereof).
Fixes: 107a034d5c1e ("net/mlx5: qos: Store rate groups in a qos domain")
Signed-off-by: Cosmin Ratiu <cratiu@nvidia.com>
Reviewed-by: Carolina Jubran <cjubran@nvidia.com>
Signed-off-by: Tariq Toukan <tariqt@nvidia.com>
Link: https://patch.msgid.link/20241031125856.530927-2-tariqt@nvidia.com
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c | 3 | ||||
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 16 |
2 files changed, 12 insertions, 7 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c index 7e7f99b38a37..940e1c2d1e39 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/qos.c @@ -951,6 +951,9 @@ static int esw_qos_devlink_rate_to_mbps(struct mlx5_core_dev *mdev, const char * int mlx5_esw_qos_init(struct mlx5_eswitch *esw) { + if (esw->qos.domain) + return 0; /* Nothing to change. */ + return esw_qos_domain_init(esw); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 09719e9b8611..cead41ddbc38 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1485,7 +1485,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) err = mlx5_esw_qos_init(esw); if (err) - goto err_qos_init; + goto err_esw_init; if (esw->mode == MLX5_ESWITCH_LEGACY) { err = esw_legacy_enable(esw); @@ -1495,7 +1495,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) } if (err) - goto err_esw_enable; + goto err_esw_init; esw->fdb_table.flags |= MLX5_ESW_FDB_CREATED; @@ -1509,9 +1509,7 @@ int mlx5_eswitch_enable_locked(struct mlx5_eswitch *esw, int num_vfs) return 0; -err_esw_enable: - mlx5_esw_qos_cleanup(esw); -err_qos_init: +err_esw_init: mlx5_eq_notifier_unregister(esw->dev, &esw->nb); mlx5_esw_acls_ns_cleanup(esw); return err; @@ -1640,7 +1638,6 @@ void mlx5_eswitch_disable_locked(struct mlx5_eswitch *esw) if (esw->mode == MLX5_ESWITCH_OFFLOADS) devl_rate_nodes_destroy(devlink); - mlx5_esw_qos_cleanup(esw); } void mlx5_eswitch_disable(struct mlx5_eswitch *esw) @@ -1884,6 +1881,11 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) if (err) goto reps_err; + esw->mode = MLX5_ESWITCH_LEGACY; + err = mlx5_esw_qos_init(esw); + if (err) + goto reps_err; + mutex_init(&esw->offloads.encap_tbl_lock); hash_init(esw->offloads.encap_tbl); mutex_init(&esw->offloads.decap_tbl_lock); @@ -1897,7 +1899,6 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev) refcount_set(&esw->qos.refcnt, 0); esw->enabled_vports = 0; - esw->mode = MLX5_ESWITCH_LEGACY; esw->offloads.inline_mode = MLX5_INLINE_MODE_NONE; if (MLX5_CAP_ESW_FLOWTABLE_FDB(dev, reformat) && MLX5_CAP_ESW_FLOWTABLE_FDB(dev, decap)) @@ -1934,6 +1935,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw) esw_info(esw->dev, "cleanup\n"); + mlx5_esw_qos_cleanup(esw); destroy_workqueue(esw->work_queue); WARN_ON(refcount_read(&esw->qos.refcnt)); mutex_destroy(&esw->state_lock); |