-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h   |  10
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h       |  14
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c |   2
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c    | 513
-rw-r--r--  drivers/net/ethernet/mellanox/mlx5/core/en_main.c             |   9
-rw-r--r--  include/net/tls.h                                             |   2
-rw-r--r--  net/tls/tls_device.c                                          |  79
7 files changed, 527 insertions(+), 102 deletions(-)
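The bulk of the change is the new TX TIS recycling pool in ktls_tx.c: instead of creating and destroying a TIS per connection, the driver pops a context from a pool on tls_dev_add and pushes it back on tls_dev_del, refilling and trimming the pool asynchronously around low/high watermarks. Below is a minimal, self-contained userspace sketch of that watermark scheme, for orientation before the patch body. Only POOL_HIGH/POOL_LOW mirror the patch's MLX5E_TLS_TX_POOL_* constants; the type names, the boolean "scheduled" flags, and the calloc() slow path are illustrative stand-ins (in the driver these are TIS contexts, queue_work() calls on a dedicated workqueue, and a blocking firmware TIS create). The driver additionally refills and trims in bulks of 16 objects using asynchronous firmware commands, which this sketch omits.

#include <pthread.h>
#include <stdbool.h>
#include <stddef.h>
#include <stdlib.h>

#define POOL_HIGH (4 * 1024)        /* start trimming at this size */
#define POOL_LOW  (POOL_HIGH / 4)   /* start refilling at this size */

struct pool_obj {
	struct pool_obj *next;      /* free-list linkage */
};

struct obj_pool {
	pthread_mutex_t lock;       /* protects head and size */
	struct pool_obj *head;
	size_t size;
	bool refill_scheduled;      /* stand-in for queue_work(create_work) */
	bool trim_scheduled;        /* stand-in for queue_work(destroy_work) */
};

/* Return an object to the pool; once the pool reaches the high watermark,
 * request asynchronous trimming instead of freeing inline. */
static void pool_push(struct obj_pool *p, struct pool_obj *obj)
{
	pthread_mutex_lock(&p->lock);
	obj->next = p->head;
	p->head = obj;
	if (++p->size == POOL_HIGH)
		p->trim_scheduled = true;
	pthread_mutex_unlock(&p->lock);
}

/* Take an object from the pool; request an asynchronous refill when the
 * pool drains to the low watermark, and fall back to a blocking allocation
 * when it is empty. */
static struct pool_obj *pool_pop(struct obj_pool *p)
{
	struct pool_obj *obj;

	pthread_mutex_lock(&p->lock);
	if (p->size == 0) {
		p->refill_scheduled = true;
		pthread_mutex_unlock(&p->lock);
		return calloc(1, sizeof(*obj)); /* blocking slow path */
	}
	obj = p->head;
	p->head = obj->next;
	if (--p->size == POOL_LOW)
		p->refill_scheduled = true;
	pthread_mutex_unlock(&p->lock);
	return obj;
}

int main(void)
{
	struct obj_pool pool = { .lock = PTHREAD_MUTEX_INITIALIZER };
	struct pool_obj *o = pool_pop(&pool); /* empty pool: slow path + refill request */

	if (o)
		pool_push(&pool, o);          /* recycled for the next connection */
	return 0;
}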
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h index 04c0a5e1c89a..1839f1ab1ddd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h @@ -194,4 +194,14 @@ static inline void mlx5e_accel_cleanup_rx(struct mlx5e_priv *priv) { mlx5e_ktls_cleanup_rx(priv); } + +static inline int mlx5e_accel_init_tx(struct mlx5e_priv *priv) +{ + return mlx5e_ktls_init_tx(priv); +} + +static inline void mlx5e_accel_cleanup_tx(struct mlx5e_priv *priv) +{ + mlx5e_ktls_cleanup_tx(priv); +} #endif /* __MLX5E_EN_ACCEL_H__ */ diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h index d016624fbc9d..948400dee525 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h @@ -42,6 +42,8 @@ static inline bool mlx5e_ktls_type_check(struct mlx5_core_dev *mdev, } void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv); +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv); +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv); int mlx5e_ktls_init_rx(struct mlx5e_priv *priv); void mlx5e_ktls_cleanup_rx(struct mlx5e_priv *priv); int mlx5e_ktls_set_feature_rx(struct net_device *netdev, bool enable); @@ -62,6 +64,8 @@ static inline bool mlx5e_is_ktls_rx(struct mlx5_core_dev *mdev) struct mlx5e_tls_sw_stats { atomic64_t tx_tls_ctx; atomic64_t tx_tls_del; + atomic64_t tx_tls_pool_alloc; + atomic64_t tx_tls_pool_free; atomic64_t rx_tls_ctx; atomic64_t rx_tls_del; }; @@ -69,6 +73,7 @@ struct mlx5e_tls_sw_stats { struct mlx5e_tls { struct mlx5e_tls_sw_stats sw_stats; struct workqueue_struct *rx_wq; + struct mlx5e_tls_tx_pool *tx_pool; }; int mlx5e_ktls_init(struct mlx5e_priv *priv); @@ -83,6 +88,15 @@ static inline void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv) { } +static inline int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + return 0; +} + +static inline void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ +} + static inline int mlx5e_ktls_init_rx(struct mlx5e_priv *priv) { return 0; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c index 2ab46c4247ff..7c1c0eb16787 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_stats.c @@ -41,6 +41,8 @@ static const struct counter_desc mlx5e_ktls_sw_stats_desc[] = { { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_del) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_alloc) }, + { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, tx_tls_pool_free) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_ctx) }, { MLX5E_DECLARE_STAT(struct mlx5e_tls_sw_stats, rx_tls_del) }, }; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c index fba21edf88d8..6b6c7044b64a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c @@ -35,30 +35,70 @@ u16 mlx5e_ktls_get_stop_room(struct mlx5_core_dev *mdev, struct mlx5e_params *pa stop_room += mlx5e_stop_room_for_wqe(mdev, MLX5E_TLS_SET_STATIC_PARAMS_WQEBBS); stop_room += mlx5e_stop_room_for_wqe(mdev, 
MLX5E_TLS_SET_PROGRESS_PARAMS_WQEBBS); stop_room += num_dumps * mlx5e_stop_room_for_wqe(mdev, MLX5E_KTLS_DUMP_WQEBBS); + stop_room += 1; /* fence nop */ return stop_room; } +static void mlx5e_ktls_set_tisc(struct mlx5_core_dev *mdev, void *tisc) +{ + MLX5_SET(tisc, tisc, tls_en, 1); + MLX5_SET(tisc, tisc, pd, mdev->mlx5e_res.hw_objs.pdn); + MLX5_SET(tisc, tisc, transport_domain, mdev->mlx5e_res.hw_objs.td.tdn); +} + static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn) { u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; - void *tisc; - tisc = MLX5_ADDR_OF(create_tis_in, in, ctx); + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); - MLX5_SET(tisc, tisc, tls_en, 1); + return mlx5_core_create_tis(mdev, in, tisn); +} + +static int mlx5e_ktls_create_tis_cb(struct mlx5_core_dev *mdev, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {}; + + mlx5e_ktls_set_tisc(mdev, MLX5_ADDR_OF(create_tis_in, in, ctx)); + MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS); + + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); +} + +static int mlx5e_ktls_destroy_tis_cb(struct mlx5_core_dev *mdev, u32 tisn, + struct mlx5_async_ctx *async_ctx, + u32 *out, int outlen, + mlx5_async_cbk_t callback, + struct mlx5_async_work *context) +{ + u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {}; + + MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS); + MLX5_SET(destroy_tis_in, in, tisn, tisn); - return mlx5e_create_tis(mdev, in, tisn); + return mlx5_cmd_exec_cb(async_ctx, in, sizeof(in), + out, outlen, callback, context); } struct mlx5e_ktls_offload_context_tx { - struct tls_offload_context_tx *tx_ctx; - struct tls12_crypto_info_aes_gcm_128 crypto_info; - struct mlx5e_tls_sw_stats *sw_stats; + /* fast path */ u32 expected_seq; u32 tisn; - u32 key_id; bool ctx_post_pending; + /* control / resync */ + struct list_head list_node; /* member of the pool */ + struct tls12_crypto_info_aes_gcm_128 crypto_info; + struct tls_offload_context_tx *tx_ctx; + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + u32 key_id; + u8 create_err : 1; }; static void @@ -82,28 +122,368 @@ mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx) return *ctx; } +/* struct for callback API management */ +struct mlx5e_async_ctx { + struct mlx5_async_work context; + struct mlx5_async_ctx async_ctx; + struct work_struct work; + struct mlx5e_ktls_offload_context_tx *priv_tx; + struct completion complete; + int err; + union { + u32 out_create[MLX5_ST_SZ_DW(create_tis_out)]; + u32 out_destroy[MLX5_ST_SZ_DW(destroy_tis_out)]; + }; +}; + +static struct mlx5e_async_ctx *mlx5e_bulk_async_init(struct mlx5_core_dev *mdev, int n) +{ + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = kvcalloc(n, sizeof(struct mlx5e_async_ctx), GFP_KERNEL); + if (!bulk_async) + return NULL; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_init_async_ctx(mdev, &async->async_ctx); + init_completion(&async->complete); + } + + return bulk_async; +} + +static void mlx5e_bulk_async_cleanup(struct mlx5e_async_ctx *bulk_async, int n) +{ + int i; + + for (i = 0; i < n; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + mlx5_cmd_cleanup_async_ctx(&async->async_ctx); + } + kvfree(bulk_async); +} + +static void create_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + 
container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + if (status) { + async->err = status; + priv_tx->create_err = 1; + goto out; + } + + priv_tx->tisn = MLX5_GET(create_tis_out, async->out_create, tisn); +out: + complete(&async->complete); +} + +static void destroy_tis_callback(int status, struct mlx5_async_work *context) +{ + struct mlx5e_async_ctx *async = + container_of(context, struct mlx5e_async_ctx, context); + struct mlx5e_ktls_offload_context_tx *priv_tx = async->priv_tx; + + complete(&async->complete); + kfree(priv_tx); +} + +static struct mlx5e_ktls_offload_context_tx * +mlx5e_tls_priv_tx_init(struct mlx5_core_dev *mdev, struct mlx5e_tls_sw_stats *sw_stats, + struct mlx5e_async_ctx *async) +{ + struct mlx5e_ktls_offload_context_tx *priv_tx; + int err; + + priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); + if (!priv_tx) + return ERR_PTR(-ENOMEM); + + priv_tx->mdev = mdev; + priv_tx->sw_stats = sw_stats; + + if (!async) { + err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); + if (err) + goto err_out; + } else { + async->priv_tx = priv_tx; + err = mlx5e_ktls_create_tis_cb(mdev, &async->async_ctx, + async->out_create, sizeof(async->out_create), + create_tis_callback, &async->context); + if (err) + goto err_out; + } + + return priv_tx; + +err_out: + kfree(priv_tx); + return ERR_PTR(err); +} + +static void mlx5e_tls_priv_tx_cleanup(struct mlx5e_ktls_offload_context_tx *priv_tx, + struct mlx5e_async_ctx *async) +{ + if (priv_tx->create_err) { + complete(&async->complete); + kfree(priv_tx); + return; + } + async->priv_tx = priv_tx; + mlx5e_ktls_destroy_tis_cb(priv_tx->mdev, priv_tx->tisn, + &async->async_ctx, + async->out_destroy, sizeof(async->out_destroy), + destroy_tis_callback, &async->context); +} + +static void mlx5e_tls_priv_tx_list_cleanup(struct mlx5_core_dev *mdev, + struct list_head *list, int size) +{ + struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_async_ctx *bulk_async; + int i; + + bulk_async = mlx5e_bulk_async_init(mdev, size); + if (!bulk_async) + return; + + i = 0; + list_for_each_entry(obj, list, list_node) { + mlx5e_tls_priv_tx_cleanup(obj, &bulk_async[i]); + i++; + } + + for (i = 0; i < size; i++) { + struct mlx5e_async_ctx *async = &bulk_async[i]; + + wait_for_completion(&async->complete); + } + mlx5e_bulk_async_cleanup(bulk_async, size); +} + +/* Recycling pool API */ + +#define MLX5E_TLS_TX_POOL_BULK (16) +#define MLX5E_TLS_TX_POOL_HIGH (4 * 1024) +#define MLX5E_TLS_TX_POOL_LOW (MLX5E_TLS_TX_POOL_HIGH / 4) + +struct mlx5e_tls_tx_pool { + struct mlx5_core_dev *mdev; + struct mlx5e_tls_sw_stats *sw_stats; + struct mutex lock; /* Protects access to the pool */ + struct list_head list; + size_t size; + + struct workqueue_struct *wq; + struct work_struct create_work; + struct work_struct destroy_work; +}; + +static void create_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, create_work); + struct mlx5e_ktls_offload_context_tx *obj; + struct mlx5e_async_ctx *bulk_async; + LIST_HEAD(local_list); + int i, j, err = 0; + + bulk_async = mlx5e_bulk_async_init(pool->mdev, MLX5E_TLS_TX_POOL_BULK); + if (!bulk_async) + return; + + for (i = 0; i < MLX5E_TLS_TX_POOL_BULK; i++) { + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, &bulk_async[i]); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + break; + } + list_add(&obj->list_node, &local_list); + } + + for (j = 0; j < i; j++) { + struct mlx5e_async_ctx *async = &bulk_async[j]; 
+ + wait_for_completion(&async->complete); + if (!err && async->err) + err = async->err; + } + atomic64_add(i, &pool->sw_stats->tx_tls_pool_alloc); + mlx5e_bulk_async_cleanup(bulk_async, MLX5E_TLS_TX_POOL_BULK); + if (err) + goto err_out; + + mutex_lock(&pool->lock); + if (pool->size + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + goto err_out; + } + list_splice(&local_list, &pool->list); + pool->size += MLX5E_TLS_TX_POOL_BULK; + if (pool->size <= MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + return; + +err_out: + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, i); + atomic64_add(i, &pool->sw_stats->tx_tls_pool_free); +} + +static void destroy_work(struct work_struct *work) +{ + struct mlx5e_tls_tx_pool *pool = + container_of(work, struct mlx5e_tls_tx_pool, destroy_work); + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + mutex_lock(&pool->lock); + if (pool->size < MLX5E_TLS_TX_POOL_HIGH) { + mutex_unlock(&pool->lock); + return; + } + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + if (pool->size >= MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, work); + mutex_unlock(&pool->lock); + + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); +} + +static struct mlx5e_tls_tx_pool *mlx5e_tls_tx_pool_init(struct mlx5_core_dev *mdev, + struct mlx5e_tls_sw_stats *sw_stats) +{ + struct mlx5e_tls_tx_pool *pool; + + BUILD_BUG_ON(MLX5E_TLS_TX_POOL_LOW + MLX5E_TLS_TX_POOL_BULK >= MLX5E_TLS_TX_POOL_HIGH); + + pool = kvzalloc(sizeof(*pool), GFP_KERNEL); + if (!pool) + return NULL; + + pool->wq = create_singlethread_workqueue("mlx5e_tls_tx_pool"); + if (!pool->wq) + goto err_free; + + INIT_LIST_HEAD(&pool->list); + mutex_init(&pool->lock); + + INIT_WORK(&pool->create_work, create_work); + INIT_WORK(&pool->destroy_work, destroy_work); + + pool->mdev = mdev; + pool->sw_stats = sw_stats; + + return pool; + +err_free: + kvfree(pool); + return NULL; +} + +static void mlx5e_tls_tx_pool_list_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + while (pool->size > MLX5E_TLS_TX_POOL_BULK) { + struct mlx5e_ktls_offload_context_tx *obj; + LIST_HEAD(local_list); + int i = 0; + + list_for_each_entry(obj, &pool->list, list_node) + if (++i == MLX5E_TLS_TX_POOL_BULK) + break; + + list_cut_position(&local_list, &pool->list, &obj->list_node); + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &local_list, MLX5E_TLS_TX_POOL_BULK); + atomic64_add(MLX5E_TLS_TX_POOL_BULK, &pool->sw_stats->tx_tls_pool_free); + pool->size -= MLX5E_TLS_TX_POOL_BULK; + } + if (pool->size) { + mlx5e_tls_priv_tx_list_cleanup(pool->mdev, &pool->list, pool->size); + atomic64_add(pool->size, &pool->sw_stats->tx_tls_pool_free); + } +} + +static void mlx5e_tls_tx_pool_cleanup(struct mlx5e_tls_tx_pool *pool) +{ + mlx5e_tls_tx_pool_list_cleanup(pool); + destroy_workqueue(pool->wq); + kvfree(pool); +} + +static void pool_push(struct mlx5e_tls_tx_pool *pool, struct mlx5e_ktls_offload_context_tx *obj) +{ + mutex_lock(&pool->lock); + list_add(&obj->list_node, &pool->list); + if (++pool->size == MLX5E_TLS_TX_POOL_HIGH) + queue_work(pool->wq, &pool->destroy_work); + mutex_unlock(&pool->lock); +} + +static struct mlx5e_ktls_offload_context_tx *pool_pop(struct mlx5e_tls_tx_pool *pool) +{ + struct 
mlx5e_ktls_offload_context_tx *obj; + + mutex_lock(&pool->lock); + if (unlikely(pool->size == 0)) { + /* pool is empty: + * - trigger the populating work, and + * - serve the current context via the regular blocking api. + */ + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + obj = mlx5e_tls_priv_tx_init(pool->mdev, pool->sw_stats, NULL); + if (!IS_ERR(obj)) + atomic64_inc(&pool->sw_stats->tx_tls_pool_alloc); + return obj; + } + + obj = list_first_entry(&pool->list, struct mlx5e_ktls_offload_context_tx, + list_node); + list_del(&obj->list_node); + if (--pool->size == MLX5E_TLS_TX_POOL_LOW) + queue_work(pool->wq, &pool->create_work); + mutex_unlock(&pool->lock); + return obj; +} + +/* End of pool API */ + int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, struct tls_crypto_info *crypto_info, u32 start_offload_tcp_sn) { struct mlx5e_ktls_offload_context_tx *priv_tx; + struct mlx5e_tls_tx_pool *pool; struct tls_context *tls_ctx; - struct mlx5_core_dev *mdev; struct mlx5e_priv *priv; int err; tls_ctx = tls_get_ctx(sk); priv = netdev_priv(netdev); - mdev = priv->mdev; + pool = priv->tls->tx_pool; - priv_tx = kzalloc(sizeof(*priv_tx), GFP_KERNEL); - if (!priv_tx) - return -ENOMEM; + priv_tx = pool_pop(pool); + if (IS_ERR(priv_tx)) + return PTR_ERR(priv_tx); - err = mlx5_ktls_create_key(mdev, crypto_info, &priv_tx->key_id); + err = mlx5_ktls_create_key(pool->mdev, crypto_info, &priv_tx->key_id); if (err) goto err_create_key; - priv_tx->sw_stats = &priv->tls->sw_stats; priv_tx->expected_seq = start_offload_tcp_sn; priv_tx->crypto_info = *(struct tls12_crypto_info_aes_gcm_128 *)crypto_info; @@ -111,36 +491,29 @@ int mlx5e_ktls_add_tx(struct net_device *netdev, struct sock *sk, mlx5e_set_ktls_tx_priv_ctx(tls_ctx, priv_tx); - err = mlx5e_ktls_create_tis(mdev, &priv_tx->tisn); - if (err) - goto err_create_tis; - priv_tx->ctx_post_pending = true; atomic64_inc(&priv_tx->sw_stats->tx_tls_ctx); return 0; -err_create_tis: - mlx5_ktls_destroy_key(mdev, priv_tx->key_id); err_create_key: - kfree(priv_tx); + pool_push(pool, priv_tx); return err; } void mlx5e_ktls_del_tx(struct net_device *netdev, struct tls_context *tls_ctx) { struct mlx5e_ktls_offload_context_tx *priv_tx; - struct mlx5_core_dev *mdev; + struct mlx5e_tls_tx_pool *pool; struct mlx5e_priv *priv; priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); priv = netdev_priv(netdev); - mdev = priv->mdev; + pool = priv->tls->tx_pool; atomic64_inc(&priv_tx->sw_stats->tx_tls_del); - mlx5e_destroy_tis(mdev, priv_tx->tisn); - mlx5_ktls_destroy_key(mdev, priv_tx->key_id); - kfree(priv_tx); + mlx5_ktls_destroy_key(priv_tx->mdev, priv_tx->key_id); + pool_push(pool, priv_tx); } static void tx_fill_wi(struct mlx5e_txqsq *sq, @@ -201,6 +574,16 @@ post_progress_params(struct mlx5e_txqsq *sq, sq->pc += num_wqebbs; } +static void tx_post_fence_nop(struct mlx5e_txqsq *sq) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); + + tx_fill_wi(sq, pi, 1, 0, NULL); + + mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); +} + static void mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, struct mlx5e_ktls_offload_context_tx *priv_tx, @@ -212,6 +595,7 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq, post_static_params(sq, priv_tx, fence_first_post); post_progress_params(sq, priv_tx, progress_fence); + tx_post_fence_nop(sq); } struct tx_sync_info { @@ -304,7 +688,7 @@ tx_post_resync_params(struct mlx5e_txqsq *sq, } static int -tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool first) 
+tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn) { struct mlx5_wqe_ctrl_seg *cseg; struct mlx5_wqe_data_seg *dseg; @@ -326,7 +710,6 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir cseg->opmod_idx_opcode = cpu_to_be32((sq->pc << 8) | MLX5_OPCODE_DUMP); cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt); cseg->tis_tir_num = cpu_to_be32(tisn << 8); - cseg->fm_ce_se = first ? MLX5_FENCE_MODE_INITIATOR_SMALL : 0; fsz = skb_frag_size(frag); dma_addr = skb_frag_dma_map(sq->pdev, frag, 0, fsz, @@ -361,67 +744,39 @@ void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq, stats->tls_dump_bytes += wi->num_bytes; } -static void tx_post_fence_nop(struct mlx5e_txqsq *sq) -{ - struct mlx5_wq_cyc *wq = &sq->wq; - u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc); - - tx_fill_wi(sq, pi, 1, 0, NULL); - - mlx5e_post_nop_fence(wq, sq->sqn, &sq->pc); -} - static enum mlx5e_ktls_sync_retval mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx, struct mlx5e_txqsq *sq, int datalen, u32 seq) { - struct mlx5e_sq_stats *stats = sq->stats; enum mlx5e_ktls_sync_retval ret; struct tx_sync_info info = {}; - int i = 0; + int i; ret = tx_sync_info_get(priv_tx, seq, datalen, &info); - if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) { - if (ret == MLX5E_KTLS_SYNC_SKIP_NO_DATA) { - stats->tls_skip_no_sync_data++; - return MLX5E_KTLS_SYNC_SKIP_NO_DATA; - } - /* We might get here if a retransmission reaches the driver - * after the relevant record is acked. + if (unlikely(ret != MLX5E_KTLS_SYNC_DONE)) + /* We might get here with ret == FAIL if a retransmission + * reaches the driver after the relevant record is acked. * It should be safe to drop the packet in this case */ - stats->tls_drop_no_sync_data++; - goto err_out; - } - - stats->tls_ooo++; + return ret; tx_post_resync_params(sq, priv_tx, info.rcd_sn); - /* If no dump WQE was sent, we need to have a fence NOP WQE before the - * actual data xmit. 
- */ - if (!info.nr_frags) { - tx_post_fence_nop(sq); - return MLX5E_KTLS_SYNC_DONE; - } - - for (; i < info.nr_frags; i++) { + for (i = 0; i < info.nr_frags; i++) { unsigned int orig_fsz, frag_offset = 0, n = 0; skb_frag_t *f = &info.frags[i]; orig_fsz = skb_frag_size(f); do { - bool fence = !(i || frag_offset); unsigned int fsz; n++; fsz = min_t(unsigned int, sq->hw_mtu, orig_fsz - frag_offset); skb_frag_size_set(f, fsz); - if (tx_post_resync_dump(sq, f, priv_tx->tisn, fence)) { + if (tx_post_resync_dump(sq, f, priv_tx->tisn)) { page_ref_add(skb_frag_page(f), n - 1); goto err_out; } @@ -469,24 +824,27 @@ bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq, priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx); - if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) { + if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false); - } seq = ntohl(tcp_hdr(skb)->seq); if (unlikely(priv_tx->expected_seq != seq)) { enum mlx5e_ktls_sync_retval ret = mlx5e_ktls_tx_handle_ooo(priv_tx, sq, datalen, seq); + stats->tls_ooo++; + switch (ret) { case MLX5E_KTLS_SYNC_DONE: break; case MLX5E_KTLS_SYNC_SKIP_NO_DATA: + stats->tls_skip_no_sync_data++; if (likely(!skb->decrypted)) goto out; WARN_ON_ONCE(1); - fallthrough; + goto err_out; case MLX5E_KTLS_SYNC_FAIL: + stats->tls_drop_no_sync_data++; goto err_out; } } @@ -505,3 +863,24 @@ err_out: dev_kfree_skb_any(skb); return false; } + +int mlx5e_ktls_init_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + return 0; + + priv->tls->tx_pool = mlx5e_tls_tx_pool_init(priv->mdev, &priv->tls->sw_stats); + if (!priv->tls->tx_pool) + return -ENOMEM; + + return 0; +} + +void mlx5e_ktls_cleanup_tx(struct mlx5e_priv *priv) +{ + if (!mlx5e_is_ktls_tx(priv->mdev)) + return; + + mlx5e_tls_tx_pool_cleanup(priv->tls->tx_pool); + priv->tls->tx_pool = NULL; +} diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 180b2f418339..24ddd438c066 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -3144,6 +3144,7 @@ static void mlx5e_cleanup_nic_tx(struct mlx5e_priv *priv) mlx5e_mqprio_rl_free(priv->mqprio_rl); priv->mqprio_rl = NULL; } + mlx5e_accel_cleanup_tx(priv); mlx5e_destroy_tises(priv); } @@ -5147,9 +5148,17 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv) return err; } + err = mlx5e_accel_init_tx(priv); + if (err) + goto err_destroy_tises; + mlx5e_set_mqprio_rl(priv); mlx5e_dcbnl_initialize(priv); return 0; + +err_destroy_tises: + mlx5e_destroy_tises(priv); + return err; } static void mlx5e_nic_enable(struct mlx5e_priv *priv) diff --git a/include/net/tls.h b/include/net/tls.h index abb050b0df83..b75b5727abdb 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -161,6 +161,8 @@ struct tls_offload_context_tx { struct scatterlist sg_tx_data[MAX_SKB_FRAGS]; void (*sk_destruct)(struct sock *sk); + struct work_struct destruct_work; + struct tls_context *ctx; u8 driver_state[] __aligned(8); /* The TLS layer reserves room for driver specific state * Currently the belief is that there is not enough diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index b3e2a30041c6..18c7e5c6d228 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -46,10 +46,8 @@ */ static DECLARE_RWSEM(device_offload_lock); -static void tls_device_gc_task(struct work_struct *work); +static struct workqueue_struct *destruct_wq 
__read_mostly; -static DECLARE_WORK(tls_device_gc_work, tls_device_gc_task); -static LIST_HEAD(tls_device_gc_list); static LIST_HEAD(tls_device_list); static LIST_HEAD(tls_device_down_list); static DEFINE_SPINLOCK(tls_device_lock); @@ -68,47 +66,44 @@ static void tls_device_free_ctx(struct tls_context *ctx) tls_ctx_free(NULL, ctx); } -static void tls_device_gc_task(struct work_struct *work) +static void tls_device_tx_del_task(struct work_struct *work) { - struct tls_context *ctx, *tmp; - unsigned long flags; - LIST_HEAD(gc_list); - - spin_lock_irqsave(&tls_device_lock, flags); - list_splice_init(&tls_device_gc_list, &gc_list); - spin_unlock_irqrestore(&tls_device_lock, flags); - - list_for_each_entry_safe(ctx, tmp, &gc_list, list) { - struct net_device *netdev = ctx->netdev; - - if (netdev && ctx->tx_conf == TLS_HW) { - netdev->tlsdev_ops->tls_dev_del(netdev, ctx, - TLS_OFFLOAD_CTX_DIR_TX); - dev_put(netdev); - ctx->netdev = NULL; - } + struct tls_offload_context_tx *offload_ctx = + container_of(work, struct tls_offload_context_tx, destruct_work); + struct tls_context *ctx = offload_ctx->ctx; + struct net_device *netdev = ctx->netdev; - list_del(&ctx->list); - tls_device_free_ctx(ctx); - } + netdev->tlsdev_ops->tls_dev_del(netdev, ctx, TLS_OFFLOAD_CTX_DIR_TX); + dev_put(netdev); + ctx->netdev = NULL; + tls_device_free_ctx(ctx); } static void tls_device_queue_ctx_destruction(struct tls_context *ctx) { unsigned long flags; + bool async_cleanup; spin_lock_irqsave(&tls_device_lock, flags); - if (unlikely(!refcount_dec_and_test(&ctx->refcount))) - goto unlock; + if (unlikely(!refcount_dec_and_test(&ctx->refcount))) { + spin_unlock_irqrestore(&tls_device_lock, flags); + return; + } - list_move_tail(&ctx->list, &tls_device_gc_list); + list_del(&ctx->list); /* Remove from tls_device_list / tls_device_down_list */ + async_cleanup = ctx->netdev && ctx->tx_conf == TLS_HW; + if (async_cleanup) { + struct tls_offload_context_tx *offload_ctx = tls_offload_ctx_tx(ctx); - /* schedule_work inside the spinlock - * to make sure tls_device_down waits for that work. - */ - schedule_work(&tls_device_gc_work); -unlock: + /* queue_work inside the spinlock + * to make sure tls_device_down waits for that work. 
+		 */
+		queue_work(destruct_wq, &offload_ctx->destruct_work);
+	}
 	spin_unlock_irqrestore(&tls_device_lock, flags);
+
+	if (!async_cleanup)
+		tls_device_free_ctx(ctx);
 }
 
 /* We assume that the socket is already connected */
@@ -1150,6 +1145,9 @@ int tls_set_device_offload(struct sock *sk, struct tls_context *ctx)
 	start_marker_record->len = 0;
 	start_marker_record->num_frags = 0;
 
+	INIT_WORK(&offload_ctx->destruct_work, tls_device_tx_del_task);
+	offload_ctx->ctx = ctx;
+
 	INIT_LIST_HEAD(&offload_ctx->records_list);
 	list_add_tail(&start_marker_record->list, &offload_ctx->records_list);
 	spin_lock_init(&offload_ctx->lock);
@@ -1394,7 +1392,7 @@ static int tls_device_down(struct net_device *netdev)
 
 	up_write(&device_offload_lock);
 
-	flush_work(&tls_device_gc_work);
+	flush_workqueue(destruct_wq);
 
 	return NOTIFY_DONE;
 }
@@ -1435,12 +1433,23 @@ static struct notifier_block tls_dev_notifier = {
 
 int __init tls_device_init(void)
 {
-	return register_netdevice_notifier(&tls_dev_notifier);
+	int err;
+
+	destruct_wq = alloc_workqueue("ktls_device_destruct", 0, 0);
+	if (!destruct_wq)
+		return -ENOMEM;
+
+	err = register_netdevice_notifier(&tls_dev_notifier);
+	if (err)
+		destroy_workqueue(destruct_wq);
+
+	return err;
 }
 
 void __exit tls_device_cleanup(void)
 {
 	unregister_netdevice_notifier(&tls_dev_notifier);
-	flush_work(&tls_device_gc_work);
+	flush_workqueue(destruct_wq);
+	destroy_workqueue(destruct_wq);
 	clean_acked_data_flush();
 }
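On the net/tls side, the patch drops the single global tls_device_gc_work plus tls_device_gc_list in favor of a per-context destruct_work queued on a dedicated destruct_wq, with tls_device_down() and tls_device_cleanup() simply flushing that workqueue. The following is a minimal kernel-module-style sketch of the same pattern, not the net/tls code itself: all names are hypothetical and the per-context device-state release is elided; it only illustrates the per-object work_struct plus flush_workqueue() shape the patch adopts.

#include <linux/module.h>
#include <linux/slab.h>
#include <linux/workqueue.h>

static struct workqueue_struct *teardown_wq;

struct offload_ctx {
	struct work_struct destruct_work;   /* one work item per context */
	/* ... device state to release ... */
};

static void ctx_destruct_work(struct work_struct *work)
{
	struct offload_ctx *ctx =
		container_of(work, struct offload_ctx, destruct_work);

	/* Release device resources for this context only, then free it. */
	kfree(ctx);
}

static struct offload_ctx *ctx_create(void)
{
	struct offload_ctx *ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);

	if (ctx)
		INIT_WORK(&ctx->destruct_work, ctx_destruct_work);
	return ctx;
}

static void ctx_queue_destruction(struct offload_ctx *ctx)
{
	/* Queued per context: independent teardowns no longer wait for a
	 * single gc task to walk a shared list. */
	queue_work(teardown_wq, &ctx->destruct_work);
}

static int __init teardown_sketch_init(void)
{
	struct offload_ctx *ctx;

	teardown_wq = alloc_workqueue("teardown_sketch", 0, 0);
	if (!teardown_wq)
		return -ENOMEM;

	ctx = ctx_create();
	if (ctx)
		ctx_queue_destruction(ctx);  /* runs asynchronously */
	return 0;
}

static void __exit teardown_sketch_exit(void)
{
	/* Like tls_device_down()/tls_device_cleanup(): wait for all pending
	 * per-context teardowns, then drop the workqueue. */
	flush_workqueue(teardown_wq);
	destroy_workqueue(teardown_wq);
}

module_init(teardown_sketch_init);
module_exit(teardown_sketch_exit);
MODULE_LICENSE("GPL");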