summaryrefslogtreecommitdiff
path: root/fs/cifs/smbdirect.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/cifs/smbdirect.c')
-rw-r--r--fs/cifs/smbdirect.c309
1 files changed, 136 insertions, 173 deletions
diff --git a/fs/cifs/smbdirect.c b/fs/cifs/smbdirect.c
index b943b74cd246..cd07e5301d42 100644
--- a/fs/cifs/smbdirect.c
+++ b/fs/cifs/smbdirect.c
@@ -1,17 +1,8 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
/*
* Copyright (C) 2017, Microsoft Corporation.
*
* Author(s): Long Li <longli@microsoft.com>
- *
- * This program is free software; you can redistribute it and/or modify
- * it under the terms of the GNU General Public License as published by
- * the Free Software Foundation; either version 2 of the License, or
- * (at your option) any later version.
- *
- * This program is distributed in the hope that it will be useful,
- * but WITHOUT ANY WARRANTY; without even the implied warranty of
- * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See
- * the GNU General Public License for more details.
*/
#include <linux/module.h>
#include <linux/highmem.h>
@@ -164,95 +155,6 @@ do { \
#define log_rdma_mr(level, fmt, args...) \
log_rdma(level, LOG_RDMA_MR, fmt, ##args)
-/*
- * Destroy the transport and related RDMA and memory resources
- * Need to go through all the pending counters and make sure on one is using
- * the transport while it is destroyed
- */
-static void smbd_destroy_rdma_work(struct work_struct *work)
-{
- struct smbd_response *response;
- struct smbd_connection *info =
- container_of(work, struct smbd_connection, destroy_work);
- unsigned long flags;
-
- log_rdma_event(INFO, "destroying qp\n");
- ib_drain_qp(info->id->qp);
- rdma_destroy_qp(info->id);
-
- /* Unblock all I/O waiting on the send queue */
- wake_up_interruptible_all(&info->wait_send_queue);
-
- log_rdma_event(INFO, "cancelling idle timer\n");
- cancel_delayed_work_sync(&info->idle_timer_work);
- log_rdma_event(INFO, "cancelling send immediate work\n");
- cancel_delayed_work_sync(&info->send_immediate_work);
-
- log_rdma_event(INFO, "wait for all send to finish\n");
- wait_event(info->wait_smbd_send_pending,
- info->smbd_send_pending == 0);
-
- log_rdma_event(INFO, "wait for all recv to finish\n");
- wake_up_interruptible(&info->wait_reassembly_queue);
- wait_event(info->wait_smbd_recv_pending,
- info->smbd_recv_pending == 0);
-
- log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
- wait_event(info->wait_send_pending,
- atomic_read(&info->send_pending) == 0);
- wait_event(info->wait_send_payload_pending,
- atomic_read(&info->send_payload_pending) == 0);
-
- log_rdma_event(INFO, "freeing mr list\n");
- wake_up_interruptible_all(&info->wait_mr);
- wait_event(info->wait_for_mr_cleanup,
- atomic_read(&info->mr_used_count) == 0);
- destroy_mr_list(info);
-
- /* It's not posssible for upper layer to get to reassembly */
- log_rdma_event(INFO, "drain the reassembly queue\n");
- do {
- spin_lock_irqsave(&info->reassembly_queue_lock, flags);
- response = _get_first_reassembly(info);
- if (response) {
- list_del(&response->list);
- spin_unlock_irqrestore(
- &info->reassembly_queue_lock, flags);
- put_receive_buffer(info, response);
- } else
- spin_unlock_irqrestore(&info->reassembly_queue_lock, flags);
- } while (response);
-
- info->reassembly_data_length = 0;
-
- log_rdma_event(INFO, "free receive buffers\n");
- wait_event(info->wait_receive_queues,
- info->count_receive_queue + info->count_empty_packet_queue
- == info->receive_credit_max);
- destroy_receive_buffers(info);
-
- ib_free_cq(info->send_cq);
- ib_free_cq(info->recv_cq);
- ib_dealloc_pd(info->pd);
- rdma_destroy_id(info->id);
-
- /* free mempools */
- mempool_destroy(info->request_mempool);
- kmem_cache_destroy(info->request_cache);
-
- mempool_destroy(info->response_mempool);
- kmem_cache_destroy(info->response_cache);
-
- info->transport_status = SMBD_DESTROYED;
- wake_up_all(&info->wait_destroy);
-}
-
-static int smbd_process_disconnected(struct smbd_connection *info)
-{
- schedule_work(&info->destroy_work);
- return 0;
-}
-
static void smbd_disconnect_rdma_work(struct work_struct *work)
{
struct smbd_connection *info =
@@ -319,7 +221,9 @@ static int smbd_conn_upcall(
}
info->transport_status = SMBD_DISCONNECTED;
- smbd_process_disconnected(info);
+ wake_up_interruptible(&info->disconn_wait);
+ wake_up_interruptible(&info->wait_reassembly_queue);
+ wake_up_interruptible_all(&info->wait_send_queue);
break;
default:
@@ -940,7 +844,7 @@ static int smbd_create_header(struct smbd_connection *info,
if (info->transport_status != SMBD_CONNECTED) {
log_outgoing(ERR, "disconnected not sending\n");
- return -ENOENT;
+ return -EAGAIN;
}
atomic_dec(&info->send_credits);
@@ -990,7 +894,7 @@ static int smbd_create_header(struct smbd_connection *info,
request->sge[0].addr = ib_dma_map_single(info->id->device,
(void *)packet,
header_length,
- DMA_BIDIRECTIONAL);
+ DMA_TO_DEVICE);
if (ib_dma_mapping_error(info->id->device, request->sge[0].addr)) {
mempool_free(request, info->request_mempool);
rc = -EIO;
@@ -1066,6 +970,7 @@ static int smbd_post_send(struct smbd_connection *info,
wake_up(&info->wait_send_pending);
}
smbd_disconnect_rdma_connection(info);
+ rc = -EAGAIN;
} else
/* Reset timer for idle connection after packet is sent */
mod_delayed_work(info->workqueue, &info->idle_timer_work,
@@ -1091,7 +996,7 @@ static int smbd_post_send_sgl(struct smbd_connection *info,
for_each_sg(sgl, sg, num_sgs, i) {
request->sge[i+1].addr =
ib_dma_map_page(info->id->device, sg_page(sg),
- sg->offset, sg->length, DMA_BIDIRECTIONAL);
+ sg->offset, sg->length, DMA_TO_DEVICE);
if (ib_dma_mapping_error(
info->id->device, request->sge[i+1].addr)) {
rc = -EIO;
@@ -1478,17 +1383,97 @@ static void idle_connection_timer(struct work_struct *work)
info->keep_alive_interval*HZ);
}
-/* Destroy this SMBD connection, called from upper layer */
-void smbd_destroy(struct smbd_connection *info)
+/*
+ * Destroy the transport and related RDMA and memory resources
+ * Need to go through all the pending counters and make sure on one is using
+ * the transport while it is destroyed
+ */
+void smbd_destroy(struct TCP_Server_Info *server)
{
+ struct smbd_connection *info = server->smbd_conn;
+ struct smbd_response *response;
+ unsigned long flags;
+
+ if (!info) {
+ log_rdma_event(INFO, "rdma session already destroyed\n");
+ return;
+ }
+
log_rdma_event(INFO, "destroying rdma session\n");
+ if (info->transport_status != SMBD_DISCONNECTED) {
+ rdma_disconnect(server->smbd_conn->id);
+ log_rdma_event(INFO, "wait for transport being disconnected\n");
+ wait_event_interruptible(
+ info->disconn_wait,
+ info->transport_status == SMBD_DISCONNECTED);
+ }
- /* Kick off the disconnection process */
- smbd_disconnect_rdma_connection(info);
+ log_rdma_event(INFO, "destroying qp\n");
+ ib_drain_qp(info->id->qp);
+ rdma_destroy_qp(info->id);
+
+ log_rdma_event(INFO, "cancelling idle timer\n");
+ cancel_delayed_work_sync(&info->idle_timer_work);
+ log_rdma_event(INFO, "cancelling send immediate work\n");
+ cancel_delayed_work_sync(&info->send_immediate_work);
+
+ log_rdma_event(INFO, "wait for all send posted to IB to finish\n");
+ wait_event(info->wait_send_pending,
+ atomic_read(&info->send_pending) == 0);
+ wait_event(info->wait_send_payload_pending,
+ atomic_read(&info->send_payload_pending) == 0);
+
+ /* It's not posssible for upper layer to get to reassembly */
+ log_rdma_event(INFO, "drain the reassembly queue\n");
+ do {
+ spin_lock_irqsave(&info->reassembly_queue_lock, flags);
+ response = _get_first_reassembly(info);
+ if (response) {
+ list_del(&response->list);
+ spin_unlock_irqrestore(
+ &info->reassembly_queue_lock, flags);
+ put_receive_buffer(info, response);
+ } else
+ spin_unlock_irqrestore(
+ &info->reassembly_queue_lock, flags);
+ } while (response);
+ info->reassembly_data_length = 0;
- log_rdma_event(INFO, "wait for transport being destroyed\n");
- wait_event(info->wait_destroy,
- info->transport_status == SMBD_DESTROYED);
+ log_rdma_event(INFO, "free receive buffers\n");
+ wait_event(info->wait_receive_queues,
+ info->count_receive_queue + info->count_empty_packet_queue
+ == info->receive_credit_max);
+ destroy_receive_buffers(info);
+
+ /*
+ * For performance reasons, memory registration and deregistration
+ * are not locked by srv_mutex. It is possible some processes are
+ * blocked on transport srv_mutex while holding memory registration.
+ * Release the transport srv_mutex to allow them to hit the failure
+ * path when sending data, and then release memory registartions.
+ */
+ log_rdma_event(INFO, "freeing mr list\n");
+ wake_up_interruptible_all(&info->wait_mr);
+ while (atomic_read(&info->mr_used_count)) {
+ mutex_unlock(&server->srv_mutex);
+ msleep(1000);
+ mutex_lock(&server->srv_mutex);
+ }
+ destroy_mr_list(info);
+
+ ib_free_cq(info->send_cq);
+ ib_free_cq(info->recv_cq);
+ ib_dealloc_pd(info->pd);
+ rdma_destroy_id(info->id);
+
+ /* free mempools */
+ mempool_destroy(info->request_mempool);
+ kmem_cache_destroy(info->request_cache);
+
+ mempool_destroy(info->response_mempool);
+ kmem_cache_destroy(info->response_cache);
+
+ info->transport_status = SMBD_DESTROYED;
destroy_workqueue(info->workqueue);
kfree(info);
@@ -1513,17 +1498,9 @@ int smbd_reconnect(struct TCP_Server_Info *server)
*/
if (server->smbd_conn->transport_status == SMBD_CONNECTED) {
log_rdma_event(INFO, "disconnecting transport\n");
- smbd_disconnect_rdma_connection(server->smbd_conn);
+ smbd_destroy(server);
}
- /* wait until the transport is destroyed */
- if (!wait_event_timeout(server->smbd_conn->wait_destroy,
- server->smbd_conn->transport_status == SMBD_DESTROYED, 5*HZ))
- return -EAGAIN;
-
- destroy_workqueue(server->smbd_conn->workqueue);
- kfree(server->smbd_conn);
-
create_conn:
log_rdma_event(INFO, "creating rdma session\n");
server->smbd_conn = smbd_get_connection(
@@ -1739,12 +1716,13 @@ static struct smbd_connection *_smbd_get_connection(
conn_param.retry_count = SMBD_CM_RETRY;
conn_param.rnr_retry_count = SMBD_CM_RNR_RETRY;
conn_param.flow_control = 0;
- init_waitqueue_head(&info->wait_destroy);
log_rdma_event(INFO, "connecting to IP %pI4 port %d\n",
&addr_in->sin_addr, port);
init_waitqueue_head(&info->conn_wait);
+ init_waitqueue_head(&info->disconn_wait);
+ init_waitqueue_head(&info->wait_reassembly_queue);
rc = rdma_connect(info->id, &conn_param);
if (rc) {
log_rdma_event(ERR, "rdma_connect() failed with %i\n", rc);
@@ -1768,19 +1746,11 @@ static struct smbd_connection *_smbd_get_connection(
}
init_waitqueue_head(&info->wait_send_queue);
- init_waitqueue_head(&info->wait_reassembly_queue);
-
INIT_DELAYED_WORK(&info->idle_timer_work, idle_connection_timer);
INIT_DELAYED_WORK(&info->send_immediate_work, send_immediate_work);
queue_delayed_work(info->workqueue, &info->idle_timer_work,
info->keep_alive_interval*HZ);
- init_waitqueue_head(&info->wait_smbd_send_pending);
- info->smbd_send_pending = 0;
-
- init_waitqueue_head(&info->wait_smbd_recv_pending);
- info->smbd_recv_pending = 0;
-
init_waitqueue_head(&info->wait_send_pending);
atomic_set(&info->send_pending, 0);
@@ -1788,7 +1758,6 @@ static struct smbd_connection *_smbd_get_connection(
atomic_set(&info->send_payload_pending, 0);
INIT_WORK(&info->disconnect_work, smbd_disconnect_rdma_work);
- INIT_WORK(&info->destroy_work, smbd_destroy_rdma_work);
INIT_WORK(&info->recv_done_work, smbd_recv_done_work);
INIT_WORK(&info->post_send_credits_work, smbd_post_send_credits);
info->new_credits_offered = 0;
@@ -1810,7 +1779,7 @@ static struct smbd_connection *_smbd_get_connection(
allocate_mr_failed:
/* At this point, need to a full transport shutdown */
- smbd_destroy(info);
+ smbd_destroy(server);
return NULL;
negotiation_failed:
@@ -1882,11 +1851,6 @@ static int smbd_recv_buf(struct smbd_connection *info, char *buf,
int rc;
again:
- if (info->transport_status != SMBD_CONNECTED) {
- log_read(ERR, "disconnected\n");
- return -ENODEV;
- }
-
/*
* No need to hold the reassembly queue lock all the time as we are
* the only one reading from the front of the queue. The transport
@@ -2000,7 +1964,12 @@ read_rfc1002_done:
info->transport_status != SMBD_CONNECTED);
/* Don't return any data if interrupted */
if (rc)
- return -ENODEV;
+ return rc;
+
+ if (info->transport_status != SMBD_CONNECTED) {
+ log_read(ERR, "disconnected\n");
+ return 0;
+ }
goto again;
}
@@ -2052,8 +2021,6 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
unsigned int to_read, page_offset;
int rc;
- info->smbd_recv_pending++;
-
if (iov_iter_rw(&msg->msg_iter) == WRITE) {
/* It's a bug in upper layer to get there */
cifs_dbg(VFS, "CIFS: invalid msg iter dir %u\n",
@@ -2084,9 +2051,6 @@ int smbd_recv(struct smbd_connection *info, struct msghdr *msg)
}
out:
- info->smbd_recv_pending--;
- wake_up(&info->wait_smbd_recv_pending);
-
/* SMBDirect will read it all or nothing */
if (rc > 0)
msg->msg_iter.count = 0;
@@ -2099,7 +2063,8 @@ out:
* rqst: the data to write
* return value: 0 if successfully write, otherwise error code
*/
-int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst)
+int smbd_send(struct TCP_Server_Info *server,
+ int num_rqst, struct smb_rqst *rqst_array)
{
struct smbd_connection *info = server->smbd_conn;
struct kvec vec;
@@ -2111,54 +2076,51 @@ int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst)
info->max_send_size - sizeof(struct smbd_data_transfer);
struct kvec *iov;
int rc;
+ struct smb_rqst *rqst;
+ int rqst_idx;
- info->smbd_send_pending++;
if (info->transport_status != SMBD_CONNECTED) {
- rc = -ENODEV;
+ rc = -EAGAIN;
goto done;
}
/*
- * Skip the RFC1002 length defined in MS-SMB2 section 2.1
- * It is used only for TCP transport in the iov[0]
- * In future we may want to add a transport layer under protocol
- * layer so this will only be issued to TCP transport
- */
-
- if (rqst->rq_iov[0].iov_len != 4) {
- log_write(ERR, "expected the pdu length in 1st iov, but got %zu\n", rqst->rq_iov[0].iov_len);
- return -EINVAL;
- }
-
- /*
* Add in the page array if there is one. The caller needs to set
* rq_tailsz to PAGE_SIZE when the buffer has multiple pages and
* ends at page boundary
*/
- buflen = smb_rqst_len(server, rqst);
+ remaining_data_length = 0;
+ for (i = 0; i < num_rqst; i++)
+ remaining_data_length += smb_rqst_len(server, &rqst_array[i]);
- if (buflen + sizeof(struct smbd_data_transfer) >
+ if (remaining_data_length + sizeof(struct smbd_data_transfer) >
info->max_fragmented_send_size) {
log_write(ERR, "payload size %d > max size %d\n",
- buflen, info->max_fragmented_send_size);
+ remaining_data_length, info->max_fragmented_send_size);
rc = -EINVAL;
goto done;
}
- iov = &rqst->rq_iov[1];
+ log_write(INFO, "num_rqst=%d total length=%u\n",
+ num_rqst, remaining_data_length);
+
+ rqst_idx = 0;
+next_rqst:
+ rqst = &rqst_array[rqst_idx];
+ iov = rqst->rq_iov;
- cifs_dbg(FYI, "Sending smb (RDMA): smb_len=%u\n", buflen);
- for (i = 0; i < rqst->rq_nvec-1; i++)
+ cifs_dbg(FYI, "Sending smb (RDMA): idx=%d smb_len=%lu\n",
+ rqst_idx, smb_rqst_len(server, rqst));
+ for (i = 0; i < rqst->rq_nvec; i++)
dump_smb(iov[i].iov_base, iov[i].iov_len);
- remaining_data_length = buflen;
- log_write(INFO, "rqst->rq_nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
- "rq_tailsz=%d buflen=%d\n",
- rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
- rqst->rq_tailsz, buflen);
+ log_write(INFO, "rqst_idx=%d nvec=%d rqst->rq_npages=%d rq_pagesz=%d "
+ "rq_tailsz=%d buflen=%lu\n",
+ rqst_idx, rqst->rq_nvec, rqst->rq_npages, rqst->rq_pagesz,
+ rqst->rq_tailsz, smb_rqst_len(server, rqst));
- start = i = iov[0].iov_len ? 0 : 1;
+ start = i = 0;
buflen = 0;
while (true) {
buflen += iov[i].iov_len;
@@ -2206,14 +2168,14 @@ int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst)
goto done;
}
i++;
- if (i == rqst->rq_nvec-1)
+ if (i == rqst->rq_nvec)
break;
}
start = i;
buflen = 0;
} else {
i++;
- if (i == rqst->rq_nvec-1) {
+ if (i == rqst->rq_nvec) {
/* send out all remaining vecs */
remaining_data_length -= buflen;
log_write(INFO,
@@ -2257,6 +2219,10 @@ int smbd_send(struct TCP_Server_Info *server, struct smb_rqst *rqst)
}
}
+ rqst_idx++;
+ if (rqst_idx < num_rqst)
+ goto next_rqst;
+
done:
/*
* As an optimization, we don't wait for individual I/O to finish
@@ -2268,9 +2234,6 @@ done:
wait_event(info->wait_send_payload_pending,
atomic_read(&info->send_payload_pending) == 0);
- info->smbd_send_pending--;
- wake_up(&info->wait_smbd_send_pending);
-
return rc;
}