summaryrefslogtreecommitdiff
path: root/drivers/accel
diff options
context:
space:
mode:
authorPranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com>2023-10-27 10:43:30 -0600
committerJeffrey Hugo <quic_jhugo@quicinc.com>2023-11-03 09:03:01 -0600
commit3b511278b6ef514b3ae3d99ff62947cddd434479 (patch)
tree267474940e9fc6d9ea4e63871af9fe250f0d9814 /drivers/accel
parent44793c6a5b784f1f25608e3773fd40e011c63391 (diff)
accel/qaic: Support for 0 resize slice execution in BO
Add support to partially execute a slice which is resized to zero. Executing a zero size slice in a BO should mean that there are no DMA transfers involved, but the doorbell and semaphores should still be configured. For example, consider a BO of size 18K that is sliced into three 6K slices, and the user calls the partial execute ioctl with a resize of 10K. slice 0 - size is 6K and offset is 0, so a resize of 10K will not cut this slice short, hence we send the entire slice for execution. slice 1 - size is 6K and offset is 6K, so a resize of 10K will cut this slice short and only the first 4K should be DMA transferred, along with configuring the doorbell and semaphores. slice 2 - size is 6K and offset is 12K, so a resize of 10K will cut this slice short and no DMA transfer would be involved, but we would still configure the doorbell and semaphores. This change requires changing the behavior of a 0 resize. Currently, a 0 resize partial execute ioctl behaves exactly like the execute ioctl, i.e. no resize. After this patch, all the slices in a BO should behave exactly like slice 2 in the above example. Refactor copy_partial_exec_reqs() to make it more readable and less complex. Signed-off-by: Pranjal Ramajor Asha Kanojiya <quic_pkanojiy@quicinc.com> Reviewed-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Signed-off-by: Jeffrey Hugo <quic_jhugo@quicinc.com> Reviewed-by: Stanislaw Gruszka <stanislaw.gruszka@linux.intel.com> Link: https://patchwork.freedesktop.org/patch/msgid/20231027164330.11978-1-quic_jhugo@quicinc.com
Diffstat (limited to 'drivers/accel')
-rw-r--r--drivers/accel/qaic/qaic_data.c104
1 files changed, 43 insertions, 61 deletions
diff --git a/drivers/accel/qaic/qaic_data.c b/drivers/accel/qaic/qaic_data.c
index ebc3cca1b094..8da81768f2ab 100644
--- a/drivers/accel/qaic/qaic_data.c
+++ b/drivers/accel/qaic/qaic_data.c
@@ -51,6 +51,7 @@
})
#define NUM_EVENTS 128
#define NUM_DELAYS 10
+#define fifo_at(base, offset) ((base) + (offset) * get_dbc_req_elem_size())
static unsigned int wait_exec_default_timeout_ms = 5000; /* 5 sec default */
module_param(wait_exec_default_timeout_ms, uint, 0600);
@@ -1058,6 +1059,16 @@ unlock_usr_srcu:
return ret;
}
+static inline u32 fifo_space_avail(u32 head, u32 tail, u32 q_size)
+{
+ u32 avail = head - tail - 1;
+
+ if (head <= tail)
+ avail += q_size;
+
+ return avail;
+}
+
static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice, u32 dbc_id,
u32 head, u32 *ptail)
{
@@ -1066,27 +1077,20 @@ static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slic
u32 tail = *ptail;
u32 avail;
- avail = head - tail;
- if (head <= tail)
- avail += dbc->nelem;
-
- --avail;
-
+ avail = fifo_space_avail(head, tail, dbc->nelem);
if (avail < slice->nents)
return -EAGAIN;
if (tail + slice->nents > dbc->nelem) {
avail = dbc->nelem - tail;
avail = min_t(u32, avail, slice->nents);
- memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
- sizeof(*reqs) * avail);
+ memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
reqs += avail;
avail = slice->nents - avail;
if (avail)
memcpy(dbc->req_q_base, reqs, sizeof(*reqs) * avail);
} else {
- memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
- sizeof(*reqs) * slice->nents);
+ memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * slice->nents);
}
*ptail = (tail + slice->nents) % dbc->nelem;
@@ -1094,46 +1098,31 @@ static inline int copy_exec_reqs(struct qaic_device *qdev, struct bo_slice *slic
return 0;
}
-/*
- * Based on the value of resize we may only need to transmit first_n
- * entries and the last entry, with last_bytes to send from the last entry.
- * Note that first_n could be 0.
- */
static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_slice *slice,
- u64 resize, u32 dbc_id, u32 head, u32 *ptail)
+ u64 resize, struct dma_bridge_chan *dbc, u32 head,
+ u32 *ptail)
{
- struct dma_bridge_chan *dbc = &qdev->dbc[dbc_id];
struct dbc_req *reqs = slice->reqs;
struct dbc_req *last_req;
u32 tail = *ptail;
- u64 total_bytes;
u64 last_bytes;
u32 first_n;
u32 avail;
- int ret;
- int i;
-
- avail = head - tail;
- if (head <= tail)
- avail += dbc->nelem;
- --avail;
+ avail = fifo_space_avail(head, tail, dbc->nelem);
- total_bytes = 0;
- for (i = 0; i < slice->nents; i++) {
- total_bytes += le32_to_cpu(reqs[i].len);
- if (total_bytes >= resize)
+ /*
+ * After this for loop is complete, first_n represents the index
+ * of the last DMA request of this slice that needs to be
+ * transferred after resizing and last_bytes represents DMA size
+ * of that request.
+ */
+ last_bytes = resize;
+ for (first_n = 0; first_n < slice->nents; first_n++)
+ if (last_bytes > le32_to_cpu(reqs[first_n].len))
+ last_bytes -= le32_to_cpu(reqs[first_n].len);
+ else
break;
- }
-
- if (total_bytes < resize) {
- /* User space should have used the full buffer path. */
- ret = -EINVAL;
- return ret;
- }
-
- first_n = i;
- last_bytes = i ? resize + le32_to_cpu(reqs[i].len) - total_bytes : resize;
if (avail < (first_n + 1))
return -EAGAIN;
@@ -1142,22 +1131,21 @@ static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_sli
if (tail + first_n > dbc->nelem) {
avail = dbc->nelem - tail;
avail = min_t(u32, avail, first_n);
- memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
- sizeof(*reqs) * avail);
+ memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * avail);
last_req = reqs + avail;
avail = first_n - avail;
if (avail)
memcpy(dbc->req_q_base, last_req, sizeof(*reqs) * avail);
} else {
- memcpy(dbc->req_q_base + tail * get_dbc_req_elem_size(), reqs,
- sizeof(*reqs) * first_n);
+ memcpy(fifo_at(dbc->req_q_base, tail), reqs, sizeof(*reqs) * first_n);
}
}
- /* Copy over the last entry. Here we need to adjust len to the left over
+ /*
+ * Copy over the last entry. Here we need to adjust len to the left over
* size, and set src and dst to the entry it is copied to.
*/
- last_req = dbc->req_q_base + (tail + first_n) % dbc->nelem * get_dbc_req_elem_size();
+ last_req = fifo_at(dbc->req_q_base, (tail + first_n) % dbc->nelem);
memcpy(last_req, reqs + slice->nents - 1, sizeof(*reqs));
/*
@@ -1168,6 +1156,9 @@ static inline int copy_partial_exec_reqs(struct qaic_device *qdev, struct bo_sli
last_req->len = cpu_to_le32((u32)last_bytes);
last_req->src_addr = reqs[first_n].src_addr;
last_req->dest_addr = reqs[first_n].dest_addr;
+ if (!last_bytes)
+ /* Disable DMA transfer */
+ last_req->cmd = GENMASK(7, 2) & reqs[first_n].cmd;
*ptail = (tail + first_n + 1) % dbc->nelem;
@@ -1227,26 +1218,17 @@ static int send_bo_list_to_device(struct qaic_device *qdev, struct drm_file *fil
bo->req_id = dbc->next_req_id++;
list_for_each_entry(slice, &bo->slices, slice) {
- /*
- * If this slice does not fall under the given
- * resize then skip this slice and continue the loop
- */
- if (is_partial && pexec[i].resize && pexec[i].resize <= slice->offset)
- continue;
-
for (j = 0; j < slice->nents; j++)
slice->reqs[j].req_id = cpu_to_le16(bo->req_id);
- /*
- * If it is a partial execute ioctl call then check if
- * resize has cut this slice short then do a partial copy
- * else do complete copy
- */
- if (is_partial && pexec[i].resize &&
- pexec[i].resize < slice->offset + slice->size)
+ if (is_partial && (!pexec[i].resize || pexec[i].resize <= slice->offset))
+ /* Configure the slice for no DMA transfer */
+ ret = copy_partial_exec_reqs(qdev, slice, 0, dbc, head, tail);
+ else if (is_partial && pexec[i].resize < slice->offset + slice->size)
+ /* Configure the slice to be partially DMA transferred */
ret = copy_partial_exec_reqs(qdev, slice,
- pexec[i].resize - slice->offset,
- dbc->id, head, tail);
+ pexec[i].resize - slice->offset, dbc,
+ head, tail);
else
ret = copy_exec_reqs(qdev, slice, dbc->id, head, tail);
if (ret) {