diff options
author | Ofir Bitton <obitton@habana.ai> | 2023-01-10 11:41:39 +0200 |
---|---|---|
committer | Oded Gabbay <ogabbay@kernel.org> | 2023-01-26 11:52:13 +0200 |
commit | 75b6984ef659a0d2a86cd937a208f8d1e98e957e (patch) | |
tree | 924e8c20b788737b39fe2350326c031f086dd073 /drivers/accel | |
parent | 9a7d530a8048980c76e1e61078ed5223bf6fa283 (diff) |
habanalabs: optimize command submission completion timestamp
Completion timestamp is taken during the actual command submission
release. As the release happens in a work queue, the timestamp taken
is not accurate. Hence, we will take the timestamp in the interrupt
handler itself while propagating it to the release function.
Signed-off-by: Ofir Bitton <obitton@habana.ai>
Reviewed-by: Oded Gabbay <ogabbay@kernel.org>
Signed-off-by: Oded Gabbay <ogabbay@kernel.org>
Diffstat (limited to 'drivers/accel')
-rw-r--r-- | drivers/accel/habanalabs/common/command_submission.c | 12 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/habanalabs.h | 4 | ||||
-rw-r--r-- | drivers/accel/habanalabs/common/irq.c | 13 |
3 files changed, 23 insertions, 6 deletions
diff --git a/drivers/accel/habanalabs/common/command_submission.c b/drivers/accel/habanalabs/common/command_submission.c index 00fedf2d8654..8270db0a72a2 100644 --- a/drivers/accel/habanalabs/common/command_submission.c +++ b/drivers/accel/habanalabs/common/command_submission.c @@ -398,8 +398,16 @@ static void hl_complete_job(struct hl_device *hdev, struct hl_cs_job *job) * flow by calling 'hl_hw_queue_update_ci'. */ if (cs_needs_completion(cs) && - (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) + (job->queue_type == QUEUE_TYPE_EXT || job->queue_type == QUEUE_TYPE_HW)) { + + /* In CS based completions, the timestamp is already available, + * so no need to extract it from job + */ + if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_JOB) + cs->completion_timestamp = job->timestamp; + cs_put(cs); + } hl_cs_job_put(job); } @@ -776,7 +784,7 @@ out: } if (cs->timestamp) { - cs->fence->timestamp = ktime_get(); + cs->fence->timestamp = cs->completion_timestamp; hl_push_cs_outcome(hdev, &cs->ctx->outcome_store, cs->sequence, cs->fence->timestamp, cs->fence->error); } diff --git a/drivers/accel/habanalabs/common/habanalabs.h b/drivers/accel/habanalabs/common/habanalabs.h index a0dfbf4f6cbb..afc0c0d3f9e3 100644 --- a/drivers/accel/habanalabs/common/habanalabs.h +++ b/drivers/accel/habanalabs/common/habanalabs.h @@ -1940,6 +1940,7 @@ struct hl_userptr { * @type: CS_TYPE_*. * @jobs_cnt: counter of submitted jobs on all queues. * @encaps_sig_hdl_id: encaps signals handle id, set for the first staged cs. + * @completion_timestamp: timestamp of the last completed cs job. * @sob_addr_offset: sob offset from the configuration base address. * @initial_sob_count: count of completed signals in SOB before current submission of signal or * cs with encaps signals. @@ -1972,6 +1973,7 @@ struct hl_cs { struct list_head staged_cs_node; struct list_head debugfs_list; struct hl_cs_encaps_sig_handle *encaps_sig_hdl; + ktime_t completion_timestamp; u64 sequence; u64 staged_sequence; u64 timeout_jiffies; @@ -2007,6 +2009,7 @@ struct hl_cs { * @debugfs_list: node in debugfs list of command submission jobs. * @refcount: reference counter for usage of the CS job. * @queue_type: the type of the H/W queue this job is submitted to. + * @timestamp: timestamp upon job completion * @id: the id of this job inside a CS. * @hw_queue_id: the id of the H/W queue this job is submitted to. * @user_cb_size: the actual size of the CB we got from the user. @@ -2033,6 +2036,7 @@ struct hl_cs_job { struct list_head debugfs_list; struct kref refcount; enum hl_queue_type queue_type; + ktime_t timestamp; u32 id; u32 hw_queue_id; u32 user_cb_size; diff --git a/drivers/accel/habanalabs/common/irq.c b/drivers/accel/habanalabs/common/irq.c index a986d7dea453..04844e843a7b 100644 --- a/drivers/accel/habanalabs/common/irq.c +++ b/drivers/accel/habanalabs/common/irq.c @@ -72,15 +72,17 @@ static void irq_handle_eqe(struct work_struct *work) * @hdev: pointer to device structure * @cs_seq: command submission sequence * @cq: completion queue + * @timestamp: interrupt timestamp * */ -static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq) +static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq, ktime_t timestamp) { struct hl_hw_queue *queue; struct hl_cs_job *job; queue = &hdev->kernel_queues[cq->hw_queue_id]; job = queue->shadow_queue[hl_pi_2_offset(cs_seq)]; + job->timestamp = timestamp; queue_work(hdev->cq_wq[cq->cq_idx], &job->finish_work); atomic_inc(&queue->ci); @@ -91,9 +93,10 @@ static void job_finish(struct hl_device *hdev, u32 cs_seq, struct hl_cq *cq) * * @hdev: pointer to device structure * @cs_seq: command submission sequence + * @timestamp: interrupt timestamp * */ -static void cs_finish(struct hl_device *hdev, u16 cs_seq) +static void cs_finish(struct hl_device *hdev, u16 cs_seq, ktime_t timestamp) { struct asic_fixed_properties *prop = &hdev->asic_prop; struct hl_hw_queue *queue; @@ -113,6 +116,7 @@ static void cs_finish(struct hl_device *hdev, u16 cs_seq) atomic_inc(&queue->ci); } + cs->completion_timestamp = timestamp; queue_work(hdev->cs_cmplt_wq, &cs->finish_work); } @@ -130,6 +134,7 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) bool shadow_index_valid, entry_ready; u16 shadow_index; struct hl_cq_entry *cq_entry, *cq_base; + ktime_t timestamp = ktime_get(); if (hdev->disabled) { dev_dbg(hdev->dev, @@ -171,9 +176,9 @@ irqreturn_t hl_irq_handler_cq(int irq, void *arg) if (shadow_index_valid && !hdev->disabled) { if (hdev->asic_prop.completion_mode == HL_COMPLETION_MODE_CS) - cs_finish(hdev, shadow_index); + cs_finish(hdev, shadow_index, timestamp); else - job_finish(hdev, shadow_index, cq); + job_finish(hdev, shadow_index, cq, timestamp); } /* Clear CQ entry ready bit */ |