diff options
author | Ashutosh Dixit <ashutosh.dixit@intel.com> | 2024-06-17 18:46:09 -0700 |
---|---|---|
committer | Ashutosh Dixit <ashutosh.dixit@intel.com> | 2024-06-18 12:40:45 -0700 |
commit | cffd77865f476994680892601e09bc2164179907 (patch) | |
tree | 59dedf46526da490a2c26de1ed6b1b79ecf51b83 /drivers/gpu/drm/xe/xe_oa.c | |
parent | 3a1fc394ba85e851084d9fa1bdd4ecd625ef913b (diff) |
drm/xe/oa: Enable Xe2+ overrun mode
Enable Xe2+ overrun mode. For Xe2+, when overrun mode is enabled, there are
no partial reports at the end of buffer, making the OA buffer effectively a
non-power-of-2 size circular buffer whose size, circ_size, is a multiple of
the report size.
v2: Fix implementation of xe_oa_circ_diff/xe_oa_circ_incr (Umesh)
Acked-by: Rodrigo Vivi <rodrigo.vivi@intel.com>
Reviewed-by: Umesh Nerlige Ramappa <umesh.nerlige.ramappa@intel.com>
Signed-off-by: Ashutosh Dixit <ashutosh.dixit@intel.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20240618014609.3233427-18-ashutosh.dixit@intel.com
Diffstat (limited to 'drivers/gpu/drm/xe/xe_oa.c')
-rw-r--r-- | drivers/gpu/drm/xe/xe_oa.c | 35 |
1 files changed, 27 insertions, 8 deletions
diff --git a/drivers/gpu/drm/xe/xe_oa.c b/drivers/gpu/drm/xe/xe_oa.c index 2d398b7231c1..34206e0b6a08 100644 --- a/drivers/gpu/drm/xe/xe_oa.c +++ b/drivers/gpu/drm/xe/xe_oa.c @@ -114,7 +114,14 @@ static const struct xe_oa_format oa_formats[] = { static u32 xe_oa_circ_diff(struct xe_oa_stream *stream, u32 tail, u32 head) { - return (tail - head) & (XE_OA_BUFFER_SIZE - 1); + return tail >= head ? tail - head : + tail + stream->oa_buffer.circ_size - head; +} + +static u32 xe_oa_circ_incr(struct xe_oa_stream *stream, u32 ptr, u32 n) +{ + return ptr + n >= stream->oa_buffer.circ_size ? + ptr + n - stream->oa_buffer.circ_size : ptr + n; } static void xe_oa_config_release(struct kref *ref) @@ -288,7 +295,7 @@ static int xe_oa_append_report(struct xe_oa_stream *stream, char __user *buf, buf += *offset; - oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; report_size_partial = oa_buf_end - report; if (report_size_partial < report_size) { @@ -314,7 +321,6 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, int report_size = stream->oa_buffer.format->size; u8 *oa_buf_base = stream->oa_buffer.vaddr; u32 gtt_offset = xe_bo_ggtt_addr(stream->oa_buffer.bo); - u32 mask = (XE_OA_BUFFER_SIZE - 1); size_t start_offset = *offset; unsigned long flags; u32 head, tail; @@ -325,21 +331,23 @@ static int xe_oa_append_reports(struct xe_oa_stream *stream, char __user *buf, tail = stream->oa_buffer.tail; spin_unlock_irqrestore(&stream->oa_buffer.ptr_lock, flags); - xe_assert(stream->oa->xe, head < XE_OA_BUFFER_SIZE && tail < XE_OA_BUFFER_SIZE); + xe_assert(stream->oa->xe, + head < stream->oa_buffer.circ_size && tail < stream->oa_buffer.circ_size); - for (; xe_oa_circ_diff(stream, tail, head); head = (head + report_size) & mask) { + for (; xe_oa_circ_diff(stream, tail, head); + head = xe_oa_circ_incr(stream, head, report_size)) { u8 *report = oa_buf_base + head; ret = xe_oa_append_report(stream, buf, count, offset, report); if (ret) break; - if (is_power_of_2(report_size)) { + if (!(stream->oa_buffer.circ_size % report_size)) { /* Clear out report id and timestamp to detect unlanded reports */ oa_report_id_clear(stream, (void *)report); oa_timestamp_clear(stream, (void *)report); } else { - u8 *oa_buf_end = stream->oa_buffer.vaddr + XE_OA_BUFFER_SIZE; + u8 *oa_buf_end = stream->oa_buffer.vaddr + stream->oa_buffer.circ_size; u32 part = oa_buf_end - report; /* Zero out the entire report */ @@ -377,7 +385,6 @@ static void xe_oa_init_oa_buffer(struct xe_oa_stream *stream) xe_mmio_write32(stream->gt, __oa_regs(stream)->oa_head_ptr, gtt_offset & OAG_OAHEADPTR_MASK); stream->oa_buffer.head = 0; - /* * PRM says: "This MMIO must be set before the OATAILPTR register and after the * OAHEADPTR register. This is to enable proper functionality of the overflow bit". @@ -1300,6 +1307,18 @@ static int xe_oa_stream_init(struct xe_oa_stream *stream, stream->periodic = param->period_exponent > 0; stream->period_exponent = param->period_exponent; + /* + * For Xe2+, when overrun mode is enabled, there are no partial reports at the end + * of buffer, making the OA buffer effectively a non-power-of-2 size circular + * buffer whose size, circ_size, is a multiple of the report size + */ + if (GRAPHICS_VER(stream->oa->xe) >= 20 && + stream->hwe->oa_unit->type == DRM_XE_OA_UNIT_TYPE_OAG && stream->sample) + stream->oa_buffer.circ_size = + XE_OA_BUFFER_SIZE - XE_OA_BUFFER_SIZE % stream->oa_buffer.format->size; + else + stream->oa_buffer.circ_size = XE_OA_BUFFER_SIZE; + if (stream->exec_q && engine_supports_mi_query(stream->hwe)) { /* If we don't find the context offset, just return error */ ret = xe_oa_set_ctx_ctrl_offset(stream); |