From 8169b2097d88d99d7e4a72e20e4b549efe9eb8d7 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Wed, 3 Jul 2024 09:48:01 -0700 Subject: drm/xe/uapi: Rename xe perf layer as xe observation layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In Xe, the perf layer allows capture of HW counter streams. These HW counters are generally performance related but don't have to be necessarily so. Also, the name "perf" is a carryover from i915 and is not preferred. Here we propose the name "observation" for this common layer which allows capture of different types of these counter streams. v2: Rename observability layer to observation layer (Lucas/Rodrigo) v3: Rename sysctl file to "observation_paranoid" (Jose) Fixes: 52c2e956dceb ("drm/xe/perf/uapi: "Perf" layer to support multiple perf counter stream types") Fixes: fe8929bdf835 ("drm/xe/perf/uapi: Add perf_stream_paranoid sysctl") Acked-by: Lucas De Marchi Acked-by: Rodrigo Vivi Signed-off-by: Ashutosh Dixit Reviewed-by: Umesh Nerlige Ramappa Acked-by: José Roberto de Souza Link: https://patchwork.freedesktop.org/patch/msgid/20240703164801.2561423-1-ashutosh.dixit@intel.com --- include/uapi/drm/xe_drm.h | 102 +++++++++++++++++++++++----------------------- 1 file changed, 52 insertions(+), 50 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 12eaa8532b5c..33544ef78d3e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -80,7 +80,7 @@ extern "C" { * - &DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY * - &DRM_IOCTL_XE_EXEC * - &DRM_IOCTL_XE_WAIT_USER_FENCE - * - &DRM_IOCTL_XE_PERF + * - &DRM_IOCTL_XE_OBSERVATION */ /* @@ -101,7 +101,7 @@ extern "C" { #define DRM_XE_EXEC_QUEUE_GET_PROPERTY 0x08 #define DRM_XE_EXEC 0x09 #define DRM_XE_WAIT_USER_FENCE 0x0a -#define DRM_XE_PERF 0x0b +#define DRM_XE_OBSERVATION 0x0b /* Must be kept compact -- no holes */ @@ -116,7 +116,7 @@ extern "C" { #define DRM_IOCTL_XE_EXEC_QUEUE_GET_PROPERTY DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_EXEC_QUEUE_GET_PROPERTY, struct drm_xe_exec_queue_get_property) #define DRM_IOCTL_XE_EXEC DRM_IOW(DRM_COMMAND_BASE + DRM_XE_EXEC, struct drm_xe_exec) #define DRM_IOCTL_XE_WAIT_USER_FENCE DRM_IOWR(DRM_COMMAND_BASE + DRM_XE_WAIT_USER_FENCE, struct drm_xe_wait_user_fence) -#define DRM_IOCTL_XE_PERF DRM_IOW(DRM_COMMAND_BASE + DRM_XE_PERF, struct drm_xe_perf_param) +#define DRM_IOCTL_XE_OBSERVATION DRM_IOW(DRM_COMMAND_BASE + DRM_XE_OBSERVATION, struct drm_xe_observation_param) /** * DOC: Xe IOCTL Extensions @@ -1376,66 +1376,67 @@ struct drm_xe_wait_user_fence { }; /** - * enum drm_xe_perf_type - Perf stream types + * enum drm_xe_observation_type - Observation stream types */ -enum drm_xe_perf_type { - /** @DRM_XE_PERF_TYPE_OA: OA perf stream type */ - DRM_XE_PERF_TYPE_OA, +enum drm_xe_observation_type { + /** @DRM_XE_OBSERVATION_TYPE_OA: OA observation stream type */ + DRM_XE_OBSERVATION_TYPE_OA, }; /** - * enum drm_xe_perf_op - Perf stream ops + * enum drm_xe_observation_op - Observation stream ops */ -enum drm_xe_perf_op { - /** @DRM_XE_PERF_OP_STREAM_OPEN: Open a perf counter stream */ - DRM_XE_PERF_OP_STREAM_OPEN, +enum drm_xe_observation_op { + /** @DRM_XE_OBSERVATION_OP_STREAM_OPEN: Open an observation stream */ + DRM_XE_OBSERVATION_OP_STREAM_OPEN, - /** @DRM_XE_PERF_OP_ADD_CONFIG: Add perf stream config */ - DRM_XE_PERF_OP_ADD_CONFIG, + /** @DRM_XE_OBSERVATION_OP_ADD_CONFIG: Add observation stream config */ + DRM_XE_OBSERVATION_OP_ADD_CONFIG, - /** @DRM_XE_PERF_OP_REMOVE_CONFIG: Remove perf stream config */ - DRM_XE_PERF_OP_REMOVE_CONFIG, + /** @DRM_XE_OBSERVATION_OP_REMOVE_CONFIG: Remove observation stream config */ + DRM_XE_OBSERVATION_OP_REMOVE_CONFIG, }; /** - * struct drm_xe_perf_param - Input of &DRM_XE_PERF + * struct drm_xe_observation_param - Input of &DRM_XE_OBSERVATION * - * The perf layer enables multiplexing perf counter streams of multiple - * types. The actual params for a particular stream operation are supplied - * via the @param pointer (use __copy_from_user to get these params). + * The observation layer enables multiplexing observation streams of + * multiple types. The actual params for a particular stream operation are + * supplied via the @param pointer (use __copy_from_user to get these + * params). */ -struct drm_xe_perf_param { +struct drm_xe_observation_param { /** @extensions: Pointer to the first extension struct, if any */ __u64 extensions; - /** @perf_type: Perf stream type, of enum @drm_xe_perf_type */ - __u64 perf_type; - /** @perf_op: Perf op, of enum @drm_xe_perf_op */ - __u64 perf_op; + /** @observation_type: observation stream type, of enum @drm_xe_observation_type */ + __u64 observation_type; + /** @observation_op: observation stream op, of enum @drm_xe_observation_op */ + __u64 observation_op; /** @param: Pointer to actual stream params */ __u64 param; }; /** - * enum drm_xe_perf_ioctls - Perf fd ioctl's + * enum drm_xe_observation_ioctls - Observation stream fd ioctl's * - * Information exchanged between userspace and kernel for perf fd ioctl's - * is stream type specific + * Information exchanged between userspace and kernel for observation fd + * ioctl's is stream type specific */ -enum drm_xe_perf_ioctls { - /** @DRM_XE_PERF_IOCTL_ENABLE: Enable data capture for a stream */ - DRM_XE_PERF_IOCTL_ENABLE = _IO('i', 0x0), +enum drm_xe_observation_ioctls { + /** @DRM_XE_OBSERVATION_IOCTL_ENABLE: Enable data capture for an observation stream */ + DRM_XE_OBSERVATION_IOCTL_ENABLE = _IO('i', 0x0), - /** @DRM_XE_PERF_IOCTL_DISABLE: Disable data capture for a stream */ - DRM_XE_PERF_IOCTL_DISABLE = _IO('i', 0x1), + /** @DRM_XE_OBSERVATION_IOCTL_DISABLE: Disable data capture for a observation stream */ + DRM_XE_OBSERVATION_IOCTL_DISABLE = _IO('i', 0x1), - /** @DRM_XE_PERF_IOCTL_CONFIG: Change stream configuration */ - DRM_XE_PERF_IOCTL_CONFIG = _IO('i', 0x2), + /** @DRM_XE_OBSERVATION_IOCTL_CONFIG: Change observation stream configuration */ + DRM_XE_OBSERVATION_IOCTL_CONFIG = _IO('i', 0x2), - /** @DRM_XE_PERF_IOCTL_STATUS: Return stream status */ - DRM_XE_PERF_IOCTL_STATUS = _IO('i', 0x3), + /** @DRM_XE_OBSERVATION_IOCTL_STATUS: Return observation stream status */ + DRM_XE_OBSERVATION_IOCTL_STATUS = _IO('i', 0x3), - /** @DRM_XE_PERF_IOCTL_INFO: Return stream info */ - DRM_XE_PERF_IOCTL_INFO = _IO('i', 0x4), + /** @DRM_XE_OBSERVATION_IOCTL_INFO: Return observation stream info */ + DRM_XE_OBSERVATION_IOCTL_INFO = _IO('i', 0x4), }; /** @@ -1546,12 +1547,12 @@ enum drm_xe_oa_format_type { * Stream params are specified as a chain of @drm_xe_ext_set_property * struct's, with @property values from enum @drm_xe_oa_property_id and * @drm_xe_user_extension base.name set to @DRM_XE_OA_EXTENSION_SET_PROPERTY. - * @param field in struct @drm_xe_perf_param points to the first + * @param field in struct @drm_xe_observation_param points to the first * @drm_xe_ext_set_property struct. * - * Exactly the same mechanism is also used for stream reconfiguration using - * the @DRM_XE_PERF_IOCTL_CONFIG perf fd ioctl, though only a subset of - * properties below can be specified for stream reconfiguration. + * Exactly the same mechanism is also used for stream reconfiguration using the + * @DRM_XE_OBSERVATION_IOCTL_CONFIG observation stream fd ioctl, though only a + * subset of properties below can be specified for stream reconfiguration. */ enum drm_xe_oa_property_id { #define DRM_XE_OA_EXTENSION_SET_PROPERTY 0 @@ -1571,11 +1572,11 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_METRIC_SET: OA metrics defining contents of OA - * reports, previously added via @DRM_XE_PERF_OP_ADD_CONFIG. + * reports, previously added via @DRM_XE_OBSERVATION_OP_ADD_CONFIG. */ DRM_XE_OA_PROPERTY_OA_METRIC_SET, - /** @DRM_XE_OA_PROPERTY_OA_FORMAT: Perf counter report format */ + /** @DRM_XE_OA_PROPERTY_OA_FORMAT: OA counter report format */ DRM_XE_OA_PROPERTY_OA_FORMAT, /* * OA_FORMAT's are specified the same way as in PRM/Bspec 52198/60942, @@ -1596,13 +1597,13 @@ enum drm_xe_oa_property_id { /** * @DRM_XE_OA_PROPERTY_OA_DISABLED: A value of 1 will open the OA - * stream in a DISABLED state (see @DRM_XE_PERF_IOCTL_ENABLE). + * stream in a DISABLED state (see @DRM_XE_OBSERVATION_IOCTL_ENABLE). */ DRM_XE_OA_PROPERTY_OA_DISABLED, /** * @DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID: Open the stream for a specific - * @exec_queue_id. Perf queries can be executed on this exec queue. + * @exec_queue_id. OA queries can be executed on this exec queue. */ DRM_XE_OA_PROPERTY_EXEC_QUEUE_ID, @@ -1622,7 +1623,7 @@ enum drm_xe_oa_property_id { /** * struct drm_xe_oa_config - OA metric configuration * - * Multiple OA configs can be added using @DRM_XE_PERF_OP_ADD_CONFIG. A + * Multiple OA configs can be added using @DRM_XE_OBSERVATION_OP_ADD_CONFIG. A * particular config can be specified when opening an OA stream using * @DRM_XE_OA_PROPERTY_OA_METRIC_SET property. */ @@ -1645,8 +1646,9 @@ struct drm_xe_oa_config { /** * struct drm_xe_oa_stream_status - OA stream status returned from - * @DRM_XE_PERF_IOCTL_STATUS perf fd ioctl. Userspace can call the ioctl to - * query stream status in response to EIO errno from perf fd read(). + * @DRM_XE_OBSERVATION_IOCTL_STATUS observation stream fd ioctl. Userspace can + * call the ioctl to query stream status in response to EIO errno from + * observation fd read(). */ struct drm_xe_oa_stream_status { /** @extensions: Pointer to the first extension struct, if any */ @@ -1665,7 +1667,7 @@ struct drm_xe_oa_stream_status { /** * struct drm_xe_oa_stream_info - OA stream info returned from - * @DRM_XE_PERF_IOCTL_INFO perf fd ioctl + * @DRM_XE_OBSERVATION_IOCTL_INFO observation stream fd ioctl */ struct drm_xe_oa_stream_info { /** @extensions: Pointer to the first extension struct, if any */ -- cgit v1.3.1 From 01e0cfc994be484ddcb9e121e353e51d8bb837c0 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 15:28:28 +0200 Subject: drm/xe: Use write-back caching mode for system memory on DGFX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The caching mode for buffer objects with VRAM as a possible placement was forced to write-combined, regardless of placement. However, write-combined system memory is expensive to allocate and even though it is pooled, the pool is expensive to shrink, since it involves global CPU TLB flushes. Moreover write-combined system memory from TTM is only reliably available on x86 and DGFX doesn't have an x86 restriction. So regardless of the cpu caching mode selected for a bo, internally use write-back caching mode for system memory on DGFX. Coherency is maintained, but user-space clients may perceive a difference in cpu access speeds. v2: - Update RB- and Ack tags. - Rephrase wording in xe_drm.h (Matt Roper) v3: - Really rephrase wording. Signed-off-by: Thomas Hellström Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Cc: Pallavi Mishra Cc: Matthew Auld Cc: dri-devel@lists.freedesktop.org Cc: Joonas Lahtinen Cc: Effie Yu Cc: Matthew Brost Cc: Maarten Lankhorst Cc: Jose Souza Cc: Michal Mrozek Cc: # v6.8+ Acked-by: Matthew Auld Acked-by: José Roberto de Souza Reviewed-by: Rodrigo Vivi Fixes: 622f709ca629 ("drm/xe/uapi: Add support for CPU caching mode") Acked-by: Michal Mrozek Acked-by: Effie Yu #On chat Link: https://patchwork.freedesktop.org/patch/msgid/20240705132828.27714-1-thomas.hellstrom@linux.intel.com --- drivers/gpu/drm/xe/xe_bo.c | 47 ++++++++++++++++++++++++---------------- drivers/gpu/drm/xe/xe_bo_types.h | 3 ++- include/uapi/drm/xe_drm.h | 8 ++++++- 3 files changed, 37 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_bo.c b/drivers/gpu/drm/xe/xe_bo.c index 65c696966e96..31192d983d9e 100644 --- a/drivers/gpu/drm/xe/xe_bo.c +++ b/drivers/gpu/drm/xe/xe_bo.c @@ -343,7 +343,7 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, struct xe_device *xe = xe_bo_device(bo); struct xe_ttm_tt *tt; unsigned long extra_pages; - enum ttm_caching caching; + enum ttm_caching caching = ttm_cached; int err; tt = kzalloc(sizeof(*tt), GFP_KERNEL); @@ -357,26 +357,35 @@ static struct ttm_tt *xe_ttm_tt_create(struct ttm_buffer_object *ttm_bo, extra_pages = DIV_ROUND_UP(xe_device_ccs_bytes(xe, bo->size), PAGE_SIZE); - switch (bo->cpu_caching) { - case DRM_XE_GEM_CPU_CACHING_WC: - caching = ttm_write_combined; - break; - default: - caching = ttm_cached; - break; - } - - WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); - /* - * Display scanout is always non-coherent with the CPU cache. - * - * For Xe_LPG and beyond, PPGTT PTE lookups are also non-coherent and - * require a CPU:WC mapping. + * DGFX system memory is always WB / ttm_cached, since + * other caching modes are only supported on x86. DGFX + * GPU system memory accesses are always coherent with the + * CPU. */ - if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || - (xe->info.graphics_verx100 >= 1270 && bo->flags & XE_BO_FLAG_PAGETABLE)) - caching = ttm_write_combined; + if (!IS_DGFX(xe)) { + switch (bo->cpu_caching) { + case DRM_XE_GEM_CPU_CACHING_WC: + caching = ttm_write_combined; + break; + default: + caching = ttm_cached; + break; + } + + WARN_ON((bo->flags & XE_BO_FLAG_USER) && !bo->cpu_caching); + + /* + * Display scanout is always non-coherent with the CPU cache. + * + * For Xe_LPG and beyond, PPGTT PTE lookups are also + * non-coherent and require a CPU:WC mapping. + */ + if ((!bo->cpu_caching && bo->flags & XE_BO_FLAG_SCANOUT) || + (xe->info.graphics_verx100 >= 1270 && + bo->flags & XE_BO_FLAG_PAGETABLE)) + caching = ttm_write_combined; + } if (bo->flags & XE_BO_FLAG_NEEDS_UC) { /* diff --git a/drivers/gpu/drm/xe/xe_bo_types.h b/drivers/gpu/drm/xe/xe_bo_types.h index 02d68873558a..ebc8abf7930a 100644 --- a/drivers/gpu/drm/xe/xe_bo_types.h +++ b/drivers/gpu/drm/xe/xe_bo_types.h @@ -68,7 +68,8 @@ struct xe_bo { /** * @cpu_caching: CPU caching mode. Currently only used for userspace - * objects. + * objects. Exceptions are system memory on DGFX, which is always + * WB. */ u16 cpu_caching; diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 33544ef78d3e..19619d4952a8 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -783,7 +783,13 @@ struct drm_xe_gem_create { #define DRM_XE_GEM_CPU_CACHING_WC 2 /** * @cpu_caching: The CPU caching mode to select for this object. If - * mmaping the object the mode selected here will also be used. + * mmaping the object the mode selected here will also be used. The + * exception is when mapping system memory (including data evicted + * to system) on discrete GPUs. The caching mode selected will + * then be overridden to DRM_XE_GEM_CPU_CACHING_WB, and coherency + * between GPU- and CPU is guaranteed. The caching mode of + * existing CPU-mappings will be updated transparently to + * user-space clients. */ __u16 cpu_caching; /** @pad: MBZ */ -- cgit v1.3.1 From f592e01664b4a57b109fcf6f6916145517f94bd7 Mon Sep 17 00:00:00 2001 From: Sebastian Wick Date: Tue, 2 Jul 2024 16:30:16 +0200 Subject: drm/drm_connector: Document Colorspace property variants The initial idea of the Colorspace prop was that this maps 1:1 to InfoFrames/SDP but KMS does not give user space enough information nor control over the output format to figure out which variants can be used for a given KMS commit. At the same time, properties like Broadcast RGB expect full range quantization range being produced by user space from the CRTC and drivers to convert to the range expected by the sink for the chosen output format, mode, InfoFrames, etc. This change documents the reality of the Colorspace property. The Default variant unfortunately is very much driver specific and not reflected by the EDID. The BT2020 variants are in active use by generic compositors which have expectations from the driver about the conversions it has to do when selecting certain output formats. Everything else is also marked as undefined. Coming up with valid behavior that makes it usable from user space and consistent with other KMS properties for those variants is left as an exercise for whoever wants to use them. v2: * Talk about "pixel operation properties" that user space configures * Mention that user space is responsible for checking the EDID for sink support * Make it clear that drivers can choose between RGB and YCbCr on their own Signed-off-by: Sebastian Wick Reviewed-by: Pekka Paalanen Signed-off-by: Maxime Ripard Link: https://patchwork.freedesktop.org/patch/msgid/20240702143017.2429975-1-sebastian.wick@redhat.com --- drivers/gpu/drm/drm_connector.c | 79 +++++++++++++++++++++++++++++++---------- include/drm/drm_connector.h | 8 ----- 2 files changed, 61 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index ab6ab7ff7ea8..b4f4d2f908d1 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -2315,24 +2315,67 @@ EXPORT_SYMBOL(drm_mode_create_aspect_ratio_property); * DOC: standard connector properties * * Colorspace: - * This property helps select a suitable colorspace based on the sink - * capability. Modern sink devices support wider gamut like BT2020. - * This helps switch to BT2020 mode if the BT2020 encoded video stream - * is being played by the user, same for any other colorspace. Thereby - * giving a good visual experience to users. - * - * The expectation from userspace is that it should parse the EDID - * and get supported colorspaces. Use this property and switch to the - * one supported. Sink supported colorspaces should be retrieved by - * userspace from EDID and driver will not explicitly expose them. - * - * Basically the expectation from userspace is: - * - Set up CRTC DEGAMMA/CTM/GAMMA to convert to some sink - * colorspace - * - Set this new property to let the sink know what it - * converted the CRTC output to. - * - This property is just to inform sink what colorspace - * source is trying to drive. + * This property is used to inform the driver about the color encoding + * user space configured the pixel operation properties to produce. + * The variants set the colorimetry, transfer characteristics, and which + * YCbCr conversion should be used when necessary. + * The transfer characteristics from HDR_OUTPUT_METADATA takes precedence + * over this property. + * User space always configures the pixel operation properties to produce + * full quantization range data (see the Broadcast RGB property). + * + * Drivers inform the sink about what colorimetry, transfer + * characteristics, YCbCr conversion, and quantization range to expect + * (this can depend on the output mode, output format and other + * properties). Drivers also convert the user space provided data to what + * the sink expects. + * + * User space has to check if the sink supports all of the possible + * colorimetries that the driver is allowed to pick by parsing the EDID. + * + * For historical reasons this property exposes a number of variants which + * result in undefined behavior. + * + * Default: + * The behavior is driver-specific. + * BT2020_RGB: + * BT2020_YCC: + * User space configures the pixel operation properties to produce + * RGB content with Rec. ITU-R BT.2020 colorimetry, Rec. + * ITU-R BT.2020 (Table 4, RGB) transfer characteristics and full + * quantization range. + * User space can use the HDR_OUTPUT_METADATA property to set the + * transfer characteristics to PQ (Rec. ITU-R BT.2100 Table 4) or + * HLG (Rec. ITU-R BT.2100 Table 5) in which case, user space + * configures pixel operation properties to produce content with + * the respective transfer characteristics. + * User space has to make sure the sink supports Rec. + * ITU-R BT.2020 R'G'B' and Rec. ITU-R BT.2020 Y'C'BC'R + * colorimetry. + * Drivers can configure the sink to use an RGB format, tell the + * sink to expect Rec. ITU-R BT.2020 R'G'B' colorimetry and convert + * to the appropriate quantization range. + * Drivers can configure the sink to use a YCbCr format, tell the + * sink to expect Rec. ITU-R BT.2020 Y'C'BC'R colorimetry, convert + * to YCbCr using the Rec. ITU-R BT.2020 non-constant luminance + * conversion matrix and convert to the appropriate quantization + * range. + * The variants BT2020_RGB and BT2020_YCC are equivalent and the + * driver chooses between RGB and YCbCr on its own. + * SMPTE_170M_YCC: + * BT709_YCC: + * XVYCC_601: + * XVYCC_709: + * SYCC_601: + * opYCC_601: + * opRGB: + * BT2020_CYCC: + * DCI-P3_RGB_D65: + * DCI-P3_RGB_Theater: + * RGB_WIDE_FIXED: + * RGB_WIDE_FLOAT: + * BT601_YCC: + * The behavior is undefined. * * Because between HDMI and DP have different colorspaces, * drm_mode_create_hdmi_colorspace_property() is used for HDMI connector and diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index c754651044d4..e3fa43291f44 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -471,14 +471,6 @@ enum drm_privacy_screen_status { * * DP definitions come from the DP v2.0 spec * HDMI definitions come from the CTA-861-H spec - * - * A note on YCC and RGB variants: - * - * Since userspace is not aware of the encoding on the wire - * (RGB or YCbCr), drivers are free to pick the appropriate - * variant, regardless of what userspace selects. E.g., if - * BT2020_RGB is selected by userspace a driver will pick - * BT2020_YCC if the encoding on the wire is YUV444 or YUV420. * * @DRM_MODE_COLORIMETRY_DEFAULT: * Driver specific behavior. -- cgit v1.3.1 From 6be74ddd0609959b4005f88b6a4d4af678e4a71f Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 17:31:59 +0200 Subject: drm/ttm: Allow TTM LRU list nodes of different types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To be able to handle list unlocking while traversing the LRU list, we want the iterators not only to point to the next position of the list traversal, but to insert themselves as list nodes at that point to work around the fact that the next node might otherwise disappear from the list while the iterator is pointing to it. These list nodes need to be easily distinguishable from other list nodes so that others traversing the list can skip over them. So declare a struct ttm_lru_item, with a struct list_head member and a type enum. This will slightly increase the size of a struct ttm_resource. Changes in previous series: - Update enum ttm_lru_item_type documentation. v3: - Introduce ttm_lru_first_res_or_null() (Christian König, Thomas Hellström) v5: - Update also the TTM test code (Xe CI). Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-2-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/tests/ttm_bo_test.c | 6 +- drivers/gpu/drm/ttm/tests/ttm_resource_test.c | 2 +- drivers/gpu/drm/ttm/ttm_device.c | 4 +- drivers/gpu/drm/ttm/ttm_resource.c | 89 +++++++++++++++++++++------ include/drm/ttm/ttm_resource.h | 54 +++++++++++++++- 5 files changed, 129 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c index d1b32303d051..f0a7eb62116c 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_bo_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_bo_test.c @@ -271,7 +271,7 @@ static void ttm_bo_unreserve_basic(struct kunit *test) man = ttm_manager_type(priv->ttm_dev, mem_type); KUNIT_ASSERT_EQ(test, - list_is_last(&res1->lru, &man->lru[bo->priority]), 1); + list_is_last(&res1->lru.link, &man->lru[bo->priority]), 1); ttm_resource_free(bo, &res2); ttm_resource_free(bo, &res1); @@ -308,11 +308,11 @@ static void ttm_bo_unreserve_pinned(struct kunit *test) err = ttm_resource_alloc(bo, place, &res2); KUNIT_ASSERT_EQ(test, err, 0); KUNIT_ASSERT_EQ(test, - list_is_last(&res2->lru, &priv->ttm_dev->pinned), 1); + list_is_last(&res2->lru.link, &priv->ttm_dev->pinned), 1); ttm_bo_unreserve(bo); KUNIT_ASSERT_EQ(test, - list_is_last(&res1->lru, &priv->ttm_dev->pinned), 1); + list_is_last(&res1->lru.link, &priv->ttm_dev->pinned), 1); ttm_resource_free(bo, &res1); ttm_resource_free(bo, &res2); diff --git a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c index 9c2f13e53162..22260e7aea58 100644 --- a/drivers/gpu/drm/ttm/tests/ttm_resource_test.c +++ b/drivers/gpu/drm/ttm/tests/ttm_resource_test.c @@ -198,7 +198,7 @@ static void ttm_resource_fini_basic(struct kunit *test) ttm_resource_init(bo, place, res); ttm_resource_fini(man, res); - KUNIT_ASSERT_TRUE(test, list_empty(&res->lru)); + KUNIT_ASSERT_TRUE(test, list_empty(&res->lru.link)); KUNIT_ASSERT_EQ(test, man->usage, 0); } diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 434cf0258000..09411978a13a 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -274,14 +274,14 @@ static void ttm_device_clear_lru_dma_mappings(struct ttm_device *bdev, struct ttm_resource *res; spin_lock(&bdev->lru_lock); - while ((res = list_first_entry_or_null(list, typeof(*res), lru))) { + while ((res = ttm_lru_first_res_or_null(list))) { struct ttm_buffer_object *bo = res->bo; /* Take ref against racing releases once lru_lock is unlocked */ if (!ttm_bo_get_unless_zero(bo)) continue; - list_del_init(&res->lru); + list_del_init(&bo->resource->lru.link); spin_unlock(&bdev->lru_lock); if (bo->ttm) diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 4a66b851b67d..db9a7a3717c4 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -70,8 +70,8 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk) dma_resv_assert_held(pos->last->bo->base.resv); man = ttm_manager_type(pos->first->bo->bdev, i); - list_bulk_move_tail(&man->lru[j], &pos->first->lru, - &pos->last->lru); + list_bulk_move_tail(&man->lru[j], &pos->first->lru.link, + &pos->last->lru.link); } } } @@ -84,14 +84,38 @@ ttm_lru_bulk_move_pos(struct ttm_lru_bulk_move *bulk, struct ttm_resource *res) return &bulk->pos[res->mem_type][res->bo->priority]; } +/* Return the previous resource on the list (skip over non-resource list items) */ +static struct ttm_resource *ttm_lru_prev_res(struct ttm_resource *cur) +{ + struct ttm_lru_item *lru = &cur->lru; + + do { + lru = list_prev_entry(lru, link); + } while (!ttm_lru_item_is_res(lru)); + + return ttm_lru_item_to_res(lru); +} + +/* Return the next resource on the list (skip over non-resource list items) */ +static struct ttm_resource *ttm_lru_next_res(struct ttm_resource *cur) +{ + struct ttm_lru_item *lru = &cur->lru; + + do { + lru = list_next_entry(lru, link); + } while (!ttm_lru_item_is_res(lru)); + + return ttm_lru_item_to_res(lru); +} + /* Move the resource to the tail of the bulk move range */ static void ttm_lru_bulk_move_pos_tail(struct ttm_lru_bulk_move_pos *pos, struct ttm_resource *res) { if (pos->last != res) { if (pos->first == res) - pos->first = list_next_entry(res, lru); - list_move(&res->lru, &pos->last->lru); + pos->first = ttm_lru_next_res(res); + list_move(&res->lru.link, &pos->last->lru.link); pos->last = res; } } @@ -122,11 +146,11 @@ static void ttm_lru_bulk_move_del(struct ttm_lru_bulk_move *bulk, pos->first = NULL; pos->last = NULL; } else if (pos->first == res) { - pos->first = list_next_entry(res, lru); + pos->first = ttm_lru_next_res(res); } else if (pos->last == res) { - pos->last = list_prev_entry(res, lru); + pos->last = ttm_lru_prev_res(res); } else { - list_move(&res->lru, &pos->last->lru); + list_move(&res->lru.link, &pos->last->lru.link); } } @@ -155,7 +179,7 @@ void ttm_resource_move_to_lru_tail(struct ttm_resource *res) lockdep_assert_held(&bo->bdev->lru_lock); if (bo->pin_count) { - list_move_tail(&res->lru, &bdev->pinned); + list_move_tail(&res->lru.link, &bdev->pinned); } else if (bo->bulk_move) { struct ttm_lru_bulk_move_pos *pos = @@ -166,7 +190,7 @@ void ttm_resource_move_to_lru_tail(struct ttm_resource *res) struct ttm_resource_manager *man; man = ttm_manager_type(bdev, res->mem_type); - list_move_tail(&res->lru, &man->lru[bo->priority]); + list_move_tail(&res->lru.link, &man->lru[bo->priority]); } } @@ -197,9 +221,9 @@ void ttm_resource_init(struct ttm_buffer_object *bo, man = ttm_manager_type(bo->bdev, place->mem_type); spin_lock(&bo->bdev->lru_lock); if (bo->pin_count) - list_add_tail(&res->lru, &bo->bdev->pinned); + list_add_tail(&res->lru.link, &bo->bdev->pinned); else - list_add_tail(&res->lru, &man->lru[bo->priority]); + list_add_tail(&res->lru.link, &man->lru[bo->priority]); man->usage += res->size; spin_unlock(&bo->bdev->lru_lock); } @@ -221,7 +245,7 @@ void ttm_resource_fini(struct ttm_resource_manager *man, struct ttm_device *bdev = man->bdev; spin_lock(&bdev->lru_lock); - list_del_init(&res->lru); + list_del_init(&res->lru.link); man->usage -= res->size; spin_unlock(&bdev->lru_lock); } @@ -472,14 +496,16 @@ struct ttm_resource * ttm_resource_manager_first(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor) { - struct ttm_resource *res; + struct ttm_lru_item *lru; lockdep_assert_held(&man->bdev->lru_lock); for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY; ++cursor->priority) - list_for_each_entry(res, &man->lru[cursor->priority], lru) - return res; + list_for_each_entry(lru, &man->lru[cursor->priority], link) { + if (ttm_lru_item_is_res(lru)) + return ttm_lru_item_to_res(lru); + } return NULL; } @@ -498,15 +524,40 @@ ttm_resource_manager_next(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor, struct ttm_resource *res) { + struct ttm_lru_item *lru = &res->lru; + lockdep_assert_held(&man->bdev->lru_lock); - list_for_each_entry_continue(res, &man->lru[cursor->priority], lru) - return res; + list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) { + if (ttm_lru_item_is_res(lru)) + return ttm_lru_item_to_res(lru); + } for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY; ++cursor->priority) - list_for_each_entry(res, &man->lru[cursor->priority], lru) - return res; + list_for_each_entry(lru, &man->lru[cursor->priority], link) { + if (ttm_lru_item_is_res(lru)) + ttm_lru_item_to_res(lru); + } + + return NULL; +} + +/** + * ttm_lru_first_res_or_null() - Return the first resource on an lru list + * @head: The list head of the lru list. + * + * Return: Pointer to the first resource on the lru list or NULL if + * there is none. + */ +struct ttm_resource *ttm_lru_first_res_or_null(struct list_head *head) +{ + struct ttm_lru_item *lru; + + list_for_each_entry(lru, head, link) { + if (ttm_lru_item_is_res(lru)) + return ttm_lru_item_to_res(lru); + } return NULL; } diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 69769355139f..1511d91e290d 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -49,6 +49,43 @@ struct io_mapping; struct sg_table; struct scatterlist; +/** + * enum ttm_lru_item_type - enumerate ttm_lru_item subclasses + */ +enum ttm_lru_item_type { + /** @TTM_LRU_RESOURCE: The resource subclass */ + TTM_LRU_RESOURCE, + /** @TTM_LRU_HITCH: The iterator hitch subclass */ + TTM_LRU_HITCH +}; + +/** + * struct ttm_lru_item - The TTM lru list node base class + * @link: The list link + * @type: The subclass type + */ +struct ttm_lru_item { + struct list_head link; + enum ttm_lru_item_type type; +}; + +/** + * ttm_lru_item_init() - initialize a struct ttm_lru_item + * @item: The item to initialize + * @type: The subclass type + */ +static inline void ttm_lru_item_init(struct ttm_lru_item *item, + enum ttm_lru_item_type type) +{ + item->type = type; + INIT_LIST_HEAD(&item->link); +} + +static inline bool ttm_lru_item_is_res(const struct ttm_lru_item *item) +{ + return item->type == TTM_LRU_RESOURCE; +} + struct ttm_resource_manager_func { /** * struct ttm_resource_manager_func member alloc @@ -217,9 +254,21 @@ struct ttm_resource { /** * @lru: Least recently used list, see &ttm_resource_manager.lru */ - struct list_head lru; + struct ttm_lru_item lru; }; +/** + * ttm_lru_item_to_res() - Downcast a struct ttm_lru_item to a struct ttm_resource + * @item: The struct ttm_lru_item to downcast + * + * Return: Pointer to the embedding struct ttm_resource + */ +static inline struct ttm_resource * +ttm_lru_item_to_res(struct ttm_lru_item *item) +{ + return container_of(item, struct ttm_resource, lru); +} + /** * struct ttm_resource_cursor * @@ -393,6 +442,9 @@ ttm_resource_manager_next(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor, struct ttm_resource *res); +struct ttm_resource * +ttm_lru_first_res_or_null(struct list_head *head); + /** * ttm_resource_manager_for_each_res - iterate over all resources * @man: the resource manager -- cgit v1.3.1 From 9c62fb62c9f0761eeda8f2a9517e007ff2cdbe9a Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 17:32:00 +0200 Subject: drm/ttm: Slightly clean up LRU list iteration MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To make the transition to using lru hitches easier, simplify the ttm_resource_manager_next() interface to only take the cursor and reuse ttm_resource_manager_next() functionality from ttm_resource_manager_first(). Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-3-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_resource.c | 48 +++++++++++++++++--------------------- include/drm/ttm/ttm_resource.h | 10 ++++---- 2 files changed, 27 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index db9a7a3717c4..8bfbddddc0e8 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -496,50 +496,44 @@ struct ttm_resource * ttm_resource_manager_first(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor) { - struct ttm_lru_item *lru; - lockdep_assert_held(&man->bdev->lru_lock); - for (cursor->priority = 0; cursor->priority < TTM_MAX_BO_PRIORITY; - ++cursor->priority) - list_for_each_entry(lru, &man->lru[cursor->priority], link) { - if (ttm_lru_item_is_res(lru)) - return ttm_lru_item_to_res(lru); - } - - return NULL; + cursor->priority = 0; + cursor->man = man; + cursor->cur = &man->lru[cursor->priority]; + return ttm_resource_manager_next(cursor); } /** * ttm_resource_manager_next * - * @man: resource manager to iterate over * @cursor: cursor to record the position - * @res: the current resource pointer * - * Returns the next resource from the resource manager. + * Return: the next resource from the resource manager. */ struct ttm_resource * -ttm_resource_manager_next(struct ttm_resource_manager *man, - struct ttm_resource_cursor *cursor, - struct ttm_resource *res) +ttm_resource_manager_next(struct ttm_resource_cursor *cursor) { - struct ttm_lru_item *lru = &res->lru; + struct ttm_resource_manager *man = cursor->man; + struct ttm_lru_item *lru; lockdep_assert_held(&man->bdev->lru_lock); - list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) { - if (ttm_lru_item_is_res(lru)) - return ttm_lru_item_to_res(lru); - } - - for (++cursor->priority; cursor->priority < TTM_MAX_BO_PRIORITY; - ++cursor->priority) - list_for_each_entry(lru, &man->lru[cursor->priority], link) { - if (ttm_lru_item_is_res(lru)) - ttm_lru_item_to_res(lru); + for (;;) { + lru = list_entry(cursor->cur, typeof(*lru), link); + list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) { + if (ttm_lru_item_is_res(lru)) { + cursor->cur = &lru->link; + return ttm_lru_item_to_res(lru); + } } + if (++cursor->priority >= TTM_MAX_BO_PRIORITY) + break; + + cursor->cur = &man->lru[cursor->priority]; + } + return NULL; } diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 1511d91e290d..7d81fd5b5b83 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -272,11 +272,15 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) /** * struct ttm_resource_cursor * + * @man: The resource manager currently being iterated over. + * @cur: The list head the cursor currently points to. * @priority: the current priority * * Cursor to iterate over the resources in a manager. */ struct ttm_resource_cursor { + struct ttm_resource_manager *man; + struct list_head *cur; unsigned int priority; }; @@ -438,9 +442,7 @@ struct ttm_resource * ttm_resource_manager_first(struct ttm_resource_manager *man, struct ttm_resource_cursor *cursor); struct ttm_resource * -ttm_resource_manager_next(struct ttm_resource_manager *man, - struct ttm_resource_cursor *cursor, - struct ttm_resource *res); +ttm_resource_manager_next(struct ttm_resource_cursor *cursor); struct ttm_resource * ttm_lru_first_res_or_null(struct list_head *head); @@ -455,7 +457,7 @@ ttm_lru_first_res_or_null(struct list_head *head); */ #define ttm_resource_manager_for_each_res(man, cursor, res) \ for (res = ttm_resource_manager_first(man, cursor); res; \ - res = ttm_resource_manager_next(man, cursor, res)) + res = ttm_resource_manager_next(cursor)) struct ttm_kmap_iter * ttm_kmap_iter_iomap_init(struct ttm_kmap_iter_iomap *iter_io, -- cgit v1.3.1 From 8e9bf0fb10a79aaed37474600948cd33d14aa606 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 17:32:01 +0200 Subject: drm/ttm: Use LRU hitches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Have iterators insert themselves into the list they are iterating over using hitch list nodes. Since only the iterator owner can remove these list nodes from the list, it's safe to unlock the list and when continuing, use them as a starting point. Due to the way LRU bumping works in TTM, newly added items will not be missed, and bumped items will be iterated over a second time before reaching the end of the list. The exception is list with bulk move sublists. When bumping a sublist, a hitch that is part of that sublist will also be moved and we might miss items if restarting from it. This will be addressed in a later patch. Changes in previous series: - Updated ttm_resource_cursor_fini() documentation. v2: - Don't reorder ttm_resource_manager_first() and _next(). (Christian König). - Use list_add instead of list_move (Christian König) v3: - Split into two patches, one cleanup, one new functionality (Christian König) - use ttm_resource_cursor_fini_locked() instead of open-coding (Matthew Brost) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-4-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 1 + drivers/gpu/drm/ttm/ttm_device.c | 9 ++++-- drivers/gpu/drm/ttm/ttm_resource.c | 56 ++++++++++++++++++++++++++++++++------ include/drm/ttm/ttm_resource.h | 9 ++++-- 4 files changed, 62 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 6396dece0db1..43eda720657f 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -621,6 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (locked) dma_resv_unlock(res->bo->base.resv); } + ttm_resource_cursor_fini_locked(&cursor); if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index 09411978a13a..f9e9b1ec8c8a 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -170,12 +170,17 @@ int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, num_pages = PFN_UP(bo->base.size); ret = ttm_bo_swapout(bo, ctx, gfp_flags); /* ttm_bo_swapout has dropped the lru_lock */ - if (!ret) + if (!ret) { + ttm_resource_cursor_fini(&cursor); return num_pages; - if (ret != -EBUSY) + } + if (ret != -EBUSY) { + ttm_resource_cursor_fini(&cursor); return ret; + } } } + ttm_resource_cursor_fini_locked(&cursor); spin_unlock(&bdev->lru_lock); return 0; } diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 8bfbddddc0e8..9c8b6499edfb 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -33,6 +33,37 @@ #include +/** + * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage + * @cursor: The struct ttm_resource_cursor to finalize. + * + * The function pulls the LRU list cursor off any lists it was previusly + * attached to. Needs to be called with the LRU lock held. The function + * can be called multiple times after eachother. + */ +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor) +{ + lockdep_assert_held(&cursor->man->bdev->lru_lock); + list_del_init(&cursor->hitch.link); +} + +/** + * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage + * @cursor: The struct ttm_resource_cursor to finalize. + * + * The function pulls the LRU list cursor off any lists it was previusly + * attached to. Needs to be called without the LRU list lock held. The + * function can be called multiple times after eachother. + */ +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor) +{ + spinlock_t *lru_lock = &cursor->man->bdev->lru_lock; + + spin_lock(lru_lock); + ttm_resource_cursor_fini_locked(cursor); + spin_unlock(lru_lock); +} + /** * ttm_lru_bulk_move_init - initialize a bulk move structure * @bulk: the structure to init @@ -485,12 +516,15 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man, EXPORT_SYMBOL(ttm_resource_manager_debug); /** - * ttm_resource_manager_first - * + * ttm_resource_manager_first() - Start iterating over the resources + * of a resource manager * @man: resource manager to iterate over * @cursor: cursor to record the position * - * Returns the first resource from the resource manager. + * Initializes the cursor and starts iterating. When done iterating, + * the caller must explicitly call ttm_resource_cursor_fini(). + * + * Return: The first resource from the resource manager. */ struct ttm_resource * ttm_resource_manager_first(struct ttm_resource_manager *man, @@ -500,13 +534,15 @@ ttm_resource_manager_first(struct ttm_resource_manager *man, cursor->priority = 0; cursor->man = man; - cursor->cur = &man->lru[cursor->priority]; + ttm_lru_item_init(&cursor->hitch, TTM_LRU_HITCH); + list_add(&cursor->hitch.link, &man->lru[cursor->priority]); + return ttm_resource_manager_next(cursor); } /** - * ttm_resource_manager_next - * + * ttm_resource_manager_next() - Continue iterating over the resource manager + * resources * @cursor: cursor to record the position * * Return: the next resource from the resource manager. @@ -520,10 +556,10 @@ ttm_resource_manager_next(struct ttm_resource_cursor *cursor) lockdep_assert_held(&man->bdev->lru_lock); for (;;) { - lru = list_entry(cursor->cur, typeof(*lru), link); + lru = &cursor->hitch; list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) { if (ttm_lru_item_is_res(lru)) { - cursor->cur = &lru->link; + list_move(&cursor->hitch.link, &lru->link); return ttm_lru_item_to_res(lru); } } @@ -531,9 +567,11 @@ ttm_resource_manager_next(struct ttm_resource_cursor *cursor) if (++cursor->priority >= TTM_MAX_BO_PRIORITY) break; - cursor->cur = &man->lru[cursor->priority]; + list_move(&cursor->hitch.link, &man->lru[cursor->priority]); } + ttm_resource_cursor_fini_locked(cursor); + return NULL; } diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 7d81fd5b5b83..8fac781f641e 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -273,17 +273,22 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) * struct ttm_resource_cursor * * @man: The resource manager currently being iterated over. - * @cur: The list head the cursor currently points to. + * @hitch: A hitch list node inserted before the next resource + * to iterate over. * @priority: the current priority * * Cursor to iterate over the resources in a manager. */ struct ttm_resource_cursor { struct ttm_resource_manager *man; - struct list_head *cur; + struct ttm_lru_item hitch; unsigned int priority; }; +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); + +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); + /** * struct ttm_lru_bulk_move_pos * -- cgit v1.3.1 From 4c44f89c5daee9540cb7428de5d835bd00951350 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 17:32:02 +0200 Subject: drm/ttm, drm/amdgpu, drm/xe: Consider hitch moves within bulk sublist moves MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit To address the problem with hitches moving when bulk move sublists are lru-bumped, register the list cursors with the ttm_lru_bulk_move structure when traversing its list, and when lru-bumping the list, move the cursor hitch to the tail. This also means it's mandatory for drivers to call ttm_lru_bulk_move_init() and ttm_lru_bulk_move_fini() when initializing and finalizing the bulk move structure, so add those calls to the amdgpu- and xe driver. Compared to v1 this is slightly more code but less fragile and hopefully easier to understand. Changes in previous series: - Completely rework the functionality - Avoid a NULL pointer dereference assigning manager->mem_type - Remove some leftover code causing build problems v2: - For hitch bulk tail moves, store the mem_type in the cursor instead of with the manager. v3: - Remove leftover mem_type member from change in v2. v6: - Add some lockdep asserts (Matthew Brost) - Avoid NULL pointer dereference (Matthew Brost) - No need to check bo->resource before dereferencing bo->bulk_move (Matthew Brost) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-5-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c | 4 ++ drivers/gpu/drm/ttm/ttm_resource.c | 92 ++++++++++++++++++++++++++++++++++ drivers/gpu/drm/xe/xe_vm.c | 4 ++ include/drm/ttm/ttm_resource.h | 56 +++++++++++++-------- 4 files changed, 135 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c index 4e2391c83d7c..6293f3b54b4a 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c @@ -2422,6 +2422,8 @@ int amdgpu_vm_init(struct amdgpu_device *adev, struct amdgpu_vm *vm, if (r) return r; + ttm_lru_bulk_move_init(&vm->lru_bulk_move); + vm->is_compute_context = false; vm->use_cpu_for_update = !!(adev->vm_manager.vm_update_mode & @@ -2486,6 +2488,7 @@ error_free_root: error_free_delayed: dma_fence_put(vm->last_tlb_flush); dma_fence_put(vm->last_unlocked); + ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); amdgpu_vm_fini_entities(vm); return r; @@ -2642,6 +2645,7 @@ void amdgpu_vm_fini(struct amdgpu_device *adev, struct amdgpu_vm *vm) } } + ttm_lru_bulk_move_fini(&adev->mman.bdev, &vm->lru_bulk_move); } /** diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index 9c8b6499edfb..b6a2daac5518 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -33,6 +33,53 @@ #include +/* Detach the cursor from the bulk move list*/ +static void +ttm_resource_cursor_clear_bulk(struct ttm_resource_cursor *cursor) +{ + lockdep_assert_held(&cursor->man->bdev->lru_lock); + + cursor->bulk = NULL; + list_del_init(&cursor->bulk_link); +} + +/* Move the cursor to the end of the bulk move list it's in */ +static void ttm_resource_cursor_move_bulk_tail(struct ttm_lru_bulk_move *bulk, + struct ttm_resource_cursor *cursor) +{ + struct ttm_lru_bulk_move_pos *pos; + + lockdep_assert_held(&cursor->man->bdev->lru_lock); + + if (WARN_ON_ONCE(bulk != cursor->bulk)) { + list_del_init(&cursor->bulk_link); + return; + } + + pos = &bulk->pos[cursor->mem_type][cursor->priority]; + if (pos->last) + list_move(&cursor->hitch.link, &pos->last->lru.link); + ttm_resource_cursor_clear_bulk(cursor); +} + +/* Move all cursors attached to a bulk move to its end */ +static void ttm_bulk_move_adjust_cursors(struct ttm_lru_bulk_move *bulk) +{ + struct ttm_resource_cursor *cursor, *next; + + list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link) + ttm_resource_cursor_move_bulk_tail(bulk, cursor); +} + +/* Remove a cursor from an empty bulk move list */ +static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk) +{ + struct ttm_resource_cursor *cursor, *next; + + list_for_each_entry_safe(cursor, next, &bulk->cursor_list, bulk_link) + ttm_resource_cursor_clear_bulk(cursor); +} + /** * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage * @cursor: The struct ttm_resource_cursor to finalize. @@ -45,6 +92,7 @@ void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor) { lockdep_assert_held(&cursor->man->bdev->lru_lock); list_del_init(&cursor->hitch.link); + ttm_resource_cursor_clear_bulk(cursor); } /** @@ -73,9 +121,27 @@ void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor) void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk) { memset(bulk, 0, sizeof(*bulk)); + INIT_LIST_HEAD(&bulk->cursor_list); } EXPORT_SYMBOL(ttm_lru_bulk_move_init); +/** + * ttm_lru_bulk_move_fini - finalize a bulk move structure + * @bdev: The struct ttm_device + * @bulk: the structure to finalize + * + * Sanity checks that bulk moves don't have any + * resources left and hence no cursors attached. + */ +void ttm_lru_bulk_move_fini(struct ttm_device *bdev, + struct ttm_lru_bulk_move *bulk) +{ + spin_lock(&bdev->lru_lock); + ttm_bulk_move_drop_cursors(bulk); + spin_unlock(&bdev->lru_lock); +} +EXPORT_SYMBOL(ttm_lru_bulk_move_fini); + /** * ttm_lru_bulk_move_tail - bulk move range of resources to the LRU tail. * @@ -88,6 +154,7 @@ void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk) { unsigned i, j; + ttm_bulk_move_adjust_cursors(bulk); for (i = 0; i < TTM_NUM_MEM_TYPES; ++i) { for (j = 0; j < TTM_MAX_BO_PRIORITY; ++j) { struct ttm_lru_bulk_move_pos *pos = &bulk->pos[i][j]; @@ -515,6 +582,28 @@ void ttm_resource_manager_debug(struct ttm_resource_manager *man, } EXPORT_SYMBOL(ttm_resource_manager_debug); +static void +ttm_resource_cursor_check_bulk(struct ttm_resource_cursor *cursor, + struct ttm_lru_item *next_lru) +{ + struct ttm_resource *next = ttm_lru_item_to_res(next_lru); + struct ttm_lru_bulk_move *bulk = NULL; + struct ttm_buffer_object *bo = next->bo; + + lockdep_assert_held(&cursor->man->bdev->lru_lock); + bulk = bo->bulk_move; + + if (cursor->bulk != bulk) { + if (bulk) { + list_move_tail(&cursor->bulk_link, &bulk->cursor_list); + cursor->mem_type = next->mem_type; + } else { + list_del_init(&cursor->bulk_link); + } + cursor->bulk = bulk; + } +} + /** * ttm_resource_manager_first() - Start iterating over the resources * of a resource manager @@ -535,6 +624,7 @@ ttm_resource_manager_first(struct ttm_resource_manager *man, cursor->priority = 0; cursor->man = man; ttm_lru_item_init(&cursor->hitch, TTM_LRU_HITCH); + INIT_LIST_HEAD(&cursor->bulk_link); list_add(&cursor->hitch.link, &man->lru[cursor->priority]); return ttm_resource_manager_next(cursor); @@ -559,6 +649,7 @@ ttm_resource_manager_next(struct ttm_resource_cursor *cursor) lru = &cursor->hitch; list_for_each_entry_continue(lru, &man->lru[cursor->priority], link) { if (ttm_lru_item_is_res(lru)) { + ttm_resource_cursor_check_bulk(cursor, lru); list_move(&cursor->hitch.link, &lru->link); return ttm_lru_item_to_res(lru); } @@ -568,6 +659,7 @@ ttm_resource_manager_next(struct ttm_resource_cursor *cursor) break; list_move(&cursor->hitch.link, &man->lru[cursor->priority]); + ttm_resource_cursor_clear_bulk(cursor); } ttm_resource_cursor_fini_locked(cursor); diff --git a/drivers/gpu/drm/xe/xe_vm.c b/drivers/gpu/drm/xe/xe_vm.c index 4aa3943e6f29..47b737f10e74 100644 --- a/drivers/gpu/drm/xe/xe_vm.c +++ b/drivers/gpu/drm/xe/xe_vm.c @@ -1252,6 +1252,8 @@ struct xe_vm *xe_vm_create(struct xe_device *xe, u32 flags) init_rwsem(&vm->userptr.notifier_lock); spin_lock_init(&vm->userptr.invalidated_lock); + ttm_lru_bulk_move_init(&vm->lru_bulk_move); + INIT_LIST_HEAD(&vm->preempt.exec_queues); vm->preempt.min_run_period_ms = 10; /* FIXME: Wire up to uAPI */ @@ -1369,6 +1371,7 @@ err_no_resv: mutex_destroy(&vm->snap_mutex); for_each_tile(tile, xe, id) xe_range_fence_tree_fini(&vm->rftree[id]); + ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); kfree(vm); if (!(flags & XE_VM_FLAG_MIGRATION)) xe_pm_runtime_put(xe); @@ -1511,6 +1514,7 @@ static void xe_vm_free(struct drm_gpuvm *gpuvm) XE_WARN_ON(vm->pt_root[id]); trace_xe_vm_free(vm); + ttm_lru_bulk_move_fini(&xe->ttm, &vm->lru_bulk_move); kfree(vm); } diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 8fac781f641e..571abb4861a6 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -269,26 +269,6 @@ ttm_lru_item_to_res(struct ttm_lru_item *item) return container_of(item, struct ttm_resource, lru); } -/** - * struct ttm_resource_cursor - * - * @man: The resource manager currently being iterated over. - * @hitch: A hitch list node inserted before the next resource - * to iterate over. - * @priority: the current priority - * - * Cursor to iterate over the resources in a manager. - */ -struct ttm_resource_cursor { - struct ttm_resource_manager *man; - struct ttm_lru_item hitch; - unsigned int priority; -}; - -void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); - -void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); - /** * struct ttm_lru_bulk_move_pos * @@ -304,8 +284,9 @@ struct ttm_lru_bulk_move_pos { /** * struct ttm_lru_bulk_move - * * @pos: first/last lru entry for resources in the each domain/priority + * @cursor_list: The list of cursors currently traversing any of + * the sublists of @pos. Protected by the ttm device's lru_lock. * * Container for the current bulk move state. Should be used with * ttm_lru_bulk_move_init() and ttm_bo_set_bulk_move(). @@ -315,8 +296,39 @@ struct ttm_lru_bulk_move_pos { */ struct ttm_lru_bulk_move { struct ttm_lru_bulk_move_pos pos[TTM_NUM_MEM_TYPES][TTM_MAX_BO_PRIORITY]; + struct list_head cursor_list; }; +/** + * struct ttm_resource_cursor + * @man: The resource manager currently being iterated over + * @hitch: A hitch list node inserted before the next resource + * to iterate over. + * @bulk_link: A list link for the list of cursors traversing the + * bulk sublist of @bulk. Protected by the ttm device's lru_lock. + * @bulk: Pointer to struct ttm_lru_bulk_move whose subrange @hitch is + * inserted to. NULL if none. Never dereference this pointer since + * the struct ttm_lru_bulk_move object pointed to might have been + * freed. The pointer is only for comparison. + * @mem_type: The memory type of the LRU list being traversed. + * This field is valid iff @bulk != NULL. + * @priority: the current priority + * + * Cursor to iterate over the resources in a manager. + */ +struct ttm_resource_cursor { + struct ttm_resource_manager *man; + struct ttm_lru_item hitch; + struct list_head bulk_link; + struct ttm_lru_bulk_move *bulk; + unsigned int mem_type; + unsigned int priority; +}; + +void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); + +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); + /** * struct ttm_kmap_iter_iomap - Specialization for a struct io_mapping + * struct sg_table backed struct ttm_resource. @@ -405,6 +417,8 @@ ttm_resource_manager_cleanup(struct ttm_resource_manager *man) void ttm_lru_bulk_move_init(struct ttm_lru_bulk_move *bulk); void ttm_lru_bulk_move_tail(struct ttm_lru_bulk_move *bulk); +void ttm_lru_bulk_move_fini(struct ttm_device *bdev, + struct ttm_lru_bulk_move *bulk); void ttm_resource_add_bulk_move(struct ttm_resource *res, struct ttm_buffer_object *bo); -- cgit v1.3.1 From da966b82bf3d16f89a05732c933a589ec798d3f7 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 17:32:03 +0200 Subject: drm/ttm: Provide a generic LRU walker helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide a generic LRU walker in TTM, in the spirit of drm_gem_lru_scan() but building on the restartable TTM LRU functionality. The LRU walker optionally supports locking objects as part of a ww mutex locking transaction, to mimic to some extent the current functionality in ttm. However any -EDEADLK return is converted to -ENOSPC and then to -ENOMEM before reaching the driver, so that the driver will need to backoff and possibly retry without being able to keep the ticket. v3: - Move the helper to core ttm. - Remove the drm_exec usage from it for now, it will be reintroduced later in the series. v4: - Handle the -EALREADY case if ticketlocking. v6: - Some cleanup and added code comments (Matthew Brost) - Clarified the ticketlock in the commit message (Matthew Brost) v7: - Use s64 rather than long for the target and progress (Christian König) - Update documentation to not encourage using pages as a progress measure. (Christian König) - Remove cond_resched(). (Christian König) Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-6-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo_util.c | 151 ++++++++++++++++++++++++++++++++++++++ include/drm/ttm/ttm_bo.h | 35 +++++++++ 2 files changed, 186 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index 0b3f4267130c..c2759add58f5 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -768,3 +768,154 @@ error_destroy_tt: ttm_tt_destroy(bo->bdev, ttm); return ret; } + +static bool ttm_lru_walk_trylock(struct ttm_lru_walk *walk, + struct ttm_buffer_object *bo, + bool *needs_unlock) +{ + struct ttm_operation_ctx *ctx = walk->ctx; + + *needs_unlock = false; + + if (dma_resv_trylock(bo->base.resv)) { + *needs_unlock = true; + return true; + } + + if (bo->base.resv == ctx->resv && ctx->allow_res_evict) { + dma_resv_assert_held(bo->base.resv); + return true; + } + + return false; +} + +static int ttm_lru_walk_ticketlock(struct ttm_lru_walk *walk, + struct ttm_buffer_object *bo, + bool *needs_unlock) +{ + struct dma_resv *resv = bo->base.resv; + int ret; + + if (walk->ctx->interruptible) + ret = dma_resv_lock_interruptible(resv, walk->ticket); + else + ret = dma_resv_lock(resv, walk->ticket); + + if (!ret) { + *needs_unlock = true; + /* + * Only a single ticketlock per loop. Ticketlocks are prone + * to return -EDEADLK causing the eviction to fail, so + * after waiting for the ticketlock, revert back to + * trylocking for this walk. + */ + walk->ticket = NULL; + } else if (ret == -EDEADLK) { + /* Caller needs to exit the ww transaction. */ + ret = -ENOSPC; + } + + return ret; +} + +static void ttm_lru_walk_unlock(struct ttm_buffer_object *bo, bool locked) +{ + if (locked) + dma_resv_unlock(bo->base.resv); +} + +/** + * ttm_lru_walk_for_evict() - Perform a LRU list walk, with actions taken on + * valid items. + * @walk: describe the walks and actions taken + * @bdev: The TTM device. + * @man: The struct ttm_resource manager whose LRU lists we're walking. + * @target: The end condition for the walk. + * + * The LRU lists of @man are walk, and for each struct ttm_resource encountered, + * the corresponding ttm_buffer_object is locked and taken a reference on, and + * the LRU lock is dropped. the LRU lock may be dropped before locking and, in + * that case, it's verified that the item actually remains on the LRU list after + * the lock, and that the buffer object didn't switch resource in between. + * + * With a locked object, the actions indicated by @walk->process_bo are + * performed, and after that, the bo is unlocked, the refcount dropped and the + * next struct ttm_resource is processed. Here, the walker relies on + * TTM's restartable LRU list implementation. + * + * Typically @walk->process_bo() would return the number of pages evicted, + * swapped or shrunken, so that when the total exceeds @target, or when the + * LRU list has been walked in full, iteration is terminated. It's also terminated + * on error. Note that the definition of @target is done by the caller, it + * could have a different meaning than the number of pages. + * + * Note that the way dma_resv individualization is done, locking needs to be done + * either with the LRU lock held (trylocking only) or with a reference on the + * object. + * + * Return: The progress made towards target or negative error code on error. + */ +s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, + struct ttm_resource_manager *man, s64 target) +{ + struct ttm_resource_cursor cursor; + struct ttm_resource *res; + s64 progress = 0; + s64 lret; + + spin_lock(&bdev->lru_lock); + ttm_resource_manager_for_each_res(man, &cursor, res) { + struct ttm_buffer_object *bo = res->bo; + bool bo_needs_unlock = false; + bool bo_locked = false; + int mem_type; + + /* + * Attempt a trylock before taking a reference on the bo, + * since if we do it the other way around, and the trylock fails, + * we need to drop the lru lock to put the bo. + */ + if (ttm_lru_walk_trylock(walk, bo, &bo_needs_unlock)) + bo_locked = true; + else if (!walk->ticket || walk->ctx->no_wait_gpu || + walk->trylock_only) + continue; + + if (!ttm_bo_get_unless_zero(bo)) { + ttm_lru_walk_unlock(bo, bo_needs_unlock); + continue; + } + + mem_type = res->mem_type; + spin_unlock(&bdev->lru_lock); + + lret = 0; + if (!bo_locked) + lret = ttm_lru_walk_ticketlock(walk, bo, &bo_needs_unlock); + + /* + * Note that in between the release of the lru lock and the + * ticketlock, the bo may have switched resource, + * and also memory type, since the resource may have been + * freed and allocated again with a different memory type. + * In that case, just skip it. + */ + if (!lret && bo->resource && bo->resource->mem_type == mem_type) + lret = walk->ops->process_bo(walk, bo); + + ttm_lru_walk_unlock(bo, bo_needs_unlock); + ttm_bo_put(bo); + if (lret == -EBUSY || lret == -EALREADY) + lret = 0; + progress = (lret < 0) ? lret : progress + lret; + + spin_lock(&bdev->lru_lock); + if (progress < 0 || progress >= target) + break; + } + ttm_resource_cursor_fini_locked(&cursor); + spin_unlock(&bdev->lru_lock); + + return progress; +} diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index ef0f52f56ebc..21fa9d5964ec 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -194,6 +194,41 @@ struct ttm_operation_ctx { uint64_t bytes_moved; }; +struct ttm_lru_walk; + +/** struct ttm_lru_walk_ops - Operations for a LRU walk. */ +struct ttm_lru_walk_ops { + /** + * process_bo - Process this bo. + * @walk: struct ttm_lru_walk describing the walk. + * @bo: A locked and referenced buffer object. + * + * Return: Negative error code on error, User-defined positive value + * (typically, but not always, size of the processed bo) on success. + * On success, the returned values are summed by the walk and the + * walk exits when its target is met. + * 0 also indicates success, -EBUSY means this bo was skipped. + */ + s64 (*process_bo)(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo); +}; + +/** + * struct ttm_lru_walk - Structure describing a LRU walk. + */ +struct ttm_lru_walk { + /** @ops: Pointer to the ops structure. */ + const struct ttm_lru_walk_ops *ops; + /** @ctx: Pointer to the struct ttm_operation_ctx. */ + struct ttm_operation_ctx *ctx; + /** @ticket: The struct ww_acquire_ctx if any. */ + struct ww_acquire_ctx *ticket; + /** @tryock_only: Only use trylock for locking. */ + bool trylock_only; +}; + +s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, + struct ttm_resource_manager *man, s64 target); + /** * ttm_bo_get - reference a struct ttm_buffer_object * -- cgit v1.3.1 From 10efe34dae798c652053d4363871914c478f1475 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 17:32:04 +0200 Subject: drm/ttm: Use the LRU walker helper for swapping MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Rework the TTM swapping to use the LRU walker helper. This helps fixing up the ttm_bo_swapout() interface to be consistent about not requiring any locking. For now mimic the current behaviour of using trylock only. We could be using ticket-locks here but defer that until it's deemed necessary. The TTM swapout functionality is a bit weird anyway since it alternates between memory types without exhausting TTM_PL_SYSTEM first. Intentionally keep pages as the unit of progress since changing that to bytes is an unrelated change that can be done later. v6: - Improve on error code translation in the swapout callback (Matthew Brost). v7: - Use s64 rather than long. - Remove ttm_resource_cursor_fini() since it's no longer used. - Rename ttm_resource_cursor_fini_locked() to ttm_resource_cursor_fini(). - Don't swap out pinned bos. Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-7-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 116 ++++++++++++++++++++++++------------- drivers/gpu/drm/ttm/ttm_bo_util.c | 2 +- drivers/gpu/drm/ttm/ttm_device.c | 30 ++-------- drivers/gpu/drm/ttm/ttm_resource.c | 23 +------- include/drm/ttm/ttm_bo.h | 5 +- include/drm/ttm/ttm_resource.h | 2 - 6 files changed, 89 insertions(+), 89 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index 43eda720657f..f4b2b2bea6cb 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -621,7 +621,7 @@ int ttm_mem_evict_first(struct ttm_device *bdev, if (locked) dma_resv_unlock(res->bo->base.resv); } - ttm_resource_cursor_fini_locked(&cursor); + ttm_resource_cursor_fini(&cursor); if (!bo) { if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) @@ -1118,12 +1118,24 @@ int ttm_bo_wait_ctx(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx) } EXPORT_SYMBOL(ttm_bo_wait_ctx); -int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, - gfp_t gfp_flags) +/** + * struct ttm_bo_swapout_walk - Parameters for the swapout walk + */ +struct ttm_bo_swapout_walk { + /** @walk: The walk base parameters. */ + struct ttm_lru_walk walk; + /** @gfp_flags: The gfp flags to use for ttm_tt_swapout() */ + gfp_t gfp_flags; +}; + +static s64 +ttm_bo_swapout_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) { - struct ttm_place place; - bool locked; - long ret; + struct ttm_place place = {.mem_type = bo->resource->mem_type}; + struct ttm_bo_swapout_walk *swapout_walk = + container_of(walk, typeof(*swapout_walk), walk); + struct ttm_operation_ctx *ctx = walk->ctx; + s64 ret; /* * While the bo may already reside in SYSTEM placement, set @@ -1131,28 +1143,29 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, * The driver may use the fact that we're moving from SYSTEM * as an indication that we're about to swap out. */ - memset(&place, 0, sizeof(place)); - place.mem_type = bo->resource->mem_type; - if (!ttm_bo_evict_swapout_allowable(bo, ctx, &place, &locked, NULL)) - return -EBUSY; + if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, &place)) { + ret = -EBUSY; + goto out; + } if (!bo->ttm || !ttm_tt_is_populated(bo->ttm) || bo->ttm->page_flags & TTM_TT_FLAG_EXTERNAL || - bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED || - !ttm_bo_get_unless_zero(bo)) { - if (locked) - dma_resv_unlock(bo->base.resv); - return -EBUSY; + bo->ttm->page_flags & TTM_TT_FLAG_SWAPPED) { + ret = -EBUSY; + goto out; } if (bo->deleted) { - ret = ttm_bo_cleanup_refs(bo, false, false, locked); - ttm_bo_put(bo); - return ret == -EBUSY ? -ENOSPC : ret; - } + pgoff_t num_pages = bo->ttm->num_pages; - /* TODO: Cleanup the locking */ - spin_unlock(&bo->bdev->lru_lock); + ret = ttm_bo_wait_ctx(bo, ctx); + if (ret) + goto out; + + ttm_bo_cleanup_memtype_use(bo); + ret = num_pages; + goto out; + } /* * Move to system cached @@ -1164,12 +1177,13 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, memset(&hop, 0, sizeof(hop)); place.mem_type = TTM_PL_SYSTEM; ret = ttm_resource_alloc(bo, &place, &evict_mem); - if (unlikely(ret)) + if (ret) goto out; ret = ttm_bo_handle_move_mem(bo, evict_mem, true, ctx, &hop); - if (unlikely(ret != 0)) { - WARN(ret == -EMULTIHOP, "Unexpected multihop in swaput - likely driver bug.\n"); + if (ret) { + WARN(ret == -EMULTIHOP, + "Unexpected multihop in swapout - likely driver bug.\n"); ttm_resource_free(bo, &evict_mem); goto out; } @@ -1179,30 +1193,54 @@ int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, * Make sure BO is idle. */ ret = ttm_bo_wait_ctx(bo, ctx); - if (unlikely(ret != 0)) + if (ret) goto out; ttm_bo_unmap_virtual(bo); - - /* - * Swap out. Buffer will be swapped in again as soon as - * anyone tries to access a ttm page. - */ if (bo->bdev->funcs->swap_notify) bo->bdev->funcs->swap_notify(bo); if (ttm_tt_is_populated(bo->ttm)) - ret = ttm_tt_swapout(bo->bdev, bo->ttm, gfp_flags); + ret = ttm_tt_swapout(bo->bdev, bo->ttm, swapout_walk->gfp_flags); + out: + /* Consider -ENOMEM and -ENOSPC non-fatal. */ + if (ret == -ENOMEM || ret == -ENOSPC) + ret = -EBUSY; - /* - * Unreserve without putting on LRU to avoid swapping out an - * already swapped buffer. - */ - if (locked) - dma_resv_unlock(bo->base.resv); - ttm_bo_put(bo); - return ret == -EBUSY ? -ENOSPC : ret; + return ret; +} + +const struct ttm_lru_walk_ops ttm_swap_ops = { + .process_bo = ttm_bo_swapout_cb, +}; + +/** + * ttm_bo_swapout() - Swap out buffer objects on the LRU list to shmem. + * @bdev: The ttm device. + * @ctx: The ttm_operation_ctx governing the swapout operation. + * @man: The resource manager whose resources / buffer objects are + * goint to be swapped out. + * @gfp_flags: The gfp flags used for shmem page allocations. + * @target: The desired number of bytes to swap out. + * + * Return: The number of bytes actually swapped out, or negative error code + * on error. + */ +s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + struct ttm_resource_manager *man, gfp_t gfp_flags, + s64 target) +{ + struct ttm_bo_swapout_walk swapout_walk = { + .walk = { + .ops = &ttm_swap_ops, + .ctx = ctx, + .trylock_only = true, + }, + .gfp_flags = gfp_flags, + }; + + return ttm_lru_walk_for_evict(&swapout_walk.walk, bdev, man, target); } void ttm_bo_tt_destroy(struct ttm_buffer_object *bo) diff --git a/drivers/gpu/drm/ttm/ttm_bo_util.c b/drivers/gpu/drm/ttm/ttm_bo_util.c index c2759add58f5..3c07f4712d5c 100644 --- a/drivers/gpu/drm/ttm/ttm_bo_util.c +++ b/drivers/gpu/drm/ttm/ttm_bo_util.c @@ -914,7 +914,7 @@ s64 ttm_lru_walk_for_evict(struct ttm_lru_walk *walk, struct ttm_device *bdev, if (progress < 0 || progress >= target) break; } - ttm_resource_cursor_fini_locked(&cursor); + ttm_resource_cursor_fini(&cursor); spin_unlock(&bdev->lru_lock); return progress; diff --git a/drivers/gpu/drm/ttm/ttm_device.c b/drivers/gpu/drm/ttm/ttm_device.c index f9e9b1ec8c8a..e7cc4954c1bc 100644 --- a/drivers/gpu/drm/ttm/ttm_device.c +++ b/drivers/gpu/drm/ttm/ttm_device.c @@ -148,40 +148,20 @@ int ttm_global_swapout(struct ttm_operation_ctx *ctx, gfp_t gfp_flags) int ttm_device_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, gfp_t gfp_flags) { - struct ttm_resource_cursor cursor; struct ttm_resource_manager *man; - struct ttm_resource *res; unsigned i; - int ret; + s64 lret; - spin_lock(&bdev->lru_lock); for (i = TTM_PL_SYSTEM; i < TTM_NUM_MEM_TYPES; ++i) { man = ttm_manager_type(bdev, i); if (!man || !man->use_tt) continue; - ttm_resource_manager_for_each_res(man, &cursor, res) { - struct ttm_buffer_object *bo = res->bo; - uint32_t num_pages; - - if (!bo || bo->resource != res) - continue; - - num_pages = PFN_UP(bo->base.size); - ret = ttm_bo_swapout(bo, ctx, gfp_flags); - /* ttm_bo_swapout has dropped the lru_lock */ - if (!ret) { - ttm_resource_cursor_fini(&cursor); - return num_pages; - } - if (ret != -EBUSY) { - ttm_resource_cursor_fini(&cursor); - return ret; - } - } + lret = ttm_bo_swapout(bdev, ctx, man, gfp_flags, 1); + /* Can be both positive (num_pages) and negative (error) */ + if (lret) + return lret; } - ttm_resource_cursor_fini_locked(&cursor); - spin_unlock(&bdev->lru_lock); return 0; } EXPORT_SYMBOL(ttm_device_swapout); diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index b6a2daac5518..b43d04a1dc91 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -81,37 +81,20 @@ static void ttm_bulk_move_drop_cursors(struct ttm_lru_bulk_move *bulk) } /** - * ttm_resource_cursor_fini_locked() - Finalize the LRU list cursor usage + * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage * @cursor: The struct ttm_resource_cursor to finalize. * * The function pulls the LRU list cursor off any lists it was previusly * attached to. Needs to be called with the LRU lock held. The function * can be called multiple times after eachother. */ -void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor) +void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor) { lockdep_assert_held(&cursor->man->bdev->lru_lock); list_del_init(&cursor->hitch.link); ttm_resource_cursor_clear_bulk(cursor); } -/** - * ttm_resource_cursor_fini() - Finalize the LRU list cursor usage - * @cursor: The struct ttm_resource_cursor to finalize. - * - * The function pulls the LRU list cursor off any lists it was previusly - * attached to. Needs to be called without the LRU list lock held. The - * function can be called multiple times after eachother. - */ -void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor) -{ - spinlock_t *lru_lock = &cursor->man->bdev->lru_lock; - - spin_lock(lru_lock); - ttm_resource_cursor_fini_locked(cursor); - spin_unlock(lru_lock); -} - /** * ttm_lru_bulk_move_init - initialize a bulk move structure * @bulk: the structure to init @@ -662,7 +645,7 @@ ttm_resource_manager_next(struct ttm_resource_cursor *cursor) ttm_resource_cursor_clear_bulk(cursor); } - ttm_resource_cursor_fini_locked(cursor); + ttm_resource_cursor_fini(cursor); return NULL; } diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index 21fa9d5964ec..eb2692de06b8 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -417,8 +417,9 @@ void ttm_bo_kunmap(struct ttm_bo_kmap_obj *map); int ttm_bo_vmap(struct ttm_buffer_object *bo, struct iosys_map *map); void ttm_bo_vunmap(struct ttm_buffer_object *bo, struct iosys_map *map); int ttm_bo_mmap_obj(struct vm_area_struct *vma, struct ttm_buffer_object *bo); -int ttm_bo_swapout(struct ttm_buffer_object *bo, struct ttm_operation_ctx *ctx, - gfp_t gfp_flags); +s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, + struct ttm_resource_manager *man, gfp_t gfp_flags, + s64 target); void ttm_bo_pin(struct ttm_buffer_object *bo); void ttm_bo_unpin(struct ttm_buffer_object *bo); int ttm_mem_evict_first(struct ttm_device *bdev, diff --git a/include/drm/ttm/ttm_resource.h b/include/drm/ttm/ttm_resource.h index 571abb4861a6..be034be56ba1 100644 --- a/include/drm/ttm/ttm_resource.h +++ b/include/drm/ttm/ttm_resource.h @@ -325,8 +325,6 @@ struct ttm_resource_cursor { unsigned int priority; }; -void ttm_resource_cursor_fini_locked(struct ttm_resource_cursor *cursor); - void ttm_resource_cursor_fini(struct ttm_resource_cursor *cursor); /** -- cgit v1.3.1 From 3756310e9fe1e0182adac89cedaa98c0eea66675 Mon Sep 17 00:00:00 2001 From: Thomas Hellström Date: Fri, 5 Jul 2024 17:32:05 +0200 Subject: drm/ttm: Use the LRU walker for eviction MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use the LRU walker for eviction. This helps removing a lot of code with weird locking semantics. The functionality is slightly changed so that when trylocked buffer objects are exhausted, we continue to interleave walks with ticket-locks while there is still progress made. The list walks are not restarted in-between evictions. Also provide a separate ttm_bo_evict_first() function for its single user. The context of that user allows sleeping dma_resv locks. v6: - Various cleanups suggested by Matthew Brost. - Fix error return code of ttm_bo_evict_first(). (Matthew Brost) - Fix an error check that was inverted. (Matthew Brost) v7: - Use s64 rather than long (Christian König) - Early ttm_resource_cursor_fini() in ttm_bo_evict_first(). - Simplify check for bo_moved in ttm_bo_evict_first(). (Christian König) - Don't evict pinned bos. Cc: Christian König Cc: Somalapuram Amaranath Cc: Matthew Brost Cc: Signed-off-by: Thomas Hellström Reviewed-by: Matthew Brost #v6 Link: https://patchwork.freedesktop.org/patch/msgid/20240705153206.68526-8-thomas.hellstrom@linux.intel.com Signed-off-by: Christian König --- drivers/gpu/drm/ttm/ttm_bo.c | 345 +++++++++++++++---------------------- drivers/gpu/drm/ttm/ttm_resource.c | 21 +-- include/drm/ttm/ttm_bo.h | 8 +- 3 files changed, 143 insertions(+), 231 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_bo.c b/drivers/gpu/drm/ttm/ttm_bo.c index f4b2b2bea6cb..0131ec802066 100644 --- a/drivers/gpu/drm/ttm/ttm_bo.c +++ b/drivers/gpu/drm/ttm/ttm_bo.c @@ -224,80 +224,6 @@ static void ttm_bo_flush_all_fences(struct ttm_buffer_object *bo) dma_resv_iter_end(&cursor); } -/** - * ttm_bo_cleanup_refs - * If bo idle, remove from lru lists, and unref. - * If not idle, block if possible. - * - * Must be called with lru_lock and reservation held, this function - * will drop the lru lock and optionally the reservation lock before returning. - * - * @bo: The buffer object to clean-up - * @interruptible: Any sleeps should occur interruptibly. - * @no_wait_gpu: Never wait for gpu. Return -EBUSY instead. - * @unlock_resv: Unlock the reservation lock as well. - */ - -static int ttm_bo_cleanup_refs(struct ttm_buffer_object *bo, - bool interruptible, bool no_wait_gpu, - bool unlock_resv) -{ - struct dma_resv *resv = &bo->base._resv; - int ret; - - if (dma_resv_test_signaled(resv, DMA_RESV_USAGE_BOOKKEEP)) - ret = 0; - else - ret = -EBUSY; - - if (ret && !no_wait_gpu) { - long lret; - - if (unlock_resv) - dma_resv_unlock(bo->base.resv); - spin_unlock(&bo->bdev->lru_lock); - - lret = dma_resv_wait_timeout(resv, DMA_RESV_USAGE_BOOKKEEP, - interruptible, - 30 * HZ); - - if (lret < 0) - return lret; - else if (lret == 0) - return -EBUSY; - - spin_lock(&bo->bdev->lru_lock); - if (unlock_resv && !dma_resv_trylock(bo->base.resv)) { - /* - * We raced, and lost, someone else holds the reservation now, - * and is probably busy in ttm_bo_cleanup_memtype_use. - * - * Even if it's not the case, because we finished waiting any - * delayed destruction would succeed, so just return success - * here. - */ - spin_unlock(&bo->bdev->lru_lock); - return 0; - } - ret = 0; - } - - if (ret) { - if (unlock_resv) - dma_resv_unlock(bo->base.resv); - spin_unlock(&bo->bdev->lru_lock); - return ret; - } - - spin_unlock(&bo->bdev->lru_lock); - ttm_bo_cleanup_memtype_use(bo); - - if (unlock_resv) - dma_resv_unlock(bo->base.resv); - - return 0; -} - /* * Block for the dma_resv object to become idle, lock the buffer and clean up * the resource and tt object. @@ -505,151 +431,152 @@ bool ttm_bo_eviction_valuable(struct ttm_buffer_object *bo, } EXPORT_SYMBOL(ttm_bo_eviction_valuable); -/* - * Check the target bo is allowable to be evicted or swapout, including cases: - * - * a. if share same reservation object with ctx->resv, have assumption - * reservation objects should already be locked, so not lock again and - * return true directly when either the opreation allow_reserved_eviction - * or the target bo already is in delayed free list; +/** + * ttm_bo_evict_first() - Evict the first bo on the manager's LRU list. + * @bdev: The ttm device. + * @man: The manager whose bo to evict. + * @ctx: The TTM operation ctx governing the eviction. * - * b. Otherwise, trylock it. + * Return: 0 if successful or the resource disappeared. Negative error code on error. */ -static bool ttm_bo_evict_swapout_allowable(struct ttm_buffer_object *bo, - struct ttm_operation_ctx *ctx, - const struct ttm_place *place, - bool *locked, bool *busy) +int ttm_bo_evict_first(struct ttm_device *bdev, struct ttm_resource_manager *man, + struct ttm_operation_ctx *ctx) { - bool ret = false; + struct ttm_resource_cursor cursor; + struct ttm_buffer_object *bo; + struct ttm_resource *res; + unsigned int mem_type; + int ret = 0; - if (bo->pin_count) { - *locked = false; - if (busy) - *busy = false; - return false; + spin_lock(&bdev->lru_lock); + res = ttm_resource_manager_first(man, &cursor); + ttm_resource_cursor_fini(&cursor); + if (!res) { + ret = -ENOENT; + goto out_no_ref; } + bo = res->bo; + if (!ttm_bo_get_unless_zero(bo)) + goto out_no_ref; + mem_type = res->mem_type; + spin_unlock(&bdev->lru_lock); + ret = ttm_bo_reserve(bo, ctx->interruptible, ctx->no_wait_gpu, NULL); + if (ret) + goto out_no_lock; + if (!bo->resource || bo->resource->mem_type != mem_type) + goto out_bo_moved; - if (bo->base.resv == ctx->resv) { - dma_resv_assert_held(bo->base.resv); - if (ctx->allow_res_evict) - ret = true; - *locked = false; - if (busy) - *busy = false; + if (bo->deleted) { + ret = ttm_bo_wait_ctx(bo, ctx); + if (!ret) + ttm_bo_cleanup_memtype_use(bo); } else { - ret = dma_resv_trylock(bo->base.resv); - *locked = ret; - if (busy) - *busy = !ret; - } - - if (ret && place && (bo->resource->mem_type != place->mem_type || - !bo->bdev->funcs->eviction_valuable(bo, place))) { - ret = false; - if (*locked) { - dma_resv_unlock(bo->base.resv); - *locked = false; - } + ret = ttm_bo_evict(bo, ctx); } +out_bo_moved: + dma_resv_unlock(bo->base.resv); +out_no_lock: + ttm_bo_put(bo); + return ret; +out_no_ref: + spin_unlock(&bdev->lru_lock); return ret; } /** - * ttm_mem_evict_wait_busy - wait for a busy BO to become available - * - * @busy_bo: BO which couldn't be locked with trylock - * @ctx: operation context - * @ticket: acquire ticket - * - * Try to lock a busy buffer object to avoid failing eviction. + * struct ttm_bo_evict_walk - Parameters for the evict walk. */ -static int ttm_mem_evict_wait_busy(struct ttm_buffer_object *busy_bo, - struct ttm_operation_ctx *ctx, - struct ww_acquire_ctx *ticket) -{ - int r; - - if (!busy_bo || !ticket) - return -EBUSY; - - if (ctx->interruptible) - r = dma_resv_lock_interruptible(busy_bo->base.resv, - ticket); - else - r = dma_resv_lock(busy_bo->base.resv, ticket); - - /* - * TODO: It would be better to keep the BO locked until allocation is at - * least tried one more time, but that would mean a much larger rework - * of TTM. - */ - if (!r) - dma_resv_unlock(busy_bo->base.resv); - - return r == -EDEADLK ? -EBUSY : r; -} +struct ttm_bo_evict_walk { + /** @walk: The walk base parameters. */ + struct ttm_lru_walk walk; + /** @place: The place passed to the resource allocation. */ + const struct ttm_place *place; + /** @evictor: The buffer object we're trying to make room for. */ + struct ttm_buffer_object *evictor; + /** @res: The allocated resource if any. */ + struct ttm_resource **res; + /** @evicted: Number of successful evictions. */ + unsigned long evicted; +}; -int ttm_mem_evict_first(struct ttm_device *bdev, - struct ttm_resource_manager *man, - const struct ttm_place *place, - struct ttm_operation_ctx *ctx, - struct ww_acquire_ctx *ticket) +static s64 ttm_bo_evict_cb(struct ttm_lru_walk *walk, struct ttm_buffer_object *bo) { - struct ttm_buffer_object *bo = NULL, *busy_bo = NULL; - struct ttm_resource_cursor cursor; - struct ttm_resource *res; - bool locked = false; - int ret; + struct ttm_bo_evict_walk *evict_walk = + container_of(walk, typeof(*evict_walk), walk); + s64 lret; - spin_lock(&bdev->lru_lock); - ttm_resource_manager_for_each_res(man, &cursor, res) { - bool busy; - - if (!ttm_bo_evict_swapout_allowable(res->bo, ctx, place, - &locked, &busy)) { - if (busy && !busy_bo && ticket != - dma_resv_locking_ctx(res->bo->base.resv)) - busy_bo = res->bo; - continue; - } + if (bo->pin_count || !bo->bdev->funcs->eviction_valuable(bo, evict_walk->place)) + return 0; - if (ttm_bo_get_unless_zero(res->bo)) { - bo = res->bo; - break; - } - if (locked) - dma_resv_unlock(res->bo->base.resv); + if (bo->deleted) { + lret = ttm_bo_wait_ctx(bo, walk->ctx); + if (!lret) + ttm_bo_cleanup_memtype_use(bo); + } else { + lret = ttm_bo_evict(bo, walk->ctx); } - ttm_resource_cursor_fini(&cursor); - if (!bo) { - if (busy_bo && !ttm_bo_get_unless_zero(busy_bo)) - busy_bo = NULL; - spin_unlock(&bdev->lru_lock); - ret = ttm_mem_evict_wait_busy(busy_bo, ctx, ticket); - if (busy_bo) - ttm_bo_put(busy_bo); - return ret; - } + if (lret) + goto out; - if (bo->deleted) { - ret = ttm_bo_cleanup_refs(bo, ctx->interruptible, - ctx->no_wait_gpu, locked); - ttm_bo_put(bo); - return ret; - } + evict_walk->evicted++; + if (evict_walk->res) + lret = ttm_resource_alloc(evict_walk->evictor, evict_walk->place, + evict_walk->res); + if (lret == 0) + return 1; +out: + /* Errors that should terminate the walk. */ + if (lret == -ENOSPC) + return -EBUSY; - spin_unlock(&bdev->lru_lock); + return lret; +} - ret = ttm_bo_evict(bo, ctx); - if (locked) - ttm_bo_unreserve(bo); - else - ttm_bo_move_to_lru_tail_unlocked(bo); +static const struct ttm_lru_walk_ops ttm_evict_walk_ops = { + .process_bo = ttm_bo_evict_cb, +}; - ttm_bo_put(bo); - return ret; +static int ttm_bo_evict_alloc(struct ttm_device *bdev, + struct ttm_resource_manager *man, + const struct ttm_place *place, + struct ttm_buffer_object *evictor, + struct ttm_operation_ctx *ctx, + struct ww_acquire_ctx *ticket, + struct ttm_resource **res) +{ + struct ttm_bo_evict_walk evict_walk = { + .walk = { + .ops = &ttm_evict_walk_ops, + .ctx = ctx, + .ticket = ticket, + }, + .place = place, + .evictor = evictor, + .res = res, + }; + s64 lret; + + evict_walk.walk.trylock_only = true; + lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); + if (lret || !ticket) + goto out; + + /* If ticket-locking, repeat while making progress. */ + evict_walk.walk.trylock_only = false; + do { + /* The walk may clear the evict_walk.walk.ticket field */ + evict_walk.walk.ticket = ticket; + evict_walk.evicted = 0; + lret = ttm_lru_walk_for_evict(&evict_walk.walk, bdev, man, 1); + } while (!lret && evict_walk.evicted); +out: + if (lret < 0) + return lret; + if (lret == 0) + return -EBUSY; + return 0; } /** @@ -760,6 +687,7 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, for (i = 0; i < placement->num_placement; ++i) { const struct ttm_place *place = &placement->placement[i]; struct ttm_resource_manager *man; + bool may_evict; man = ttm_manager_type(bdev, place->mem_type); if (!man || !ttm_resource_manager_used(man)) @@ -769,22 +697,21 @@ static int ttm_bo_alloc_resource(struct ttm_buffer_object *bo, TTM_PL_FLAG_FALLBACK)) continue; - do { - ret = ttm_resource_alloc(bo, place, res); - if (unlikely(ret && ret != -ENOSPC)) + may_evict = (force_space && place->mem_type != TTM_PL_SYSTEM); + ret = ttm_resource_alloc(bo, place, res); + if (ret) { + if (ret != -ENOSPC) return ret; - if (likely(!ret) || !force_space) - break; - - ret = ttm_mem_evict_first(bdev, man, place, ctx, - ticket); - if (unlikely(ret == -EBUSY)) - break; - if (unlikely(ret)) + if (!may_evict) + continue; + + ret = ttm_bo_evict_alloc(bdev, man, place, bo, ctx, + ticket, res); + if (ret == -EBUSY) + continue; + if (ret) return ret; - } while (1); - if (ret) - continue; + } ret = ttm_bo_add_move_fence(bo, man, ctx->no_wait_gpu); if (unlikely(ret)) { diff --git a/drivers/gpu/drm/ttm/ttm_resource.c b/drivers/gpu/drm/ttm/ttm_resource.c index b43d04a1dc91..6d764ba88aab 100644 --- a/drivers/gpu/drm/ttm/ttm_resource.c +++ b/drivers/gpu/drm/ttm/ttm_resource.c @@ -495,24 +495,11 @@ int ttm_resource_manager_evict_all(struct ttm_device *bdev, }; struct dma_fence *fence; int ret; - unsigned i; - - /* - * Can't use standard list traversal since we're unlocking. - */ - spin_lock(&bdev->lru_lock); - for (i = 0; i < TTM_MAX_BO_PRIORITY; ++i) { - while (!list_empty(&man->lru[i])) { - spin_unlock(&bdev->lru_lock); - ret = ttm_mem_evict_first(bdev, man, NULL, &ctx, - NULL); - if (ret) - return ret; - spin_lock(&bdev->lru_lock); - } - } - spin_unlock(&bdev->lru_lock); + do { + ret = ttm_bo_evict_first(bdev, man, &ctx); + cond_resched(); + } while (!ret); spin_lock(&man->move_lock); fence = dma_fence_get(man->move); diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index eb2692de06b8..d1a732d56259 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -422,11 +422,9 @@ s64 ttm_bo_swapout(struct ttm_device *bdev, struct ttm_operation_ctx *ctx, s64 target); void ttm_bo_pin(struct ttm_buffer_object *bo); void ttm_bo_unpin(struct ttm_buffer_object *bo); -int ttm_mem_evict_first(struct ttm_device *bdev, - struct ttm_resource_manager *man, - const struct ttm_place *place, - struct ttm_operation_ctx *ctx, - struct ww_acquire_ctx *ticket); +int ttm_bo_evict_first(struct ttm_device *bdev, + struct ttm_resource_manager *man, + struct ttm_operation_ctx *ctx); vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo, struct vm_fault *vmf); vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf, -- cgit v1.3.1 From 76299a557f36d624ca32500173ad7856e1ad93c0 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 3 Jul 2024 00:17:21 -0500 Subject: drm: Introduce 'power saving policy' drm property The `power saving policy` DRM property is an optional property that can be added to a connector by a driver. This property is for compositors to indicate intent of policy of whether a driver can use power saving features that may compromise the experience intended by the compositor. Acked-by: Leo Li Signed-off-by: Mario Limonciello Signed-off-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20240703051722.328-2-mario.limonciello@amd.com --- drivers/gpu/drm/drm_connector.c | 48 +++++++++++++++++++++++++++++++++++++++++ include/drm/drm_connector.h | 2 ++ include/drm/drm_mode_config.h | 5 +++++ include/uapi/drm/drm_mode.h | 7 ++++++ 4 files changed, 62 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index b4f4d2f908d1..7c44e3a1d8e0 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1043,6 +1043,11 @@ static const struct drm_prop_enum_list drm_scaling_mode_enum_list[] = { { DRM_MODE_SCALE_ASPECT, "Full aspect" }, }; +static const struct drm_prop_enum_list drm_power_saving_policy_enum_list[] = { + { __builtin_ffs(DRM_MODE_REQUIRE_COLOR_ACCURACY) - 1, "Require color accuracy" }, + { __builtin_ffs(DRM_MODE_REQUIRE_LOW_LATENCY) - 1, "Require low latency" }, +}; + static const struct drm_prop_enum_list drm_aspect_ratio_enum_list[] = { { DRM_MODE_PICTURE_ASPECT_NONE, "Automatic" }, { DRM_MODE_PICTURE_ASPECT_4_3, "4:3" }, @@ -1629,6 +1634,16 @@ EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name); * * Drivers can set up these properties by calling * drm_mode_create_tv_margin_properties(). + * power saving policy: + * This property is used to set the power saving policy for the connector. + * This property is populated with a bitmask of optional requirements set + * by the drm master for the drm driver to respect: + * - "Require color accuracy": Disable power saving features that will + * affect color fidelity. + * For example: Hardware assisted backlight modulation. + * - "Require low latency": Disable power saving features that will + * affect latency. + * For example: Panel self refresh (PSR) */ int drm_connector_create_standard_properties(struct drm_device *dev) @@ -2131,6 +2146,39 @@ int drm_mode_create_scaling_mode_property(struct drm_device *dev) } EXPORT_SYMBOL(drm_mode_create_scaling_mode_property); +/** + * drm_mode_create_power_saving_policy_property - create power saving policy property + * @dev: DRM device + * @supported_policies: bitmask of supported power saving policies + * + * Called by a driver the first time it's needed, must be attached to desired + * connectors. + * + * Returns: %0 + */ +int drm_mode_create_power_saving_policy_property(struct drm_device *dev, + uint64_t supported_policies) +{ + struct drm_property *power_saving; + + if (dev->mode_config.power_saving_policy) + return 0; + WARN_ON((supported_policies & DRM_MODE_POWER_SAVING_POLICY_ALL) == 0); + + power_saving = + drm_property_create_bitmask(dev, 0, "power saving policy", + drm_power_saving_policy_enum_list, + ARRAY_SIZE(drm_power_saving_policy_enum_list), + supported_policies); + if (!power_saving) + return -ENOMEM; + + dev->mode_config.power_saving_policy = power_saving; + + return 0; +} +EXPORT_SYMBOL(drm_mode_create_power_saving_policy_property); + /** * DOC: Variable refresh properties * diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index e3fa43291f44..5ad735253413 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -2267,6 +2267,8 @@ int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_content_type_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); +int drm_mode_create_power_saving_policy_property(struct drm_device *dev, + uint64_t supported_policies); int drm_connector_set_path_property(struct drm_connector *connector, const char *path); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index ab0f167474b1..150f9a3b649f 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -969,6 +969,11 @@ struct drm_mode_config { */ struct drm_atomic_state *suspend_state; + /** + * @power_saving_policy: bitmask for power saving policy requests. + */ + struct drm_property *power_saving_policy; + const struct drm_mode_config_helper_funcs *helper_private; }; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index d390011b89b4..880303c2ad97 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -152,6 +152,13 @@ extern "C" { #define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ #define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ +/* power saving policy options */ +#define DRM_MODE_REQUIRE_COLOR_ACCURACY BIT(0) /* Compositor requires color accuracy */ +#define DRM_MODE_REQUIRE_LOW_LATENCY BIT(1) /* Compositor requires low latency */ + +#define DRM_MODE_POWER_SAVING_POLICY_ALL (DRM_MODE_REQUIRE_COLOR_ACCURACY |\ + DRM_MODE_REQUIRE_LOW_LATENCY) + /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 #define DRM_MODE_DITHERING_ON 1 -- cgit v1.3.1 From 0daf44ea9dccfcdb1fe694e9ea85497b6cc3a065 Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 8 Jul 2024 22:00:28 +0300 Subject: drm/dp: Add helper to dump an LTTPR PHY descriptor MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a helper to dump the DPCD descriptor for an LTTPR PHY. This is based on [1] and [2] moving the helper to DRM core as suggested by Ville. [1] https://lore.kernel.org/all/20240703155937.1674856-5-imre.deak@intel.com [2] https://lore.kernel.org/all/20240703155937.1674856-6-imre.deak@intel.com Cc: dri-devel@lists.freedesktop.org Cc: Ville Syrjälä Reviewed-by: Ankit Nautiyal Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240708190029.271247-6-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_helper.c | 66 ++++++++++++++++++++++++++++----- include/drm/display/drm_dp.h | 4 ++ include/drm/display/drm_dp_helper.h | 2 + 3 files changed, 62 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_dp_helper.c b/drivers/gpu/drm/display/drm_dp_helper.c index d4c34f364140..6ee51003de3c 100644 --- a/drivers/gpu/drm/display/drm_dp_helper.c +++ b/drivers/gpu/drm/display/drm_dp_helper.c @@ -2328,6 +2328,31 @@ drm_dp_get_quirks(const struct drm_dp_dpcd_ident *ident, bool is_branch) #undef DEVICE_ID_ANY #undef DEVICE_ID +static int drm_dp_read_ident(struct drm_dp_aux *aux, unsigned int offset, + struct drm_dp_dpcd_ident *ident) +{ + int ret; + + ret = drm_dp_dpcd_read(aux, offset, ident, sizeof(*ident)); + + return ret < 0 ? ret : 0; +} + +static void drm_dp_dump_desc(struct drm_dp_aux *aux, + const char *device_name, const struct drm_dp_desc *desc) +{ + const struct drm_dp_dpcd_ident *ident = &desc->ident; + + drm_dbg_kms(aux->drm_dev, + "%s: %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d quirks 0x%04x\n", + aux->name, device_name, + (int)sizeof(ident->oui), ident->oui, + (int)strnlen(ident->device_id, sizeof(ident->device_id)), ident->device_id, + ident->hw_rev >> 4, ident->hw_rev & 0xf, + ident->sw_major_rev, ident->sw_minor_rev, + desc->quirks); +} + /** * drm_dp_read_desc - read sink/branch descriptor from DPCD * @aux: DisplayPort AUX channel @@ -2344,27 +2369,48 @@ int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, { struct drm_dp_dpcd_ident *ident = &desc->ident; unsigned int offset = is_branch ? DP_BRANCH_OUI : DP_SINK_OUI; - int ret, dev_id_len; + int ret; - ret = drm_dp_dpcd_read(aux, offset, ident, sizeof(*ident)); + ret = drm_dp_read_ident(aux, offset, ident); if (ret < 0) return ret; desc->quirks = drm_dp_get_quirks(ident, is_branch); - dev_id_len = strnlen(ident->device_id, sizeof(ident->device_id)); - - drm_dbg_kms(aux->drm_dev, - "%s: DP %s: OUI %*phD dev-ID %*pE HW-rev %d.%d SW-rev %d.%d quirks 0x%04x\n", - aux->name, is_branch ? "branch" : "sink", - (int)sizeof(ident->oui), ident->oui, dev_id_len, - ident->device_id, ident->hw_rev >> 4, ident->hw_rev & 0xf, - ident->sw_major_rev, ident->sw_minor_rev, desc->quirks); + drm_dp_dump_desc(aux, is_branch ? "DP branch" : "DP sink", desc); return 0; } EXPORT_SYMBOL(drm_dp_read_desc); +/** + * drm_dp_dump_lttpr_desc - read and dump the DPCD descriptor for an LTTPR PHY + * @aux: DisplayPort AUX channel + * @dp_phy: LTTPR PHY instance + * + * Read the DPCD LTTPR PHY descriptor for @dp_phy and print a debug message + * with its details to dmesg. + * + * Returns 0 on success or a negative error code on failure. + */ +int drm_dp_dump_lttpr_desc(struct drm_dp_aux *aux, enum drm_dp_phy dp_phy) +{ + struct drm_dp_desc desc = {}; + int ret; + + if (drm_WARN_ON(aux->drm_dev, dp_phy < DP_PHY_LTTPR1 || dp_phy > DP_MAX_LTTPR_COUNT)) + return -EINVAL; + + ret = drm_dp_read_ident(aux, DP_OUI_PHY_REPEATER(dp_phy), &desc.ident); + if (ret < 0) + return ret; + + drm_dp_dump_desc(aux, drm_dp_phy_name(dp_phy), &desc); + + return 0; +} +EXPORT_SYMBOL(drm_dp_dump_lttpr_desc); + /** * drm_dp_dsc_sink_bpp_incr() - Get bits per pixel increment * @dsc_dpcd: DSC capabilities from DPCD diff --git a/include/drm/display/drm_dp.h b/include/drm/display/drm_dp.h index 0b032faa8cf2..0146ccfe9c2e 100644 --- a/include/drm/display/drm_dp.h +++ b/include/drm/display/drm_dp.h @@ -1523,6 +1523,10 @@ enum drm_dp_phy { #define DP_SYMBOL_ERROR_COUNT_LANE2_PHY_REPEATER1 0xf0039 /* 1.3 */ #define DP_SYMBOL_ERROR_COUNT_LANE3_PHY_REPEATER1 0xf003b /* 1.3 */ +#define DP_OUI_PHY_REPEATER1 0xf003d /* 1.3 */ +#define DP_OUI_PHY_REPEATER(dp_phy) \ + DP_LTTPR_REG(dp_phy, DP_OUI_PHY_REPEATER1) + #define __DP_FEC1_BASE 0xf0290 /* 1.4 */ #define __DP_FEC2_BASE 0xf0298 /* 1.4 */ #define DP_FEC_BASE(dp_phy) \ diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index 8defcc399f42..f88d78e43443 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -656,6 +656,8 @@ struct drm_dp_desc { int drm_dp_read_desc(struct drm_dp_aux *aux, struct drm_dp_desc *desc, bool is_branch); +int drm_dp_dump_lttpr_desc(struct drm_dp_aux *aux, enum drm_dp_phy dp_phy); + /** * enum drm_dp_quirk - Display Port sink/branch device specific quirks * -- cgit v1.3.1 From e1a261ba599eec97e1c5c7760d5c3698fc24e6a6 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Tue, 2 Jul 2024 14:26:04 +0200 Subject: printk: Add a short description string to kmsg_dump() kmsg_dump doesn't forward the panic reason string to the kmsg_dumper callback. This patch adds a new struct kmsg_dump_detail, that will hold the reason and description, and pass it to the dump() callback. To avoid updating all kmsg_dump() call, it adds a kmsg_dump_desc() function and a macro for backward compatibility. I've written this for drm_panic, but it can be useful for other kmsg_dumper. It allows to see the panic reason, like "sysrq triggered crash" or "VFS: Unable to mount root fs on xxxx" on the drm panic screen. v2: * Use a struct kmsg_dump_detail to hold the reason and description pointer, for more flexibility if we want to add other parameters. (Kees Cook) * Fix powerpc/nvram_64 build, as I didn't update the forward declaration of oops_to_nvram() Signed-off-by: Jocelyn Falempe Acked-by: Petr Mladek Acked-by: Michael Ellerman (powerpc) Acked-by: Kees Cook Link: https://patchwork.freedesktop.org/patch/msgid/20240702122639.248110-1-jfalempe@redhat.com --- arch/powerpc/kernel/nvram_64.c | 8 ++++---- arch/powerpc/platforms/powernv/opal-kmsg.c | 4 ++-- arch/um/kernel/kmsg_dump.c | 2 +- drivers/gpu/drm/drm_panic.c | 4 ++-- drivers/hv/hv_common.c | 4 ++-- drivers/mtd/mtdoops.c | 2 +- fs/pstore/platform.c | 10 +++++----- include/linux/kmsg_dump.h | 22 +++++++++++++++++++--- kernel/panic.c | 2 +- kernel/printk/printk.c | 11 ++++++++--- 10 files changed, 45 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/arch/powerpc/kernel/nvram_64.c b/arch/powerpc/kernel/nvram_64.c index e385d3164648..f9c6568a9137 100644 --- a/arch/powerpc/kernel/nvram_64.c +++ b/arch/powerpc/kernel/nvram_64.c @@ -73,7 +73,7 @@ static const char *nvram_os_partitions[] = { }; static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason); + struct kmsg_dump_detail *detail); static struct kmsg_dumper nvram_kmsg_dumper = { .dump = oops_to_nvram @@ -643,7 +643,7 @@ void __init nvram_init_oops_partition(int rtas_partition_exists) * partition. If that's too much, go back and capture uncompressed text. */ static void oops_to_nvram(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { struct oops_log_info *oops_hdr = (struct oops_log_info *)oops_buf; static unsigned int oops_count = 0; @@ -655,7 +655,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, unsigned int err_type = ERR_TYPE_KERNEL_PANIC_GZ; int rc = -1; - switch (reason) { + switch (detail->reason) { case KMSG_DUMP_SHUTDOWN: /* These are almost always orderly shutdowns. */ return; @@ -671,7 +671,7 @@ static void oops_to_nvram(struct kmsg_dumper *dumper, break; default: pr_err("%s: ignoring unrecognized KMSG_DUMP_* reason %d\n", - __func__, (int) reason); + __func__, (int) detail->reason); return; } diff --git a/arch/powerpc/platforms/powernv/opal-kmsg.c b/arch/powerpc/platforms/powernv/opal-kmsg.c index 6c3bc4b4da98..bb4218fa796e 100644 --- a/arch/powerpc/platforms/powernv/opal-kmsg.c +++ b/arch/powerpc/platforms/powernv/opal-kmsg.c @@ -20,13 +20,13 @@ * message, it just ensures that OPAL completely flushes the console buffer. */ static void kmsg_dump_opal_console_flush(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { /* * Outside of a panic context the pollers will continue to run, * so we don't need to do any special flushing. */ - if (reason != KMSG_DUMP_PANIC) + if (detail->reason != KMSG_DUMP_PANIC) return; opal_flush_console(0); diff --git a/arch/um/kernel/kmsg_dump.c b/arch/um/kernel/kmsg_dump.c index 4382cf02a6d1..419021175272 100644 --- a/arch/um/kernel/kmsg_dump.c +++ b/arch/um/kernel/kmsg_dump.c @@ -8,7 +8,7 @@ #include static void kmsg_dumper_stdout(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { static struct kmsg_dump_iter iter; static DEFINE_SPINLOCK(lock); diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 948aed00595e..8794c7f6c0ee 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -655,11 +655,11 @@ static struct drm_plane *to_drm_plane(struct kmsg_dumper *kd) return container_of(kd, struct drm_plane, kmsg_panic); } -static void drm_panic(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason) +static void drm_panic(struct kmsg_dumper *dumper, struct kmsg_dump_detail *detail) { struct drm_plane *plane = to_drm_plane(dumper); - if (reason == KMSG_DUMP_PANIC) + if (detail->reason == KMSG_DUMP_PANIC) draw_panic_plane(plane); } diff --git a/drivers/hv/hv_common.c b/drivers/hv/hv_common.c index 9c452bfbd571..7a35c82976e0 100644 --- a/drivers/hv/hv_common.c +++ b/drivers/hv/hv_common.c @@ -207,13 +207,13 @@ static int hv_die_panic_notify_crash(struct notifier_block *self, * buffer and call into Hyper-V to transfer the data. */ static void hv_kmsg_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { struct kmsg_dump_iter iter; size_t bytes_written; /* We are only interested in panics. */ - if (reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg) + if (detail->reason != KMSG_DUMP_PANIC || !sysctl_record_panic_msg) return; /* diff --git a/drivers/mtd/mtdoops.c b/drivers/mtd/mtdoops.c index 2f11585b5613..86d49db9196d 100644 --- a/drivers/mtd/mtdoops.c +++ b/drivers/mtd/mtdoops.c @@ -298,7 +298,7 @@ static void find_next_position(struct mtdoops_context *cxt) } static void mtdoops_do_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { struct mtdoops_context *cxt = container_of(dumper, struct mtdoops_context, dump); diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 03425928d2fb..7f5810589a26 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -275,7 +275,7 @@ void pstore_record_init(struct pstore_record *record, * end of the buffer. */ static void pstore_dump(struct kmsg_dumper *dumper, - enum kmsg_dump_reason reason) + struct kmsg_dump_detail *detail) { struct kmsg_dump_iter iter; unsigned long total = 0; @@ -285,9 +285,9 @@ static void pstore_dump(struct kmsg_dumper *dumper, int saved_ret = 0; int ret; - why = kmsg_dump_reason_str(reason); + why = kmsg_dump_reason_str(detail->reason); - if (pstore_cannot_block_path(reason)) { + if (pstore_cannot_block_path(detail->reason)) { if (!spin_trylock_irqsave(&psinfo->buf_lock, flags)) { pr_err("dump skipped in %s path because of concurrent dump\n", in_nmi() ? "NMI" : why); @@ -311,7 +311,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, pstore_record_init(&record, psinfo); record.type = PSTORE_TYPE_DMESG; record.count = oopscount; - record.reason = reason; + record.reason = detail->reason; record.part = part; record.buf = psinfo->buf; @@ -352,7 +352,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, } ret = psinfo->write(&record); - if (ret == 0 && reason == KMSG_DUMP_OOPS) { + if (ret == 0 && detail->reason == KMSG_DUMP_OOPS) { pstore_new_entry = 1; pstore_timer_kick(); } else { diff --git a/include/linux/kmsg_dump.h b/include/linux/kmsg_dump.h index 906521c2329c..6055fc969877 100644 --- a/include/linux/kmsg_dump.h +++ b/include/linux/kmsg_dump.h @@ -39,6 +39,17 @@ struct kmsg_dump_iter { u64 next_seq; }; +/** + * struct kmsg_dump_detail - kernel crash detail + * @reason: reason for the crash, see kmsg_dump_reason. + * @description: optional short string, to provide additional information. + */ + +struct kmsg_dump_detail { + enum kmsg_dump_reason reason; + const char *description; +}; + /** * struct kmsg_dumper - kernel crash message dumper structure * @list: Entry in the dumper list (private) @@ -49,13 +60,13 @@ struct kmsg_dump_iter { */ struct kmsg_dumper { struct list_head list; - void (*dump)(struct kmsg_dumper *dumper, enum kmsg_dump_reason reason); + void (*dump)(struct kmsg_dumper *dumper, struct kmsg_dump_detail *detail); enum kmsg_dump_reason max_reason; bool registered; }; #ifdef CONFIG_PRINTK -void kmsg_dump(enum kmsg_dump_reason reason); +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc); bool kmsg_dump_get_line(struct kmsg_dump_iter *iter, bool syslog, char *line, size_t size, size_t *len); @@ -71,7 +82,7 @@ int kmsg_dump_unregister(struct kmsg_dumper *dumper); const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason); #else -static inline void kmsg_dump(enum kmsg_dump_reason reason) +static inline void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { } @@ -107,4 +118,9 @@ static inline const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) } #endif +static inline void kmsg_dump(enum kmsg_dump_reason reason) +{ + kmsg_dump_desc(reason, NULL); +} + #endif /* _LINUX_KMSG_DUMP_H */ diff --git a/kernel/panic.c b/kernel/panic.c index 8bff183d6180..d6219841f491 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -375,7 +375,7 @@ void panic(const char *fmt, ...) panic_print_sys_info(false); - kmsg_dump(KMSG_DUMP_PANIC); + kmsg_dump_desc(KMSG_DUMP_PANIC, buf); /* * If you doubt kdump always works fine in any situation, diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 420fd310129d..4310ff3f23e5 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -4126,16 +4126,21 @@ const char *kmsg_dump_reason_str(enum kmsg_dump_reason reason) EXPORT_SYMBOL_GPL(kmsg_dump_reason_str); /** - * kmsg_dump - dump kernel log to kernel message dumpers. + * kmsg_dump_desc - dump kernel log to kernel message dumpers. * @reason: the reason (oops, panic etc) for dumping + * @desc: a short string to describe what caused the panic or oops. Can be NULL + * if no additional description is available. * * Call each of the registered dumper's dump() callback, which can * retrieve the kmsg records with kmsg_dump_get_line() or * kmsg_dump_get_buffer(). */ -void kmsg_dump(enum kmsg_dump_reason reason) +void kmsg_dump_desc(enum kmsg_dump_reason reason, const char *desc) { struct kmsg_dumper *dumper; + struct kmsg_dump_detail detail = { + .reason = reason, + .description = desc}; rcu_read_lock(); list_for_each_entry_rcu(dumper, &dump_list, list) { @@ -4153,7 +4158,7 @@ void kmsg_dump(enum kmsg_dump_reason reason) continue; /* invoke dumper which will iterate over records */ - dumper->dump(dumper, reason); + dumper->dump(dumper, &detail); } rcu_read_unlock(); } -- cgit v1.3.1 From 7108b4a589cd6d3a2c1276fd610b3500f46de66a Mon Sep 17 00:00:00 2001 From: Lucas De Marchi Date: Wed, 10 Jul 2024 15:02:27 -0700 Subject: drm/xe/uapi: Expose SIMD16 EU mask in topology query MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit PVC, Xe2 and later platforms have 16-wide EUs. We were implicitly reporting for PVC the number of 16-wide EUs without giving userspace any hint that they were different than for other platforms. Xe2 and later also have 16-wide, but in those cases the reported number would correspond to the 8-wide count. To avoid confusion and make sure the right number is used by userspace depending on the platform, add a new item to the topology query and drop the one that is not available. The new mask reported for both PVC and Xe2 should now match the numbers reported via hwconfig. v2: Use a different topo item with EU type in its name to report the new mask instead of adding the type itself as the item (Matt Roper) Reviewed-by: Matt Roper Acked-by: José Roberto de Souza Acked-by: Mateusz Jablonski Acked-by: Wenbin Lu Acked-by: Effie Yu Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240710220446.2169797-1-lucas.demarchi@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/xe/xe_gt_topology.c | 27 ++++++++++++++++++++++----- drivers/gpu/drm/xe/xe_gt_types.h | 11 +++++++++++ drivers/gpu/drm/xe/xe_query.c | 4 +++- include/uapi/drm/xe_drm.h | 10 +++++++++- 4 files changed, 45 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/xe/xe_gt_topology.c b/drivers/gpu/drm/xe/xe_gt_topology.c index 25ff03ab8448..5a1559edf3e9 100644 --- a/drivers/gpu/drm/xe/xe_gt_topology.c +++ b/drivers/gpu/drm/xe/xe_gt_topology.c @@ -6,6 +6,7 @@ #include "xe_gt_topology.h" #include +#include #include "regs/xe_gt_regs.h" #include "xe_assert.h" @@ -31,7 +32,7 @@ load_dss_mask(struct xe_gt *gt, xe_dss_mask_t mask, int numregs, ...) } static void -load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) +load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask, enum xe_gt_eu_type *eu_type) { struct xe_device *xe = gt_to_xe(gt); u32 reg_val = xe_mmio_read32(gt, XELP_EU_ENABLE); @@ -47,11 +48,13 @@ load_eu_mask(struct xe_gt *gt, xe_eu_mask_t mask) if (GRAPHICS_VERx100(xe) < 1250) reg_val = ~reg_val & XELP_EU_MASK; - /* On PVC, one bit = one EU */ - if (GRAPHICS_VERx100(xe) == 1260) { + if (GRAPHICS_VERx100(xe) == 1260 || GRAPHICS_VER(xe) >= 20) { + /* SIMD16 EUs, one bit == one EU */ + *eu_type = XE_GT_EU_TYPE_SIMD16; val = reg_val; } else { - /* All other platforms, one bit = 2 EU */ + /* SIMD8 EUs, one bit == 2 EU */ + *eu_type = XE_GT_EU_TYPE_SIMD8; for (i = 0; i < fls(reg_val); i++) if (reg_val & BIT(i)) val |= 0x3 << 2 * i; @@ -213,7 +216,7 @@ xe_gt_topology_init(struct xe_gt *gt) XEHP_GT_COMPUTE_DSS_ENABLE, XEHPC_GT_COMPUTE_DSS_ENABLE_EXT, XE2_GT_COMPUTE_DSS_2); - load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss); + load_eu_mask(gt, gt->fuse_topo.eu_mask_per_dss, >->fuse_topo.eu_type); load_l3_bank_mask(gt, gt->fuse_topo.l3_bank_mask); p = drm_dbg_printer(>_to_xe(gt)->drm, DRM_UT_DRIVER, "GT topology"); @@ -221,6 +224,18 @@ xe_gt_topology_init(struct xe_gt *gt) xe_gt_topology_dump(gt, &p); } +static const char *eu_type_to_str(enum xe_gt_eu_type eu_type) +{ + switch (eu_type) { + case XE_GT_EU_TYPE_SIMD16: + return "simd16"; + case XE_GT_EU_TYPE_SIMD8: + return "simd8"; + } + + unreachable(); +} + void xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) { @@ -231,6 +246,8 @@ xe_gt_topology_dump(struct xe_gt *gt, struct drm_printer *p) drm_printf(p, "EU mask per DSS: %*pb\n", XE_MAX_EU_FUSE_BITS, gt->fuse_topo.eu_mask_per_dss); + drm_printf(p, "EU type: %s\n", + eu_type_to_str(gt->fuse_topo.eu_type)); drm_printf(p, "L3 bank mask: %*pb\n", XE_MAX_L3_BANK_MASK_BITS, gt->fuse_topo.l3_bank_mask); diff --git a/drivers/gpu/drm/xe/xe_gt_types.h b/drivers/gpu/drm/xe/xe_gt_types.h index 38a0d0e178c8..ef68c4a92972 100644 --- a/drivers/gpu/drm/xe/xe_gt_types.h +++ b/drivers/gpu/drm/xe/xe_gt_types.h @@ -27,6 +27,11 @@ enum xe_gt_type { XE_GT_TYPE_MEDIA, }; +enum xe_gt_eu_type { + XE_GT_EU_TYPE_SIMD8, + XE_GT_EU_TYPE_SIMD16, +}; + #define XE_MAX_DSS_FUSE_REGS 3 #define XE_MAX_DSS_FUSE_BITS (32 * XE_MAX_DSS_FUSE_REGS) #define XE_MAX_EU_FUSE_REGS 1 @@ -343,6 +348,12 @@ struct xe_gt { /** @fuse_topo.l3_bank_mask: L3 bank mask */ xe_l3_bank_mask_t l3_bank_mask; + + /** + * @fuse_topo.eu_type: type/width of EU stored in + * fuse_topo.eu_mask_per_dss + */ + enum xe_gt_eu_type eu_type; } fuse_topo; /** @steering: register steering for individual HW units */ diff --git a/drivers/gpu/drm/xe/xe_query.c b/drivers/gpu/drm/xe/xe_query.c index 4e01df6b1b7a..73ef6e4c2dc9 100644 --- a/drivers/gpu/drm/xe/xe_query.c +++ b/drivers/gpu/drm/xe/xe_query.c @@ -518,7 +518,9 @@ static int query_gt_topology(struct xe_device *xe, if (err) return err; - topo.type = DRM_XE_TOPO_EU_PER_DSS; + topo.type = gt->fuse_topo.eu_type == XE_GT_EU_TYPE_SIMD16 ? + DRM_XE_TOPO_SIMD16_EU_PER_DSS : + DRM_XE_TOPO_EU_PER_DSS; err = copy_mask(&query_ptr, &topo, gt->fuse_topo.eu_mask_per_dss, sizeof(gt->fuse_topo.eu_mask_per_dss)); diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 19619d4952a8..29425d7fdc77 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -517,7 +517,14 @@ struct drm_xe_query_gt_list { * available per Dual Sub Slices (DSS). For example a query response * containing the following in mask: * ``EU_PER_DSS ff ff 00 00 00 00 00 00`` - * means each DSS has 16 EU. + * means each DSS has 16 SIMD8 EUs. This type may be omitted if device + * doesn't have SIMD8 EUs. + * - %DRM_XE_TOPO_SIMD16_EU_PER_DSS - To query the mask of SIMD16 Execution + * Units (EU) available per Dual Sub Slices (DSS). For example a query + * response containing the following in mask: + * ``SIMD16_EU_PER_DSS ff ff 00 00 00 00 00 00`` + * means each DSS has 16 SIMD16 EUs. This type may be omitted if device + * doesn't have SIMD16 EUs. */ struct drm_xe_query_topology_mask { /** @gt_id: GT ID the mask is associated with */ @@ -527,6 +534,7 @@ struct drm_xe_query_topology_mask { #define DRM_XE_TOPO_DSS_COMPUTE 2 #define DRM_XE_TOPO_L3_BANK 3 #define DRM_XE_TOPO_EU_PER_DSS 4 +#define DRM_XE_TOPO_SIMD16_EU_PER_DSS 5 /** @type: type of mask */ __u16 type; -- cgit v1.3.1 From d20a9f568f99591886ec73b80ff7283486710643 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 17 Jul 2024 10:48:40 +0200 Subject: fbcon: Add an option to disable fbcon in panic This is required to avoid conflict between DRM_PANIC, and fbcon. If a drm device already handle panic with drm_panic, it should set the skip_panic field in fb_info, so that fbcon will stay quiet, and not overwrite the panic_screen. Signed-off-by: Jocelyn Falempe Reviewed-by: Javier Martinez Canillas Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240717090102.968152-3-jfalempe@redhat.com --- drivers/video/fbdev/core/fbcon.c | 7 ++++++- include/linux/fb.h | 1 + 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/video/fbdev/core/fbcon.c b/drivers/video/fbdev/core/fbcon.c index 3f7333dca508..498d9c07df80 100644 --- a/drivers/video/fbdev/core/fbcon.c +++ b/drivers/video/fbdev/core/fbcon.c @@ -270,12 +270,17 @@ static int fbcon_get_rotate(struct fb_info *info) return (ops) ? ops->rotate : 0; } +static bool fbcon_skip_panic(struct fb_info *info) +{ + return (info->skip_panic && unlikely(atomic_read(&panic_cpu) != PANIC_CPU_INVALID)); +} + static inline int fbcon_is_inactive(struct vc_data *vc, struct fb_info *info) { struct fbcon_ops *ops = info->fbcon_par; return (info->state != FBINFO_STATE_RUNNING || - vc->vc_mode != KD_TEXT || ops->graphics); + vc->vc_mode != KD_TEXT || ops->graphics || fbcon_skip_panic(info)); } static int get_color(struct vc_data *vc, struct fb_info *info, diff --git a/include/linux/fb.h b/include/linux/fb.h index db7d97b10964..865dad03e73e 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -510,6 +510,7 @@ struct fb_info { void *par; bool skip_vt_switch; /* no VT switch on suspend/resume required */ + bool skip_panic; /* Do not write to the fb after a panic */ }; /* This will go away -- cgit v1.3.1 From d5e79eeba3086a52593b295ac4bf6eddd64d4aad Mon Sep 17 00:00:00 2001 From: "T.J. Mercier" Date: Sat, 20 Jul 2024 15:15:42 +0800 Subject: dma-buf: heaps: Deduplicate docs and adopt common format MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The docs for dma_heap_get_name were incorrect, and since they were duplicated in the header they were wrong there too. The docs formatting was inconsistent so I tried to make it more consistent across functions since I'm already in here doing cleanup. Remove multiple unused includes and alphabetize. Signed-off-by: T.J. Mercier Signed-off-by: Yong Wu [Yong: Just add a comment for "priv" to mute build warning] Signed-off-by: Yunfei Dong Link: https://patchwork.freedesktop.org/patch/msgid/20240720071606.27930-5-yunfei.dong@mediatek.com Signed-off-by: Christian König --- drivers/dma-buf/dma-heap.c | 27 +++++++++++++++------------ include/linux/dma-heap.h | 21 +-------------------- 2 files changed, 16 insertions(+), 32 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-heap.c b/drivers/dma-buf/dma-heap.c index 2298ca5e112e..3cbe87d4a464 100644 --- a/drivers/dma-buf/dma-heap.c +++ b/drivers/dma-buf/dma-heap.c @@ -7,17 +7,15 @@ */ #include -#include #include #include +#include #include -#include #include -#include #include -#include #include -#include +#include +#include #include #define DEVNAME "dma_heap" @@ -28,9 +26,10 @@ * struct dma_heap - represents a dmabuf heap in the system * @name: used for debugging/device-node name * @ops: ops struct for this heap - * @heap_devt heap device node - * @list list head connecting to list of heaps - * @heap_cdev heap char device + * @priv: private data for this heap + * @heap_devt: heap device node + * @list: list head connecting to list of heaps + * @heap_cdev: heap char device * * Represents a heap of memory from which buffers can be made. */ @@ -193,11 +192,11 @@ static const struct file_operations dma_heap_fops = { }; /** - * dma_heap_get_drvdata() - get per-subdriver data for the heap + * dma_heap_get_drvdata - get per-heap driver data * @heap: DMA-Heap to retrieve private data for * * Returns: - * The per-subdriver data for the heap. + * The per-heap data for the heap. */ void *dma_heap_get_drvdata(struct dma_heap *heap) { @@ -205,8 +204,8 @@ void *dma_heap_get_drvdata(struct dma_heap *heap) } /** - * dma_heap_get_name() - get heap name - * @heap: DMA-Heap to retrieve private data for + * dma_heap_get_name - get heap name + * @heap: DMA-Heap to retrieve the name of * * Returns: * The char* for the heap name. @@ -216,6 +215,10 @@ const char *dma_heap_get_name(struct dma_heap *heap) return heap->name; } +/** + * dma_heap_add - adds a heap to dmabuf heaps + * @exp_info: information needed to register this heap + */ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info) { struct dma_heap *heap, *h, *err_ret; diff --git a/include/linux/dma-heap.h b/include/linux/dma-heap.h index 064bad725061..27d15f60950a 100644 --- a/include/linux/dma-heap.h +++ b/include/linux/dma-heap.h @@ -9,14 +9,13 @@ #ifndef _DMA_HEAPS_H #define _DMA_HEAPS_H -#include #include struct dma_heap; /** * struct dma_heap_ops - ops to operate on a given heap - * @allocate: allocate dmabuf and return struct dma_buf ptr + * @allocate: allocate dmabuf and return struct dma_buf ptr * * allocate returns dmabuf on success, ERR_PTR(-errno) on error. */ @@ -41,28 +40,10 @@ struct dma_heap_export_info { void *priv; }; -/** - * dma_heap_get_drvdata() - get per-heap driver data - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The per-heap data for the heap. - */ void *dma_heap_get_drvdata(struct dma_heap *heap); -/** - * dma_heap_get_name() - get heap name - * @heap: DMA-Heap to retrieve private data for - * - * Returns: - * The char* for the heap name. - */ const char *dma_heap_get_name(struct dma_heap *heap); -/** - * dma_heap_add - adds a heap to dmabuf heaps - * @exp_info: information needed to register this heap - */ struct dma_heap *dma_heap_add(const struct dma_heap_export_info *exp_info); #endif /* _DMA_HEAPS_H */ -- cgit v1.3.1 From 7214da0ed2220a2b9ad22aa77a5974cdd2a62799 Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Sun, 14 Jul 2024 23:55:02 +0300 Subject: drm/virtio: Add DRM capset definition Define DRM native context capset in the VirtIO-GPU protocol header. Signed-off-by: Dmitry Osipenko Reviewed-by: Rob Clark Reviewed-by: Pierre-Eric Pelloux-Prayer Link: https://patchwork.freedesktop.org/patch/msgid/20240714205502.3409718-1-dmitry.osipenko@collabora.com --- include/uapi/linux/virtio_gpu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_gpu.h b/include/uapi/linux/virtio_gpu.h index 0e21f3998108..bf2c9cabd207 100644 --- a/include/uapi/linux/virtio_gpu.h +++ b/include/uapi/linux/virtio_gpu.h @@ -311,6 +311,7 @@ struct virtio_gpu_cmd_submit { #define VIRTIO_GPU_CAPSET_VIRGL2 2 /* 3 is reserved for gfxstream */ #define VIRTIO_GPU_CAPSET_VENUS 4 +#define VIRTIO_GPU_CAPSET_DRM 6 /* VIRTIO_GPU_CMD_GET_CAPSET_INFO */ struct virtio_gpu_get_capset_info { -- cgit v1.3.1 From 83b501c1799a96a41e163973e88826253ffadfb3 Mon Sep 17 00:00:00 2001 From: Christian König Date: Fri, 19 Jul 2024 17:24:14 +0200 Subject: drm/scheduler: remove full_recover from drm_sched_start MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This was basically just another one of amdgpus hacks. The parameter allowed to restart the scheduler without turning fence signaling on again. That this is absolutely not a good idea should be obvious by now since the fences will then just sit there and never signal. While at it cleanup the code a bit. Signed-off-by: Christian König Reviewed-by: Matthew Brost Link: https://patchwork.freedesktop.org/patch/msgid/20240722083816.99685-1-christian.koenig@amd.com --- .../gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c | 2 +- drivers/gpu/drm/amd/amdgpu/amdgpu_device.c | 4 ++-- drivers/gpu/drm/etnaviv/etnaviv_sched.c | 4 ++-- drivers/gpu/drm/imagination/pvr_queue.c | 4 ++-- drivers/gpu/drm/lima/lima_sched.c | 2 +- drivers/gpu/drm/nouveau/nouveau_sched.c | 2 +- drivers/gpu/drm/panfrost/panfrost_job.c | 2 +- drivers/gpu/drm/panthor/panthor_mmu.c | 2 +- drivers/gpu/drm/panthor/panthor_sched.c | 2 +- drivers/gpu/drm/scheduler/sched_main.c | 25 +++++++--------------- drivers/gpu/drm/v3d/v3d_sched.c | 2 +- include/drm/gpu_scheduler.h | 2 +- 12 files changed, 22 insertions(+), 31 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c index 3a3f3ce09f00..2320df51c914 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_arcturus.c @@ -300,7 +300,7 @@ static int suspend_resume_compute_scheduler(struct amdgpu_device *adev, bool sus if (r) goto out; } else { - drm_sched_start(&ring->sched, false); + drm_sched_start(&ring->sched); } } diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c index 861ccff78af9..c186fdb198ad 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_device.c @@ -5862,7 +5862,7 @@ skip_hw_reset: if (!amdgpu_ring_sched_ready(ring)) continue; - drm_sched_start(&ring->sched, true); + drm_sched_start(&ring->sched); } if (!drm_drv_uses_atomic_modeset(adev_to_drm(tmp_adev)) && !job_signaled) @@ -6360,7 +6360,7 @@ void amdgpu_pci_resume(struct pci_dev *pdev) if (!amdgpu_ring_sched_ready(ring)) continue; - drm_sched_start(&ring->sched, true); + drm_sched_start(&ring->sched); } amdgpu_device_unset_mp1_state(adev); diff --git a/drivers/gpu/drm/etnaviv/etnaviv_sched.c b/drivers/gpu/drm/etnaviv/etnaviv_sched.c index c4b04b0dee16..c53641aa146f 100644 --- a/drivers/gpu/drm/etnaviv/etnaviv_sched.c +++ b/drivers/gpu/drm/etnaviv/etnaviv_sched.c @@ -72,12 +72,12 @@ static enum drm_gpu_sched_stat etnaviv_sched_timedout_job(struct drm_sched_job drm_sched_resubmit_jobs(&gpu->sched); - drm_sched_start(&gpu->sched, true); + drm_sched_start(&gpu->sched); return DRM_GPU_SCHED_STAT_NOMINAL; out_no_timeout: /* restart scheduler after GPU is usable again */ - drm_sched_start(&gpu->sched, true); + drm_sched_start(&gpu->sched); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/imagination/pvr_queue.c b/drivers/gpu/drm/imagination/pvr_queue.c index 5ed9c98fb599..20cb46012082 100644 --- a/drivers/gpu/drm/imagination/pvr_queue.c +++ b/drivers/gpu/drm/imagination/pvr_queue.c @@ -782,7 +782,7 @@ static void pvr_queue_start(struct pvr_queue *queue) } } - drm_sched_start(&queue->scheduler, true); + drm_sched_start(&queue->scheduler); } /** @@ -842,7 +842,7 @@ pvr_queue_timedout_job(struct drm_sched_job *s_job) } mutex_unlock(&pvr_dev->queues.lock); - drm_sched_start(sched, true); + drm_sched_start(sched); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/lima/lima_sched.c b/drivers/gpu/drm/lima/lima_sched.c index bbf3f8feab94..1a944edb6ddc 100644 --- a/drivers/gpu/drm/lima/lima_sched.c +++ b/drivers/gpu/drm/lima/lima_sched.c @@ -463,7 +463,7 @@ static enum drm_gpu_sched_stat lima_sched_timedout_job(struct drm_sched_job *job lima_pm_idle(ldev); drm_sched_resubmit_jobs(&pipe->base); - drm_sched_start(&pipe->base, true); + drm_sched_start(&pipe->base); return DRM_GPU_SCHED_STAT_NOMINAL; } diff --git a/drivers/gpu/drm/nouveau/nouveau_sched.c b/drivers/gpu/drm/nouveau/nouveau_sched.c index ba4139288a6d..eb6c3f9a01f5 100644 --- a/drivers/gpu/drm/nouveau/nouveau_sched.c +++ b/drivers/gpu/drm/nouveau/nouveau_sched.c @@ -379,7 +379,7 @@ nouveau_sched_timedout_job(struct drm_sched_job *sched_job) else NV_PRINTK(warn, job->cli, "Generic job timeout.\n"); - drm_sched_start(sched, true); + drm_sched_start(sched); return stat; } diff --git a/drivers/gpu/drm/panfrost/panfrost_job.c b/drivers/gpu/drm/panfrost/panfrost_job.c index a61ef0af9a4e..df49d37d0e7e 100644 --- a/drivers/gpu/drm/panfrost/panfrost_job.c +++ b/drivers/gpu/drm/panfrost/panfrost_job.c @@ -727,7 +727,7 @@ panfrost_reset(struct panfrost_device *pfdev, /* Restart the schedulers */ for (i = 0; i < NUM_JOB_SLOTS; i++) - drm_sched_start(&pfdev->js->queue[i].sched, true); + drm_sched_start(&pfdev->js->queue[i].sched); /* Re-enable job interrupts now that everything has been restarted. */ job_write(pfdev, JOB_INT_MASK, diff --git a/drivers/gpu/drm/panthor/panthor_mmu.c b/drivers/gpu/drm/panthor/panthor_mmu.c index fa0a002b1016..d47972806d50 100644 --- a/drivers/gpu/drm/panthor/panthor_mmu.c +++ b/drivers/gpu/drm/panthor/panthor_mmu.c @@ -827,7 +827,7 @@ static void panthor_vm_stop(struct panthor_vm *vm) static void panthor_vm_start(struct panthor_vm *vm) { - drm_sched_start(&vm->sched, true); + drm_sched_start(&vm->sched); } /** diff --git a/drivers/gpu/drm/panthor/panthor_sched.c b/drivers/gpu/drm/panthor/panthor_sched.c index 42929e147107..2e1becd87e3a 100644 --- a/drivers/gpu/drm/panthor/panthor_sched.c +++ b/drivers/gpu/drm/panthor/panthor_sched.c @@ -2525,7 +2525,7 @@ static void queue_start(struct panthor_queue *queue) list_for_each_entry(job, &queue->scheduler.pending_list, base.list) job->base.s_fence->parent = dma_fence_get(job->done_fence); - drm_sched_start(&queue->scheduler, true); + drm_sched_start(&queue->scheduler); } static void panthor_group_stop(struct panthor_group *group) diff --git a/drivers/gpu/drm/scheduler/sched_main.c b/drivers/gpu/drm/scheduler/sched_main.c index 7e90c9f95611..ab53ab486fe6 100644 --- a/drivers/gpu/drm/scheduler/sched_main.c +++ b/drivers/gpu/drm/scheduler/sched_main.c @@ -674,13 +674,11 @@ EXPORT_SYMBOL(drm_sched_stop); * drm_sched_start - recover jobs after a reset * * @sched: scheduler instance - * @full_recovery: proceed with complete sched restart * */ -void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) +void drm_sched_start(struct drm_gpu_scheduler *sched) { struct drm_sched_job *s_job, *tmp; - int r; /* * Locking the list is not required here as the sched thread is parked @@ -692,24 +690,17 @@ void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery) atomic_add(s_job->credits, &sched->credit_count); - if (!full_recovery) + if (!fence) { + drm_sched_job_done(s_job, -ECANCELED); continue; + } - if (fence) { - r = dma_fence_add_callback(fence, &s_job->cb, - drm_sched_job_done_cb); - if (r == -ENOENT) - drm_sched_job_done(s_job, fence->error); - else if (r) - DRM_DEV_ERROR(sched->dev, "fence add callback failed (%d)\n", - r); - } else - drm_sched_job_done(s_job, -ECANCELED); + if (dma_fence_add_callback(fence, &s_job->cb, + drm_sched_job_done_cb)) + drm_sched_job_done(s_job, fence->error); } - if (full_recovery) - drm_sched_start_timeout_unlocked(sched); - + drm_sched_start_timeout_unlocked(sched); drm_sched_wqueue_start(sched); } EXPORT_SYMBOL(drm_sched_start); diff --git a/drivers/gpu/drm/v3d/v3d_sched.c b/drivers/gpu/drm/v3d/v3d_sched.c index d193072703f3..42d4f4a2dba2 100644 --- a/drivers/gpu/drm/v3d/v3d_sched.c +++ b/drivers/gpu/drm/v3d/v3d_sched.c @@ -653,7 +653,7 @@ v3d_gpu_reset_for_timeout(struct v3d_dev *v3d, struct drm_sched_job *sched_job) /* Unblock schedulers and restart their jobs. */ for (q = 0; q < V3D_MAX_QUEUES; q++) { - drm_sched_start(&v3d->queue[q].sched, true); + drm_sched_start(&v3d->queue[q].sched); } mutex_unlock(&v3d->reset_lock); diff --git a/include/drm/gpu_scheduler.h b/include/drm/gpu_scheduler.h index 5acc64954a88..fe8edb917360 100644 --- a/include/drm/gpu_scheduler.h +++ b/include/drm/gpu_scheduler.h @@ -579,7 +579,7 @@ bool drm_sched_wqueue_ready(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_stop(struct drm_gpu_scheduler *sched); void drm_sched_wqueue_start(struct drm_gpu_scheduler *sched); void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad); -void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery); +void drm_sched_start(struct drm_gpu_scheduler *sched); void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched); void drm_sched_increase_karma(struct drm_sched_job *bad); void drm_sched_reset_karma(struct drm_sched_job *bad); -- cgit v1.3.1 From e06b71b2313a00579ba64a1cc43ad29d64cb8d4c Mon Sep 17 00:00:00 2001 From: Jonathan Kim Date: Tue, 21 May 2024 13:22:15 -0400 Subject: drm/amdkfd: allow users to target recommended SDMA engines Certain GPUs have better copy performance over xGMI on specific SDMA engines depending on the source and destination GPU. Allow users to create SDMA queues on these recommended engines. Close to 2x overall performance has been observed with this optimization. Signed-off-by: Jonathan Kim Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_chardev.c | 16 +++++++ .../gpu/drm/amd/amdkfd/kfd_device_queue_manager.c | 38 +++++++++++++++- drivers/gpu/drm/amd/amdkfd/kfd_priv.h | 5 ++- .../gpu/drm/amd/amdkfd/kfd_process_queue_manager.c | 1 + drivers/gpu/drm/amd/amdkfd/kfd_topology.c | 52 ++++++++++++++++++++++ drivers/gpu/drm/amd/amdkfd/kfd_topology.h | 1 + include/uapi/linux/kfd_ioctl.h | 6 ++- 7 files changed, 116 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c index 65a37ac5a0f0..0622ebd7e8ef 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c @@ -255,6 +255,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, args->ctx_save_restore_address; q_properties->ctx_save_restore_area_size = args->ctx_save_restore_size; q_properties->ctl_stack_size = args->ctl_stack_size; + q_properties->sdma_engine_id = args->sdma_engine_id; if (args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE || args->queue_type == KFD_IOC_QUEUE_TYPE_COMPUTE_AQL) q_properties->type = KFD_QUEUE_TYPE_COMPUTE; @@ -262,6 +263,8 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties, q_properties->type = KFD_QUEUE_TYPE_SDMA; else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_XGMI) q_properties->type = KFD_QUEUE_TYPE_SDMA_XGMI; + else if (args->queue_type == KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID) + q_properties->type = KFD_QUEUE_TYPE_SDMA_BY_ENG_ID; else return -ENOTSUPP; @@ -333,6 +336,18 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, goto err_bind_process; } + if (q_properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int max_sdma_eng_id = kfd_get_num_sdma_engines(dev) + + kfd_get_num_xgmi_sdma_engines(dev) - 1; + + if (q_properties.sdma_engine_id > max_sdma_eng_id) { + err = -EINVAL; + pr_err("sdma_engine_id %i exceeds maximum id of %i\n", + q_properties.sdma_engine_id, max_sdma_eng_id); + goto err_sdma_engine_id; + } + } + if (!pdd->qpd.proc_doorbells) { err = kfd_alloc_process_doorbells(dev->kfd, pdd); if (err) { @@ -387,6 +402,7 @@ static int kfd_ioctl_create_queue(struct file *filep, struct kfd_process *p, err_create_queue: kfd_queue_release_buffers(pdd, &q_properties); err_acquire_queue_buf: +err_sdma_engine_id: err_bind_process: err_pdd: mutex_unlock(&p->mutex); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c index fdc76c24b2e7..f0bfeb35246f 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c @@ -1532,6 +1532,41 @@ static int allocate_sdma_queue(struct device_queue_manager *dqm, q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev); q->properties.sdma_queue_id = q->sdma_id / kfd_get_num_xgmi_sdma_engines(dqm->dev); + } else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { + int i, num_queues, num_engines, eng_offset = 0, start_engine; + bool free_bit_found = false, is_xgmi = false; + + if (q->properties.sdma_engine_id < kfd_get_num_sdma_engines(dqm->dev)) { + num_queues = get_num_sdma_queues(dqm); + num_engines = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA; + } else { + num_queues = get_num_xgmi_sdma_queues(dqm); + num_engines = kfd_get_num_xgmi_sdma_engines(dqm->dev); + eng_offset = kfd_get_num_sdma_engines(dqm->dev); + q->properties.type = KFD_QUEUE_TYPE_SDMA_XGMI; + is_xgmi = true; + } + + /* Scan available bit based on target engine ID. */ + start_engine = q->properties.sdma_engine_id - eng_offset; + for (i = start_engine; i < num_queues; i += num_engines) { + + if (!test_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap)) + continue; + + clear_bit(i, is_xgmi ? dqm->xgmi_sdma_bitmap : dqm->sdma_bitmap); + q->sdma_id = i; + q->properties.sdma_queue_id = q->sdma_id / num_engines; + free_bit_found = true; + break; + } + + if (!free_bit_found) { + dev_err(dev, "No more SDMA queue to allocate for target ID %i\n", + q->properties.sdma_engine_id); + return -ENOMEM; + } } pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id); @@ -1784,7 +1819,8 @@ static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q, } if (q->properties.type == KFD_QUEUE_TYPE_SDMA || - q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) { + q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI || + q->properties.type == KFD_QUEUE_TYPE_SDMA_BY_ENG_ID) { dqm_lock(dqm); retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL); dqm_unlock(dqm); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h index b5cae48dff66..4190fa339913 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h @@ -414,13 +414,16 @@ enum kfd_unmap_queues_filter { * @KFD_QUEUE_TYPE_DIQ: DIQ queue type. * * @KFD_QUEUE_TYPE_SDMA_XGMI: Special SDMA queue for XGMI interface. + * + * @KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: SDMA user mode queue with target SDMA engine ID. */ enum kfd_queue_type { KFD_QUEUE_TYPE_COMPUTE, KFD_QUEUE_TYPE_SDMA, KFD_QUEUE_TYPE_HIQ, KFD_QUEUE_TYPE_DIQ, - KFD_QUEUE_TYPE_SDMA_XGMI + KFD_QUEUE_TYPE_SDMA_XGMI, + KFD_QUEUE_TYPE_SDMA_BY_ENG_ID }; enum kfd_queue_format { diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c index 9995dbb43359..f732ee35b531 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c @@ -366,6 +366,7 @@ int pqm_create_queue(struct process_queue_manager *pqm, switch (type) { case KFD_QUEUE_TYPE_SDMA: case KFD_QUEUE_TYPE_SDMA_XGMI: + case KFD_QUEUE_TYPE_SDMA_BY_ENG_ID: /* SDMA queues are always allocated statically no matter * which scheduler mode is used. We also do not need to * check whether a SDMA queue can be allocated here, because diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c index a9b3eda65a2c..40771f8752cb 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c @@ -292,6 +292,8 @@ static ssize_t iolink_show(struct kobject *kobj, struct attribute *attr, iolink->max_bandwidth); sysfs_show_32bit_prop(buffer, offs, "recommended_transfer_size", iolink->rec_transfer_size); + sysfs_show_32bit_prop(buffer, offs, "recommended_sdma_engine_id_mask", + iolink->rec_sdma_eng_id_mask); sysfs_show_32bit_prop(buffer, offs, "flags", iolink->flags); return offs; @@ -1265,6 +1267,55 @@ static void kfd_set_iolink_non_coherent(struct kfd_topology_device *to_dev, } } +#define REC_SDMA_NUM_GPU 8 +static const int rec_sdma_eng_map[REC_SDMA_NUM_GPU][REC_SDMA_NUM_GPU] = { + { -1, 14, 12, 2, 4, 8, 10, 6 }, + { 14, -1, 2, 10, 8, 4, 6, 12 }, + { 10, 2, -1, 12, 14, 6, 4, 8 }, + { 2, 12, 10, -1, 6, 14, 8, 4 }, + { 4, 8, 14, 6, -1, 10, 12, 2 }, + { 8, 4, 6, 14, 12, -1, 2, 10 }, + { 10, 6, 4, 8, 12, 2, -1, 14 }, + { 6, 12, 8, 4, 2, 10, 14, -1 }}; + +static void kfd_set_recommended_sdma_engines(struct kfd_topology_device *to_dev, + struct kfd_iolink_properties *outbound_link, + struct kfd_iolink_properties *inbound_link) +{ + struct kfd_node *gpu = outbound_link->gpu; + struct amdgpu_device *adev = gpu->adev; + int num_xgmi_nodes = adev->gmc.xgmi.num_physical_nodes; + bool support_rec_eng = !amdgpu_sriov_vf(adev) && to_dev->gpu && + adev->aid_mask && num_xgmi_nodes && + (amdgpu_xcp_query_partition_mode(adev->xcp_mgr, AMDGPU_XCP_FL_NONE) == + AMDGPU_SPX_PARTITION_MODE) && + (!(adev->flags & AMD_IS_APU) && num_xgmi_nodes == 8); + + if (support_rec_eng) { + int src_socket_id = adev->gmc.xgmi.physical_node_id; + int dst_socket_id = to_dev->gpu->adev->gmc.xgmi.physical_node_id; + + outbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[src_socket_id][dst_socket_id]; + inbound_link->rec_sdma_eng_id_mask = + 1 << rec_sdma_eng_map[dst_socket_id][src_socket_id]; + } else { + int num_sdma_eng = kfd_get_num_sdma_engines(gpu); + int i, eng_offset = 0; + + if (outbound_link->iolink_type == CRAT_IOLINK_TYPE_XGMI && + kfd_get_num_xgmi_sdma_engines(gpu) && to_dev->gpu) { + eng_offset = num_sdma_eng; + num_sdma_eng = kfd_get_num_xgmi_sdma_engines(gpu); + } + + for (i = 0; i < num_sdma_eng; i++) { + outbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + inbound_link->rec_sdma_eng_id_mask |= (1 << (i + eng_offset)); + } + } +} + static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) { struct kfd_iolink_properties *link, *inbound_link; @@ -1303,6 +1354,7 @@ static void kfd_fill_iolink_non_crat_info(struct kfd_topology_device *dev) inbound_link->flags = CRAT_IOLINK_FLAGS_ENABLED; kfd_set_iolink_no_atomics(peer_dev, dev, inbound_link); kfd_set_iolink_non_coherent(peer_dev, link, inbound_link); + kfd_set_recommended_sdma_engines(peer_dev, link, inbound_link); } } diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h index 43ba0d32e5bd..155b5c410af1 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h +++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h @@ -125,6 +125,7 @@ struct kfd_iolink_properties { uint32_t min_bandwidth; uint32_t max_bandwidth; uint32_t rec_transfer_size; + uint32_t rec_sdma_eng_id_mask; uint32_t flags; struct kfd_node *gpu; struct kobject *kobj; diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 285a36601dc9..71a7ce5f2d4c 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -42,9 +42,10 @@ * - 1.14 - Update kfd_event_data * - 1.15 - Enable managing mappings in compute VMs with GEM_VA ioctl * - 1.16 - Add contiguous VRAM allocation flag + * - 1.17 - Add SDMA queue creation with target SDMA engine ID */ #define KFD_IOCTL_MAJOR_VERSION 1 -#define KFD_IOCTL_MINOR_VERSION 16 +#define KFD_IOCTL_MINOR_VERSION 17 struct kfd_ioctl_get_version_args { __u32 major_version; /* from KFD */ @@ -56,6 +57,7 @@ struct kfd_ioctl_get_version_args { #define KFD_IOC_QUEUE_TYPE_SDMA 0x1 #define KFD_IOC_QUEUE_TYPE_COMPUTE_AQL 0x2 #define KFD_IOC_QUEUE_TYPE_SDMA_XGMI 0x3 +#define KFD_IOC_QUEUE_TYPE_SDMA_BY_ENG_ID 0x4 #define KFD_MAX_QUEUE_PERCENTAGE 100 #define KFD_MAX_QUEUE_PRIORITY 15 @@ -78,6 +80,8 @@ struct kfd_ioctl_create_queue_args { __u64 ctx_save_restore_address; /* to KFD */ __u32 ctx_save_restore_size; /* to KFD */ __u32 ctl_stack_size; /* to KFD */ + __u32 sdma_engine_id; /* to KFD */ + __u32 pad; }; struct kfd_ioctl_destroy_queue_args { -- cgit v1.3.1 From 0d5040e406d2c4404d26b841c4aa34cec0bf1088 Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Thu, 25 Jul 2024 16:51:08 -0400 Subject: drm/vblank: add dynamic per-crtc vblank configuration support We would like to be able to enable vblank_disable_immediate unconditionally, however there are a handful of cases where a small off delay is necessary (e.g. with PSR enabled). So, we would like to be able to adjust the vblank off delay and disable imminent values dynamically for a given CRTC. Since, it will allow drivers to apply static screen optimizations more quickly and consequently allow users to benefit more so from the power savings afforded by the aforementioned optimizations, while avoiding issues in cases where an off delay is still warranted. In particular, the PSR case requires a small off delay of 2 frames, otherwise display firmware isn't able to keep up with all of the requests made to amdgpu. So, introduce drm_crtc_vblank_on_config() which is like drm_crtc_vblank_on(), but it allows drivers to specify the vblank CRTC configuration before enabling vblanking support for a given CRTC. Signed-off-by: Hamza Mahfooz Acked-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240725205109.209743-1-hamza.mahfooz@amd.com --- drivers/gpu/drm/drm_vblank.c | 81 ++++++++++++++++++++++++++++++++------------ include/drm/drm_device.h | 5 +-- include/drm/drm_vblank.h | 37 ++++++++++++++++++-- 3 files changed, 97 insertions(+), 26 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_vblank.c b/drivers/gpu/drm/drm_vblank.c index cc3571e25a9a..c6b4cd77df72 100644 --- a/drivers/gpu/drm/drm_vblank.c +++ b/drivers/gpu/drm/drm_vblank.c @@ -131,7 +131,7 @@ * guaranteed to be enabled. * * On many hardware disabling the vblank interrupt cannot be done in a race-free - * manner, see &drm_driver.vblank_disable_immediate and + * manner, see &drm_vblank_crtc_config.disable_immediate and * &drm_driver.max_vblank_count. In that case the vblank core only disables the * vblanks after a timer has expired, which can be configured through the * ``vblankoffdelay`` module parameter. @@ -1241,6 +1241,7 @@ EXPORT_SYMBOL(drm_crtc_vblank_get); void drm_vblank_put(struct drm_device *dev, unsigned int pipe) { struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); + int vblank_offdelay = vblank->config.offdelay_ms; if (drm_WARN_ON(dev, pipe >= dev->num_crtcs)) return; @@ -1250,13 +1251,13 @@ void drm_vblank_put(struct drm_device *dev, unsigned int pipe) /* Last user schedules interrupt disable */ if (atomic_dec_and_test(&vblank->refcount)) { - if (drm_vblank_offdelay == 0) + if (!vblank_offdelay) return; - else if (drm_vblank_offdelay < 0) + else if (vblank_offdelay < 0) vblank_disable_fn(&vblank->disable_timer); - else if (!dev->vblank_disable_immediate) + else if (!vblank->config.disable_immediate) mod_timer(&vblank->disable_timer, - jiffies + ((drm_vblank_offdelay * HZ)/1000)); + jiffies + ((vblank_offdelay * HZ) / 1000)); } } @@ -1265,7 +1266,8 @@ void drm_vblank_put(struct drm_device *dev, unsigned int pipe) * @crtc: which counter to give up * * Release ownership of a given vblank counter, turning off interrupts - * if possible. Disable interrupts after drm_vblank_offdelay milliseconds. + * if possible. Disable interrupts after &drm_vblank_crtc_config.offdelay_ms + * milliseconds. */ void drm_crtc_vblank_put(struct drm_crtc *crtc) { @@ -1466,16 +1468,20 @@ void drm_crtc_set_max_vblank_count(struct drm_crtc *crtc, EXPORT_SYMBOL(drm_crtc_set_max_vblank_count); /** - * drm_crtc_vblank_on - enable vblank events on a CRTC + * drm_crtc_vblank_on_config - enable vblank events on a CRTC with custom + * configuration options * @crtc: CRTC in question + * @config: Vblank configuration value * - * This functions restores the vblank interrupt state captured with - * drm_crtc_vblank_off() again and is generally called when enabling @crtc. Note - * that calls to drm_crtc_vblank_on() and drm_crtc_vblank_off() can be - * unbalanced and so can also be unconditionally called in driver load code to - * reflect the current hardware state of the crtc. + * See drm_crtc_vblank_on(). In addition, this function allows you to provide a + * custom vblank configuration for a given CRTC. + * + * Note that @config is copied, the pointer does not need to stay valid beyond + * this function call. For details of the parameters see + * struct drm_vblank_crtc_config. */ -void drm_crtc_vblank_on(struct drm_crtc *crtc) +void drm_crtc_vblank_on_config(struct drm_crtc *crtc, + const struct drm_vblank_crtc_config *config) { struct drm_device *dev = crtc->dev; unsigned int pipe = drm_crtc_index(crtc); @@ -1488,6 +1494,8 @@ void drm_crtc_vblank_on(struct drm_crtc *crtc) drm_dbg_vbl(dev, "crtc %d, vblank enabled %d, inmodeset %d\n", pipe, vblank->enabled, vblank->inmodeset); + vblank->config = *config; + /* Drop our private "prevent drm_vblank_get" refcount */ if (vblank->inmodeset) { atomic_dec(&vblank->refcount); @@ -1500,10 +1508,33 @@ void drm_crtc_vblank_on(struct drm_crtc *crtc) * re-enable interrupts if there are users left, or the * user wishes vblank interrupts to be enabled all the time. */ - if (atomic_read(&vblank->refcount) != 0 || drm_vblank_offdelay == 0) + if (atomic_read(&vblank->refcount) != 0 || !vblank->config.offdelay_ms) drm_WARN_ON(dev, drm_vblank_enable(dev, pipe)); spin_unlock_irq(&dev->vbl_lock); } +EXPORT_SYMBOL(drm_crtc_vblank_on_config); + +/** + * drm_crtc_vblank_on - enable vblank events on a CRTC + * @crtc: CRTC in question + * + * This functions restores the vblank interrupt state captured with + * drm_crtc_vblank_off() again and is generally called when enabling @crtc. Note + * that calls to drm_crtc_vblank_on() and drm_crtc_vblank_off() can be + * unbalanced and so can also be unconditionally called in driver load code to + * reflect the current hardware state of the crtc. + * + * Note that unlike in drm_crtc_vblank_on_config(), default values are used. + */ +void drm_crtc_vblank_on(struct drm_crtc *crtc) +{ + const struct drm_vblank_crtc_config config = { + .offdelay_ms = drm_vblank_offdelay, + .disable_immediate = crtc->dev->vblank_disable_immediate + }; + + drm_crtc_vblank_on_config(crtc, &config); +} EXPORT_SYMBOL(drm_crtc_vblank_on); static void drm_vblank_restore(struct drm_device *dev, unsigned int pipe) @@ -1556,16 +1587,21 @@ static void drm_vblank_restore(struct drm_device *dev, unsigned int pipe) * * Note that drivers must have race-free high-precision timestamping support, * i.e. &drm_crtc_funcs.get_vblank_timestamp must be hooked up and - * &drm_driver.vblank_disable_immediate must be set to indicate the + * &drm_vblank_crtc_config.disable_immediate must be set to indicate the * time-stamping functions are race-free against vblank hardware counter * increments. */ void drm_crtc_vblank_restore(struct drm_crtc *crtc) { - WARN_ON_ONCE(!crtc->funcs->get_vblank_timestamp); - WARN_ON_ONCE(!crtc->dev->vblank_disable_immediate); + struct drm_device *dev = crtc->dev; + unsigned int pipe = drm_crtc_index(crtc); + struct drm_vblank_crtc *vblank = drm_vblank_crtc(dev, pipe); + + drm_WARN_ON_ONCE(dev, !crtc->funcs->get_vblank_timestamp); + drm_WARN_ON_ONCE(dev, vblank->inmodeset); + drm_WARN_ON_ONCE(dev, !vblank->config.disable_immediate); - drm_vblank_restore(crtc->dev, drm_crtc_index(crtc)); + drm_vblank_restore(dev, pipe); } EXPORT_SYMBOL(drm_crtc_vblank_restore); @@ -1754,7 +1790,7 @@ int drm_wait_vblank_ioctl(struct drm_device *dev, void *data, /* If the counter is currently enabled and accurate, short-circuit * queries to return the cached timestamp of the last vblank. */ - if (dev->vblank_disable_immediate && + if (vblank->config.disable_immediate && drm_wait_vblank_is_query(vblwait) && READ_ONCE(vblank->enabled)) { drm_wait_vblank_reply(dev, pipe, &vblwait->reply); @@ -1918,8 +1954,8 @@ bool drm_handle_vblank(struct drm_device *dev, unsigned int pipe) * been signaled. The disable has to be last (after * drm_handle_vblank_events) so that the timestamp is always accurate. */ - disable_irq = (dev->vblank_disable_immediate && - drm_vblank_offdelay > 0 && + disable_irq = (vblank->config.disable_immediate && + vblank->config.offdelay_ms > 0 && !atomic_read(&vblank->refcount)); drm_handle_vblank_events(dev, pipe); @@ -1992,7 +2028,8 @@ int drm_crtc_get_sequence_ioctl(struct drm_device *dev, void *data, pipe = drm_crtc_index(crtc); vblank = drm_crtc_vblank_crtc(crtc); - vblank_enabled = dev->vblank_disable_immediate && READ_ONCE(vblank->enabled); + vblank_enabled = READ_ONCE(vblank->config.disable_immediate) && + READ_ONCE(vblank->enabled); if (!vblank_enabled) { ret = drm_crtc_vblank_get(crtc); diff --git a/include/drm/drm_device.h b/include/drm/drm_device.h index 63767cf24371..c91f87b5242d 100644 --- a/include/drm/drm_device.h +++ b/include/drm/drm_device.h @@ -213,8 +213,9 @@ struct drm_device { * This can be set to true it the hardware has a working vblank counter * with high-precision timestamping (otherwise there are races) and the * driver uses drm_crtc_vblank_on() and drm_crtc_vblank_off() - * appropriately. See also @max_vblank_count and - * &drm_crtc_funcs.get_vblank_counter. + * appropriately. Also, see @max_vblank_count, + * &drm_crtc_funcs.get_vblank_counter and + * &drm_vblank_crtc_config.disable_immediate. */ bool vblank_disable_immediate; diff --git a/include/drm/drm_vblank.h b/include/drm/drm_vblank.h index c8f829b4307c..151ab1e85b1b 100644 --- a/include/drm/drm_vblank.h +++ b/include/drm/drm_vblank.h @@ -78,6 +78,31 @@ struct drm_pending_vblank_event { } event; }; +/** + * struct drm_vblank_crtc_config - vblank configuration for a CRTC + */ +struct drm_vblank_crtc_config { + /** + * @offdelay_ms: Vblank off delay in ms, used to determine how long + * &drm_vblank_crtc.disable_timer waits before disabling. + * + * Defaults to the value of drm_vblank_offdelay in drm_crtc_vblank_on(). + */ + int offdelay_ms; + + /** + * @disable_immediate: See &drm_device.vblank_disable_immediate + * for the exact semantics of immediate vblank disabling. + * + * Additionally, this tracks the disable immediate value per crtc, just + * in case it needs to differ from the default value for a given device. + * + * Defaults to the value of &drm_device.vblank_disable_immediate in + * drm_crtc_vblank_on(). + */ + bool disable_immediate; +}; + /** * struct drm_vblank_crtc - vblank tracking for a CRTC * @@ -99,8 +124,8 @@ struct drm_vblank_crtc { wait_queue_head_t queue; /** * @disable_timer: Disable timer for the delayed vblank disabling - * hysteresis logic. Vblank disabling is controlled through the - * drm_vblank_offdelay module option and the setting of the + * hysteresis logic. Vblank disabling is controlled through + * &drm_vblank_crtc_config.offdelay_ms and the setting of the * &drm_device.max_vblank_count value. */ struct timer_list disable_timer; @@ -198,6 +223,12 @@ struct drm_vblank_crtc { */ struct drm_display_mode hwmode; + /** + * @config: Stores vblank configuration values for a given CRTC. + * Also, see drm_crtc_vblank_on_config(). + */ + struct drm_vblank_crtc_config config; + /** * @enabled: Tracks the enabling state of the corresponding &drm_crtc to * avoid double-disabling and hence corrupting saved state. Needed by @@ -247,6 +278,8 @@ void drm_wait_one_vblank(struct drm_device *dev, unsigned int pipe); void drm_crtc_wait_one_vblank(struct drm_crtc *crtc); void drm_crtc_vblank_off(struct drm_crtc *crtc); void drm_crtc_vblank_reset(struct drm_crtc *crtc); +void drm_crtc_vblank_on_config(struct drm_crtc *crtc, + const struct drm_vblank_crtc_config *config); void drm_crtc_vblank_on(struct drm_crtc *crtc); u64 drm_crtc_accurate_vblank_count(struct drm_crtc *crtc); void drm_crtc_vblank_restore(struct drm_crtc *crtc); -- cgit v1.3.1 From f2881dfdaaa9ec873dbd383ef5512fc31e576cbb Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 29 Jul 2024 11:26:34 +0200 Subject: drm/xe/oa/uapi: Make bit masks unsigned MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When building with gcc-5: In function ‘decode_oa_format.isra.26’, inlined from ‘xe_oa_set_prop_oa_format’ at drivers/gpu/drm/xe/xe_oa.c:1664:6: ././include/linux/compiler_types.h:510:38: error: call to ‘__compiletime_assert_1336’ declared with attribute error: FIELD_GET: mask is not constant [...] ./include/linux/bitfield.h:155:3: note: in expansion of macro ‘__BF_FIELD_CHECK’ __BF_FIELD_CHECK(_mask, _reg, 0U, "FIELD_GET: "); \ ^ drivers/gpu/drm/xe/xe_oa.c:1573:18: note: in expansion of macro ‘FIELD_GET’ u32 bc_report = FIELD_GET(DRM_XE_OA_FORMAT_MASK_BC_REPORT, fmt); ^ Fixes: b6fd51c62119 ("drm/xe/oa/uapi: Define and parse OA stream properties") Signed-off-by: Geert Uytterhoeven Reviewed-by: Lucas De Marchi Link: https://patchwork.freedesktop.org/patch/msgid/20240729092634.2227611-1-geert+renesas@glider.be Signed-off-by: Lucas De Marchi --- include/uapi/drm/xe_drm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/drm/xe_drm.h b/include/uapi/drm/xe_drm.h index 29425d7fdc77..b6fbe4988f2e 100644 --- a/include/uapi/drm/xe_drm.h +++ b/include/uapi/drm/xe_drm.h @@ -1598,10 +1598,10 @@ enum drm_xe_oa_property_id { * b. Counter select c. Counter size and d. BC report. Also refer to the * oa_formats array in drivers/gpu/drm/xe/xe_oa.c. */ -#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xff << 0) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xff << 8) -#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xff << 16) -#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xff << 24) +#define DRM_XE_OA_FORMAT_MASK_FMT_TYPE (0xffu << 0) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SEL (0xffu << 8) +#define DRM_XE_OA_FORMAT_MASK_COUNTER_SIZE (0xffu << 16) +#define DRM_XE_OA_FORMAT_MASK_BC_REPORT (0xffu << 24) /** * @DRM_XE_OA_PROPERTY_OA_PERIOD_EXPONENT: Requests periodic OA unit -- cgit v1.3.1 From a755947e050b8751fc5402609a7d600e9c756fa7 Mon Sep 17 00:00:00 2001 From: Mitul Golani Date: Mon, 15 Jul 2024 21:55:13 +0530 Subject: drm/dp: Describe target_rr_divider in struct drm_dp_as_sdp Describe newly added parameter target_rr_divider in struct drm_dp_as_sdp. -v2: Remove extra line from commit message.(Lucas) -v3: Rebase. Fixes: a20c6d954d75 ("drm/dp: Add refresh rate divider to struct representing AS SDP") Cc: Mitul Golani Cc: Arun R Murthy Cc: Suraj Kandpal Cc: Ankit Nautiyal Cc: Jani Nikula Cc: Stephen Rothwell Signed-off-by: Mitul Golani Reviewed-by: Ankit Nautiyal Acked-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240715162514.2836421-1-mitulkumar.ajitkumar.golani@intel.com --- include/drm/display/drm_dp_helper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/drm/display/drm_dp_helper.h b/include/drm/display/drm_dp_helper.h index bbb1cdc4fc68..279624833ea9 100644 --- a/include/drm/display/drm_dp_helper.h +++ b/include/drm/display/drm_dp_helper.h @@ -112,6 +112,7 @@ struct drm_dp_vsc_sdp { * @target_rr: Target Refresh * @duration_incr_ms: Successive frame duration increase * @duration_decr_ms: Successive frame duration decrease + * @target_rr_divider: Target refresh rate divider * @mode: Adaptive Sync Operation Mode */ struct drm_dp_as_sdp { -- cgit v1.3.1 From dbaeef363ea54f4c18112874b77503c72ba60fec Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Mon, 22 Jul 2024 19:54:51 +0300 Subject: drm/dp_mst: Add a helper to queue a topology probe A follow up i915 patch will need to reprobe the MST topology after the initial probing, add a helper for this. Cc: Lyude Paul Cc: dri-devel@lists.freedesktop.org Reviewed-by: Lyude Paul Signed-off-by: Imre Deak Link: https://patchwork.freedesktop.org/patch/msgid/20240722165503.2084999-3-imre.deak@intel.com --- drivers/gpu/drm/display/drm_dp_mst_topology.c | 27 +++++++++++++++++++++++++++ include/drm/display/drm_dp_mst_helper.h | 2 ++ 2 files changed, 29 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index eb1b7b286b76..0fbc9bbed5d8 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -3728,6 +3728,33 @@ drm_dp_mst_topology_mgr_invalidate_mstb(struct drm_dp_mst_branch *mstb) drm_dp_mst_topology_mgr_invalidate_mstb(port->mstb); } +/** + * drm_dp_mst_topology_queue_probe - Queue a topology probe + * @mgr: manager to probe + * + * Queue a work to probe the MST topology. Driver's should call this only to + * sync the topology's HW->SW state after the MST link's parameters have + * changed in a way the state could've become out-of-sync. This is the case + * for instance when the link rate between the source and first downstream + * branch device has switched between UHBR and non-UHBR rates. Except of those + * cases - for instance when a sink gets plugged/unplugged to a port - the SW + * state will get updated automatically via MST UP message notifications. + */ +void drm_dp_mst_topology_queue_probe(struct drm_dp_mst_topology_mgr *mgr) +{ + mutex_lock(&mgr->lock); + + if (drm_WARN_ON(mgr->dev, !mgr->mst_state || !mgr->mst_primary)) + goto out_unlock; + + drm_dp_mst_topology_mgr_invalidate_mstb(mgr->mst_primary); + drm_dp_mst_queue_probe_work(mgr); + +out_unlock: + mutex_unlock(&mgr->lock); +} +EXPORT_SYMBOL(drm_dp_mst_topology_queue_probe); + /** * drm_dp_mst_topology_mgr_suspend() - suspend the MST manager * @mgr: manager to suspend diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94..02b037d3a93f 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -885,6 +885,8 @@ int drm_dp_check_act_status(struct drm_dp_mst_topology_mgr *mgr); void drm_dp_mst_dump_topology(struct seq_file *m, struct drm_dp_mst_topology_mgr *mgr); +void drm_dp_mst_topology_queue_probe(struct drm_dp_mst_topology_mgr *mgr); + void drm_dp_mst_topology_mgr_suspend(struct drm_dp_mst_topology_mgr *mgr); int __must_check drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, -- cgit v1.3.1 From 53369581dc0c68a5700ed51e1660f44c4b2bb524 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Thu, 1 Aug 2024 08:41:17 -0700 Subject: drm/printer: Allow NULL data in devcoredump printer We want to determine the size of the devcoredump before writing it out. To that end, we will run the devcoredump printer with NULL data to get the size, alloc data based on the generated offset, then run the devcorecump again with a valid data pointer to print. This necessitates not writing data to the data pointer on the initial pass, when it is NULL. v5: - Better commit message (Jonathan) - Add kerenl doc with examples (Jani) Cc: Maarten Lankhorst Acked-by: Maarten Lankhorst Signed-off-by: Matthew Brost Reviewed-by: Jonathan Cavitt Link: https://patchwork.freedesktop.org/patch/msgid/20240801154118.2547543-3-matthew.brost@intel.com --- drivers/gpu/drm/drm_print.c | 13 ++++++----- include/drm/drm_print.h | 54 ++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 61 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_print.c b/drivers/gpu/drm/drm_print.c index cf24dfdeb6b2..0081190201a7 100644 --- a/drivers/gpu/drm/drm_print.c +++ b/drivers/gpu/drm/drm_print.c @@ -100,8 +100,9 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) copy = iterator->remain; /* Copy out the bit of the string that we need */ - memcpy(iterator->data, - str + (iterator->start - iterator->offset), copy); + if (iterator->data) + memcpy(iterator->data, + str + (iterator->start - iterator->offset), copy); iterator->offset = iterator->start + copy; iterator->remain -= copy; @@ -110,7 +111,8 @@ void __drm_puts_coredump(struct drm_printer *p, const char *str) len = min_t(ssize_t, strlen(str), iterator->remain); - memcpy(iterator->data + pos, str, len); + if (iterator->data) + memcpy(iterator->data + pos, str, len); iterator->offset += len; iterator->remain -= len; @@ -140,8 +142,9 @@ void __drm_printfn_coredump(struct drm_printer *p, struct va_format *vaf) if ((iterator->offset >= iterator->start) && (len < iterator->remain)) { ssize_t pos = iterator->offset - iterator->start; - snprintf(((char *) iterator->data) + pos, - iterator->remain, "%pV", vaf); + if (iterator->data) + snprintf(((char *) iterator->data) + pos, + iterator->remain, "%pV", vaf); iterator->offset += len; iterator->remain -= len; diff --git a/include/drm/drm_print.h b/include/drm/drm_print.h index 5d9dff5149c9..d2676831d765 100644 --- a/include/drm/drm_print.h +++ b/include/drm/drm_print.h @@ -221,7 +221,8 @@ drm_vprintf(struct drm_printer *p, const char *fmt, va_list *va) /** * struct drm_print_iterator - local struct used with drm_printer_coredump - * @data: Pointer to the devcoredump output buffer + * @data: Pointer to the devcoredump output buffer, can be NULL if using + * drm_printer_coredump to determine size of devcoredump * @start: The offset within the buffer to start writing * @remain: The number of bytes to write for this iteration */ @@ -266,6 +267,57 @@ struct drm_print_iterator { * coredump_read, ...) * } * + * The above example has a time complexity of O(N^2), where N is the size of the + * devcoredump. This is acceptable for small devcoredumps but scales poorly for + * larger ones. + * + * Another use case for drm_coredump_printer is to capture the devcoredump into + * a saved buffer before the dev_coredump() callback. This involves two passes: + * one to determine the size of the devcoredump and another to print it to a + * buffer. Then, in dev_coredump(), copy from the saved buffer into the + * devcoredump read buffer. + * + * For example:: + * + * char *devcoredump_saved_buffer; + * + * ssize_t __coredump_print(char *buffer, ssize_t count, ...) + * { + * struct drm_print_iterator iter; + * struct drm_printer p; + * + * iter.data = buffer; + * iter.start = 0; + * iter.remain = count; + * + * p = drm_coredump_printer(&iter); + * + * drm_printf(p, "foo=%d\n", foo); + * ... + * return count - iter.remain; + * } + * + * void coredump_print(...) + * { + * ssize_t count; + * + * count = __coredump_print(NULL, INT_MAX, ...); + * devcoredump_saved_buffer = kvmalloc(count, GFP_KERNEL); + * __coredump_print(devcoredump_saved_buffer, count, ...); + * } + * + * void coredump_read(char *buffer, loff_t offset, size_t count, + * void *data, size_t datalen) + * { + * ... + * memcpy(buffer, devcoredump_saved_buffer + offset, count); + * ... + * } + * + * The above example has a time complexity of O(N*2), where N is the size of the + * devcoredump. This scales better than the previous example for larger + * devcoredumps. + * * RETURNS: * The &drm_printer object */ -- cgit v1.3.1 From b6b242d019ed23195c81cf00eb8290d386efb83f Mon Sep 17 00:00:00 2001 From: Hamza Mahfooz Date: Fri, 2 Aug 2024 10:59:45 -0400 Subject: Revert "drm: Introduce 'power saving policy' drm property" This reverts commit 76299a557f36d624ca32500173ad7856e1ad93c0. It was merged without meeting userspace requirements. Signed-off-by: Hamza Mahfooz Reviewed-by: Harry Wentland Link: https://patchwork.freedesktop.org/patch/msgid/20240802145946.48073-1-hamza.mahfooz@amd.com --- drivers/gpu/drm/drm_connector.c | 48 ----------------------------------------- include/drm/drm_connector.h | 2 -- include/drm/drm_mode_config.h | 5 ----- include/uapi/drm/drm_mode.h | 7 ------ 4 files changed, 62 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_connector.c b/drivers/gpu/drm/drm_connector.c index 7c44e3a1d8e0..b4f4d2f908d1 100644 --- a/drivers/gpu/drm/drm_connector.c +++ b/drivers/gpu/drm/drm_connector.c @@ -1043,11 +1043,6 @@ static const struct drm_prop_enum_list drm_scaling_mode_enum_list[] = { { DRM_MODE_SCALE_ASPECT, "Full aspect" }, }; -static const struct drm_prop_enum_list drm_power_saving_policy_enum_list[] = { - { __builtin_ffs(DRM_MODE_REQUIRE_COLOR_ACCURACY) - 1, "Require color accuracy" }, - { __builtin_ffs(DRM_MODE_REQUIRE_LOW_LATENCY) - 1, "Require low latency" }, -}; - static const struct drm_prop_enum_list drm_aspect_ratio_enum_list[] = { { DRM_MODE_PICTURE_ASPECT_NONE, "Automatic" }, { DRM_MODE_PICTURE_ASPECT_4_3, "4:3" }, @@ -1634,16 +1629,6 @@ EXPORT_SYMBOL(drm_hdmi_connector_get_output_format_name); * * Drivers can set up these properties by calling * drm_mode_create_tv_margin_properties(). - * power saving policy: - * This property is used to set the power saving policy for the connector. - * This property is populated with a bitmask of optional requirements set - * by the drm master for the drm driver to respect: - * - "Require color accuracy": Disable power saving features that will - * affect color fidelity. - * For example: Hardware assisted backlight modulation. - * - "Require low latency": Disable power saving features that will - * affect latency. - * For example: Panel self refresh (PSR) */ int drm_connector_create_standard_properties(struct drm_device *dev) @@ -2146,39 +2131,6 @@ int drm_mode_create_scaling_mode_property(struct drm_device *dev) } EXPORT_SYMBOL(drm_mode_create_scaling_mode_property); -/** - * drm_mode_create_power_saving_policy_property - create power saving policy property - * @dev: DRM device - * @supported_policies: bitmask of supported power saving policies - * - * Called by a driver the first time it's needed, must be attached to desired - * connectors. - * - * Returns: %0 - */ -int drm_mode_create_power_saving_policy_property(struct drm_device *dev, - uint64_t supported_policies) -{ - struct drm_property *power_saving; - - if (dev->mode_config.power_saving_policy) - return 0; - WARN_ON((supported_policies & DRM_MODE_POWER_SAVING_POLICY_ALL) == 0); - - power_saving = - drm_property_create_bitmask(dev, 0, "power saving policy", - drm_power_saving_policy_enum_list, - ARRAY_SIZE(drm_power_saving_policy_enum_list), - supported_policies); - if (!power_saving) - return -ENOMEM; - - dev->mode_config.power_saving_policy = power_saving; - - return 0; -} -EXPORT_SYMBOL(drm_mode_create_power_saving_policy_property); - /** * DOC: Variable refresh properties * diff --git a/include/drm/drm_connector.h b/include/drm/drm_connector.h index 5ad735253413..e3fa43291f44 100644 --- a/include/drm/drm_connector.h +++ b/include/drm/drm_connector.h @@ -2267,8 +2267,6 @@ int drm_mode_create_dp_colorspace_property(struct drm_connector *connector, u32 supported_colorspaces); int drm_mode_create_content_type_property(struct drm_device *dev); int drm_mode_create_suggested_offset_properties(struct drm_device *dev); -int drm_mode_create_power_saving_policy_property(struct drm_device *dev, - uint64_t supported_policies); int drm_connector_set_path_property(struct drm_connector *connector, const char *path); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index 150f9a3b649f..ab0f167474b1 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -969,11 +969,6 @@ struct drm_mode_config { */ struct drm_atomic_state *suspend_state; - /** - * @power_saving_policy: bitmask for power saving policy requests. - */ - struct drm_property *power_saving_policy; - const struct drm_mode_config_helper_funcs *helper_private; }; diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index 880303c2ad97..d390011b89b4 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -152,13 +152,6 @@ extern "C" { #define DRM_MODE_SCALE_CENTER 2 /* Centered, no scaling */ #define DRM_MODE_SCALE_ASPECT 3 /* Full screen, preserve aspect */ -/* power saving policy options */ -#define DRM_MODE_REQUIRE_COLOR_ACCURACY BIT(0) /* Compositor requires color accuracy */ -#define DRM_MODE_REQUIRE_LOW_LATENCY BIT(1) /* Compositor requires low latency */ - -#define DRM_MODE_POWER_SAVING_POLICY_ALL (DRM_MODE_REQUIRE_COLOR_ACCURACY |\ - DRM_MODE_REQUIRE_LOW_LATENCY) - /* Dithering mode options */ #define DRM_MODE_DITHERING_OFF 0 #define DRM_MODE_DITHERING_ON 1 -- cgit v1.3.1 From db65eb46de135338d6177f8853e0fd208f19d63e Mon Sep 17 00:00:00 2001 From: Arunpravin Paneer Selvam Date: Fri, 2 Aug 2024 11:13:19 +0530 Subject: drm/buddy: Add start address support to trim function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Add a new start parameter in trim function to specify exact address from where to start the trimming. This would help us in situations like if drivers would like to do address alignment for specific requirements. - Add a new flag DRM_BUDDY_TRIM_DISABLE. Drivers can use this flag to disable the allocator trimming part. This patch enables the drivers control trimming and they can do it themselves based on the application requirements. v1:(Matthew) - check new_start alignment with min chunk_size - use range_overflows() Signed-off-by: Arunpravin Paneer Selvam Acked-by: Alex Deucher Acked-by: Christian König Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_buddy.c | 25 +++++++++++++++++++++++-- drivers/gpu/drm/xe/xe_ttm_vram_mgr.c | 2 +- include/drm/drm_buddy.h | 2 ++ 3 files changed, 26 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_buddy.c b/drivers/gpu/drm/drm_buddy.c index 6a8e45e9d0ec..103c185bb1c8 100644 --- a/drivers/gpu/drm/drm_buddy.c +++ b/drivers/gpu/drm/drm_buddy.c @@ -851,6 +851,7 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * drm_buddy_block_trim - free unused pages * * @mm: DRM buddy manager + * @start: start address to begin the trimming. * @new_size: original size requested * @blocks: Input and output list of allocated blocks. * MUST contain single block as input to be trimmed. @@ -866,11 +867,13 @@ static int __alloc_contig_try_harder(struct drm_buddy *mm, * 0 on success, error code on failure. */ int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks) { struct drm_buddy_block *parent; struct drm_buddy_block *block; + u64 block_start, block_end; LIST_HEAD(dfs); u64 new_start; int err; @@ -882,6 +885,9 @@ int drm_buddy_block_trim(struct drm_buddy *mm, struct drm_buddy_block, link); + block_start = drm_buddy_block_offset(block); + block_end = block_start + drm_buddy_block_size(mm, block); + if (WARN_ON(!drm_buddy_block_is_allocated(block))) return -EINVAL; @@ -894,6 +900,20 @@ int drm_buddy_block_trim(struct drm_buddy *mm, if (new_size == drm_buddy_block_size(mm, block)) return 0; + new_start = block_start; + if (start) { + new_start = *start; + + if (new_start < block_start) + return -EINVAL; + + if (!IS_ALIGNED(new_start, mm->chunk_size)) + return -EINVAL; + + if (range_overflows(new_start, new_size, block_end)) + return -EINVAL; + } + list_del(&block->link); mark_free(mm, block); mm->avail += drm_buddy_block_size(mm, block); @@ -904,7 +924,6 @@ int drm_buddy_block_trim(struct drm_buddy *mm, parent = block->parent; block->parent = NULL; - new_start = drm_buddy_block_offset(block); list_add(&block->tmp_link, &dfs); err = __alloc_range(mm, &dfs, new_start, new_size, blocks, NULL); if (err) { @@ -1066,7 +1085,8 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } while (1); /* Trim the allocated block to the required size */ - if (original_size != size) { + if (!(flags & DRM_BUDDY_TRIM_DISABLE) && + original_size != size) { struct list_head *trim_list; LIST_HEAD(temp); u64 trim_size; @@ -1083,6 +1103,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, } drm_buddy_block_trim(mm, + NULL, trim_size, trim_list); diff --git a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c index fe3779fdba2c..423b261ea743 100644 --- a/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c +++ b/drivers/gpu/drm/xe/xe_ttm_vram_mgr.c @@ -150,7 +150,7 @@ static int xe_ttm_vram_mgr_new(struct ttm_resource_manager *man, } while (remaining_size); if (place->flags & TTM_PL_FLAG_CONTIGUOUS) { - if (!drm_buddy_block_trim(mm, vres->base.size, &vres->blocks)) + if (!drm_buddy_block_trim(mm, NULL, vres->base.size, &vres->blocks)) size = vres->base.size; } diff --git a/include/drm/drm_buddy.h b/include/drm/drm_buddy.h index 2a74fa9d0ce5..9689a7c5dd36 100644 --- a/include/drm/drm_buddy.h +++ b/include/drm/drm_buddy.h @@ -27,6 +27,7 @@ #define DRM_BUDDY_CONTIGUOUS_ALLOCATION BIT(2) #define DRM_BUDDY_CLEAR_ALLOCATION BIT(3) #define DRM_BUDDY_CLEARED BIT(4) +#define DRM_BUDDY_TRIM_DISABLE BIT(5) struct drm_buddy_block { #define DRM_BUDDY_HEADER_OFFSET GENMASK_ULL(63, 12) @@ -155,6 +156,7 @@ int drm_buddy_alloc_blocks(struct drm_buddy *mm, unsigned long flags); int drm_buddy_block_trim(struct drm_buddy *mm, + u64 *start, u64 new_size, struct list_head *blocks); -- cgit v1.3.1 From d27a14060f8501e556a65b346b2644be0d0a2de8 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 7 Aug 2024 15:36:12 +0200 Subject: drm/panic: Move drm_panic_register prototype to drm_crtc_internal.h drm_panic_[un]register() are only used by the core drm, and are not intended to be called by other drm drivers, so move their prototypes to drm_crtc_internal.h. Suggested-by: Daniel Vetter Signed-off-by: Jocelyn Falempe Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240807134902.458669-4-jfalempe@redhat.com --- drivers/gpu/drm/drm_crtc_internal.h | 4 ++++ include/drm/drm_panic.h | 12 ------------ 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_crtc_internal.h b/drivers/gpu/drm/drm_crtc_internal.h index 061436361998..251ff7bba40d 100644 --- a/drivers/gpu/drm/drm_crtc_internal.h +++ b/drivers/gpu/drm/drm_crtc_internal.h @@ -318,8 +318,12 @@ drm_edid_load_firmware(struct drm_connector *connector) /* drm_panic.c */ #ifdef CONFIG_DRM_PANIC bool drm_panic_is_enabled(struct drm_device *dev); +void drm_panic_register(struct drm_device *dev); +void drm_panic_unregister(struct drm_device *dev); #else static inline bool drm_panic_is_enabled(struct drm_device *dev) { return false; } +static inline void drm_panic_register(struct drm_device *dev) {} +static inline void drm_panic_unregister(struct drm_device *dev) {} #endif #endif /* __DRM_CRTC_INTERNAL_H__ */ diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index 73bb3f3d9ed9..a4bd3681920d 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -146,16 +146,4 @@ struct drm_scanout_buffer { #define drm_panic_unlock(dev, flags) \ raw_spin_unlock_irqrestore(&(dev)->mode_config.panic_lock, flags) -#ifdef CONFIG_DRM_PANIC - -void drm_panic_register(struct drm_device *dev); -void drm_panic_unregister(struct drm_device *dev); - -#else - -static inline void drm_panic_register(struct drm_device *dev) {} -static inline void drm_panic_unregister(struct drm_device *dev) {} - -#endif - #endif /* __DRM_PANIC_H__ */ -- cgit v1.3.1 From 969135862e731620b9e03bb0c21179ff1cccfd0e Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Wed, 7 Aug 2024 15:36:13 +0200 Subject: drm/panic: Move copyright notice to the top Move the copyright notice to the top of drm_panic.h, and add the missing Red Hat copyright notice. Suggested-by: Thomas Zimmermann Signed-off-by: Jocelyn Falempe Reviewed-by: Daniel Vetter Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240807134902.458669-5-jfalempe@redhat.com --- include/drm/drm_panic.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/drm/drm_panic.h b/include/drm/drm_panic.h index a4bd3681920d..54085d5d05c3 100644 --- a/include/drm/drm_panic.h +++ b/include/drm/drm_panic.h @@ -1,4 +1,10 @@ /* SPDX-License-Identifier: GPL-2.0 or MIT */ + +/* + * Copyright (c) 2024 Intel + * Copyright (c) 2024 Red Hat + */ + #ifndef __DRM_PANIC_H__ #define __DRM_PANIC_H__ @@ -8,9 +14,6 @@ #include #include -/* - * Copyright (c) 2024 Intel - */ /** * struct drm_scanout_buffer - DRM scanout buffer -- cgit v1.3.1 From 0dc4fb69eb14320ea0fcd9657b7748eec201ccaa Mon Sep 17 00:00:00 2001 From: Mohammed Anees Date: Sun, 11 Aug 2024 06:16:51 -0400 Subject: drm: Add missing documentation for struct drm_plane_size_hint This patch takes care of the following warnings during documentation compiling: ./include/uapi/drm/drm_mode.h:869: warning: Function parameter or struct member 'width' not described in 'drm_plane_size_hint' ./include/uapi/drm/drm_mode.h:869: warning: Function parameter or struct member 'height' not described in 'drm_plane_size_hint' Signed-off-by: Mohammed Anees Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240811101653.170223-1-pvmohammedanees2003@gmail.com --- include/uapi/drm/drm_mode.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_mode.h b/include/uapi/drm/drm_mode.h index d390011b89b4..c082810c08a8 100644 --- a/include/uapi/drm/drm_mode.h +++ b/include/uapi/drm/drm_mode.h @@ -859,6 +859,8 @@ struct drm_color_lut { /** * struct drm_plane_size_hint - Plane size hints + * @width: The width of the plane in pixel + * @height: The height of the plane in pixel * * The plane SIZE_HINTS property blob contains an * array of struct drm_plane_size_hint. -- cgit v1.3.1 From 844efaef48e846b5310b0e7af5e7578762eb7e8d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 22 Apr 2024 17:33:38 +0300 Subject: drm: fixed: Don't use "proxy" headers Update header inclusions to follow IWYU (Include What You Use) principle. Signed-off-by: Andy Shevchenko Reviewed-by: Jani Nikula Link: https://patchwork.freedesktop.org/patch/msgid/20240422143338.2026791-1-andriy.shevchenko@linux.intel.com Signed-off-by: Jani Nikula --- include/drm/drm_fixed.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_fixed.h b/include/drm/drm_fixed.h index ef8bc8d72039..1922188f00e8 100644 --- a/include/drm/drm_fixed.h +++ b/include/drm/drm_fixed.h @@ -25,8 +25,9 @@ #ifndef DRM_FIXED_H #define DRM_FIXED_H -#include #include +#include +#include typedef union dfixed { u32 full; -- cgit v1.3.1 From 6c22aadbf6fd0240181eb4897308153c2aabec2a Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:28 +0200 Subject: drm/fbdev-helper: Remove drm_fb_helper_output_poll_changed() The function is unused. Remove it. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-8-tzimmermann@suse.de --- drivers/gpu/drm/drm_fb_helper.c | 15 --------------- include/drm/drm_fb_helper.h | 6 ------ 2 files changed, 21 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_fb_helper.c b/drivers/gpu/drm/drm_fb_helper.c index fe5667477839..29c53f9f449c 100644 --- a/drivers/gpu/drm/drm_fb_helper.c +++ b/drivers/gpu/drm/drm_fb_helper.c @@ -2003,18 +2003,3 @@ void drm_fb_helper_lastclose(struct drm_device *dev) drm_fb_helper_restore_fbdev_mode_unlocked(dev->fb_helper); } EXPORT_SYMBOL(drm_fb_helper_lastclose); - -/** - * drm_fb_helper_output_poll_changed - DRM mode config \.output_poll_changed - * helper for fbdev emulation - * @dev: DRM device - * - * This function can be used as the - * &drm_mode_config_funcs.output_poll_changed callback for drivers that only - * need to call drm_fbdev.hotplug_event(). - */ -void drm_fb_helper_output_poll_changed(struct drm_device *dev) -{ - drm_fb_helper_hotplug_event(dev->fb_helper); -} -EXPORT_SYMBOL(drm_fb_helper_output_poll_changed); diff --git a/include/drm/drm_fb_helper.h b/include/drm/drm_fb_helper.h index 375737fd6c36..699f2790b9ac 100644 --- a/include/drm/drm_fb_helper.h +++ b/include/drm/drm_fb_helper.h @@ -271,9 +271,7 @@ int drm_fb_helper_hotplug_event(struct drm_fb_helper *fb_helper); int drm_fb_helper_initial_config(struct drm_fb_helper *fb_helper); int drm_fb_helper_debug_enter(struct fb_info *info); int drm_fb_helper_debug_leave(struct fb_info *info); - void drm_fb_helper_lastclose(struct drm_device *dev); -void drm_fb_helper_output_poll_changed(struct drm_device *dev); #else static inline void drm_fb_helper_prepare(struct drm_device *dev, struct drm_fb_helper *helper, @@ -401,10 +399,6 @@ static inline int drm_fb_helper_debug_leave(struct fb_info *info) static inline void drm_fb_helper_lastclose(struct drm_device *dev) { } - -static inline void drm_fb_helper_output_poll_changed(struct drm_device *dev) -{ -} #endif #endif -- cgit v1.3.1 From b5757a5be2fac24f5c138e8ddb3b2c7be8ba1cb3 Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:29 +0200 Subject: drm: Remove struct drm_driver.lastclose The lastclose callback in struct drm_driver is unused. Remove it. Also update documentation. v2: - update to use drm_lastclose() - fix typo in documentation Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-9-tzimmermann@suse.de --- drivers/gpu/drm/drm_file.c | 28 ++++++---------------------- drivers/gpu/drm/drm_internal.h | 1 - include/drm/drm_drv.h | 28 ---------------------------- 3 files changed, 6 insertions(+), 51 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 513bef816ae9..e8a841e70934 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -63,15 +63,6 @@ bool drm_dev_needs_global_mutex(struct drm_device *dev) if (dev->driver->load || dev->driver->unload) return true; - /* - * Drivers with the lastclose callback assume that it's synchronized - * against concurrent opens, which again needs the BKL. The proper fix - * is to use the drm_client infrastructure with proper locking for each - * client. - */ - if (dev->driver->lastclose) - return true; - return false; } @@ -396,14 +387,8 @@ err_undo: } EXPORT_SYMBOL(drm_open); -void drm_lastclose(struct drm_device * dev) +static void drm_lastclose(struct drm_device *dev) { - drm_dbg_core(dev, "\n"); - - if (dev->driver->lastclose) - dev->driver->lastclose(dev); - drm_dbg_core(dev, "driver lastclose completed\n"); - drm_client_dev_restore(dev); if (dev_is_pci(dev->dev)) @@ -416,9 +401,9 @@ void drm_lastclose(struct drm_device * dev) * @filp: file pointer. * * This function must be used by drivers as their &file_operations.release - * method. It frees any resources associated with the open file, and calls the - * &drm_driver.postclose driver callback. If this is the last open file for the - * DRM device also proceeds to call the &drm_driver.lastclose driver callback. + * method. It frees any resources associated with the open file. If this + * is the last open file for the DRM device, it also restores the active + * in-kernel DRM client. * * RETURNS: * @@ -488,9 +473,8 @@ void drm_file_update_pid(struct drm_file *filp) * * This function may be used by drivers as their &file_operations.release * method. It frees any resources associated with the open file prior to taking - * the drm_global_mutex, which then calls the &drm_driver.postclose driver - * callback. If this is the last open file for the DRM device also proceeds to - * call the &drm_driver.lastclose driver callback. + * the drm_global_mutex. If this is the last open file for the DRM device, it + * then restores the active in-kernel DRM client. * * RETURNS: * diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 690505a1f7a5..23c99803af44 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -53,7 +53,6 @@ extern struct mutex drm_global_mutex; bool drm_dev_needs_global_mutex(struct drm_device *dev); struct drm_file *drm_file_alloc(struct drm_minor *minor); void drm_file_free(struct drm_file *file); -void drm_lastclose(struct drm_device *dev); #ifdef CONFIG_PCI diff --git a/include/drm/drm_drv.h b/include/drm/drm_drv.h index cd37936c3926..02ea4e3248fd 100644 --- a/include/drm/drm_drv.h +++ b/include/drm/drm_drv.h @@ -228,34 +228,6 @@ struct drm_driver { */ void (*postclose) (struct drm_device *, struct drm_file *); - /** - * @lastclose: - * - * Called when the last &struct drm_file has been closed and there's - * currently no userspace client for the &struct drm_device. - * - * Modern drivers should only use this to force-restore the fbdev - * framebuffer using drm_fb_helper_restore_fbdev_mode_unlocked(). - * Anything else would indicate there's something seriously wrong. - * Modern drivers can also use this to execute delayed power switching - * state changes, e.g. in conjunction with the :ref:`vga_switcheroo` - * infrastructure. - * - * This is called after @postclose hook has been called. - * - * NOTE: - * - * All legacy drivers use this callback to de-initialize the hardware. - * This is purely because of the shadow-attach model, where the DRM - * kernel driver does not really own the hardware. Instead ownershipe is - * handled with the help of userspace through an inheritedly racy dance - * to set/unset the VT into raw mode. - * - * Legacy drivers initialize the hardware in the @firstopen callback, - * which isn't even called for modern drivers. - */ - void (*lastclose) (struct drm_device *); - /** * @unload: * -- cgit v1.3.1 From 446d0f4849b101bfc35c0d00835c3e3a4804616d Mon Sep 17 00:00:00 2001 From: Thomas Zimmermann Date: Mon, 12 Aug 2024 10:28:30 +0200 Subject: drm: Remove struct drm_mode_config_funcs.output_poll_changed The output_poll_changed hook in struct drm_mode_config_funcs is unused. Remove it. The helper drm_client_dev_hotplug() implements the callback's functionality. Signed-off-by: Thomas Zimmermann Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812083000.337744-10-tzimmermann@suse.de --- drivers/gpu/drm/drm_probe_helper.c | 10 +--------- include/drm/drm_mode_config.h | 16 ---------------- 2 files changed, 1 insertion(+), 25 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_probe_helper.c b/drivers/gpu/drm/drm_probe_helper.c index 285290067056..92f21764246f 100644 --- a/drivers/gpu/drm/drm_probe_helper.c +++ b/drivers/gpu/drm/drm_probe_helper.c @@ -714,7 +714,7 @@ EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); * @dev: drm_device whose connector state changed * * This function fires off the uevent for userspace and also calls the - * output_poll_changed function, which is most commonly used to inform the fbdev + * client hotplug function, which is most commonly used to inform the fbdev * emulation code and allow it to update the fbcon output configuration. * * Drivers should call this from their hotplug handling code when a change is @@ -730,11 +730,7 @@ EXPORT_SYMBOL(drm_helper_probe_single_connector_modes); */ void drm_kms_helper_hotplug_event(struct drm_device *dev) { - /* send a uevent + call fbdev */ drm_sysfs_hotplug_event(dev); - if (dev->mode_config.funcs->output_poll_changed) - dev->mode_config.funcs->output_poll_changed(dev); - drm_client_dev_hotplug(dev); } EXPORT_SYMBOL(drm_kms_helper_hotplug_event); @@ -750,11 +746,7 @@ void drm_kms_helper_connector_hotplug_event(struct drm_connector *connector) { struct drm_device *dev = connector->dev; - /* send a uevent + call fbdev */ drm_sysfs_connector_hotplug_event(connector); - if (dev->mode_config.funcs->output_poll_changed) - dev->mode_config.funcs->output_poll_changed(dev); - drm_client_dev_hotplug(dev); } EXPORT_SYMBOL(drm_kms_helper_connector_hotplug_event); diff --git a/include/drm/drm_mode_config.h b/include/drm/drm_mode_config.h index ab0f167474b1..271765e2e9f2 100644 --- a/include/drm/drm_mode_config.h +++ b/include/drm/drm_mode_config.h @@ -97,22 +97,6 @@ struct drm_mode_config_funcs { */ const struct drm_format_info *(*get_format_info)(const struct drm_mode_fb_cmd2 *mode_cmd); - /** - * @output_poll_changed: - * - * Callback used by helpers to inform the driver of output configuration - * changes. - * - * Drivers implementing fbdev emulation use drm_kms_helper_hotplug_event() - * to call this hook to inform the fbdev helper of output changes. - * - * This hook is deprecated, drivers should instead implement fbdev - * support with struct drm_client, which takes care of any necessary - * hotplug event forwarding already without further involvement by - * the driver. - */ - void (*output_poll_changed)(struct drm_device *dev); - /** * @mode_valid: * -- cgit v1.3.1 From 5ddb0a8aa8e4754a8fb77e284e0d6f46c2350f88 Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Tue, 6 Aug 2024 19:29:48 +0530 Subject: drm/mipi-dsi: add more multi functions for better error handling Add more functions that can benefit from being multi style and mark older variants as deprecated to eventually convert all mipi_dsi functions to multi style. Acked-by: Maxime Ripard Signed-off-by: Tejas Vipin Reviewed-by: Douglas Anderson Acked-by: Jessica Zhang [dianders: Fixed whitespace warning when applying] Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240806135949.468636-2-tejasvipin76@gmail.com --- drivers/gpu/drm/drm_mipi_dsi.c | 194 +++++++++++++++++++++++++++++++++++++++++ include/drm/drm_mipi_dsi.h | 10 +++ 2 files changed, 204 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 969cfd5a01ae..2d96c0cd9b7e 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -603,6 +603,8 @@ EXPORT_SYMBOL(mipi_dsi_shutdown_peripheral); * mipi_dsi_turn_on_peripheral() - sends a Turn On Peripheral command * @dsi: DSI peripheral device * + * This function is deprecated. Use mipi_dsi_turn_on_peripheral_multi() instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_turn_on_peripheral(struct mipi_dsi_device *dsi) @@ -652,6 +654,7 @@ EXPORT_SYMBOL(mipi_dsi_set_maximum_return_packet_size); * @pps_selector: Select PPS from the table of pre-stored or uploaded PPS entries * * Enable or disable Display Stream Compression on the peripheral. + * This function is deprecated. Use mipi_dsi_compression_mode_ext_multi() instead. * * Return: 0 on success or a negative error code on failure. */ @@ -703,6 +706,7 @@ EXPORT_SYMBOL(mipi_dsi_compression_mode); * @pps: VESA DSC 1.1 Picture Parameter Set * * Transmit the VESA DSC 1.1 Picture Parameter Set to the peripheral. + * This function is deprecated. Use mipi_dsi_picture_parameter_set_multi() instead. * * Return: 0 on success or a negative error code on failure. */ @@ -1037,6 +1041,8 @@ EXPORT_SYMBOL(mipi_dsi_dcs_read); * mipi_dsi_dcs_nop() - send DCS nop packet * @dsi: DSI peripheral device * + * This function is deprecated. Use mipi_dsi_dcs_nop_multi() instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_nop(struct mipi_dsi_device *dsi) @@ -1055,6 +1061,8 @@ EXPORT_SYMBOL(mipi_dsi_dcs_nop); * mipi_dsi_dcs_soft_reset() - perform a software reset of the display module * @dsi: DSI peripheral device * + * This function is deprecated. Use mipi_dsi_dcs_soft_reset_multi() instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_soft_reset(struct mipi_dsi_device *dsi) @@ -1124,6 +1132,8 @@ EXPORT_SYMBOL(mipi_dsi_dcs_get_pixel_format); * display module except interface communication * @dsi: DSI peripheral device * + * This function is deprecated. Use mipi_dsi_dcs_enter_sleep_mode_multi() instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_enter_sleep_mode(struct mipi_dsi_device *dsi) @@ -1143,6 +1153,8 @@ EXPORT_SYMBOL(mipi_dsi_dcs_enter_sleep_mode); * module * @dsi: DSI peripheral device * + * This function is deprecated. Use mipi_dsi_dcs_exit_sleep_mode_multi() instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_exit_sleep_mode(struct mipi_dsi_device *dsi) @@ -1162,6 +1174,8 @@ EXPORT_SYMBOL(mipi_dsi_dcs_exit_sleep_mode); * display device * @dsi: DSI peripheral device * + * This function is deprecated. Use mipi_dsi_dcs_set_display_off_multi() instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_set_display_off(struct mipi_dsi_device *dsi) @@ -1181,6 +1195,8 @@ EXPORT_SYMBOL(mipi_dsi_dcs_set_display_off); * display device * @dsi: DSI peripheral device * + * This function is deprecated. Use mipi_dsi_dcs_set_display_on_multi() instead. + * * Return: 0 on success or a negative error code on failure */ int mipi_dsi_dcs_set_display_on(struct mipi_dsi_device *dsi) @@ -1202,6 +1218,9 @@ EXPORT_SYMBOL(mipi_dsi_dcs_set_display_on); * @start: first column of frame memory * @end: last column of frame memory * + * This function is deprecated. Use mipi_dsi_dcs_set_column_address_multi() + * instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_set_column_address(struct mipi_dsi_device *dsi, u16 start, @@ -1226,6 +1245,9 @@ EXPORT_SYMBOL(mipi_dsi_dcs_set_column_address); * @start: first page of frame memory * @end: last page of frame memory * + * This function is deprecated. Use mipi_dsi_dcs_set_page_address_multi() + * instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_set_page_address(struct mipi_dsi_device *dsi, u16 start, @@ -1268,6 +1290,8 @@ EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_off); * @dsi: DSI peripheral device * @mode: the Tearing Effect Output Line mode * + * This function is deprecated. Use mipi_dsi_dcs_set_tear_on_multi() instead. + * * Return: 0 on success or a negative error code on failure */ int mipi_dsi_dcs_set_tear_on(struct mipi_dsi_device *dsi, @@ -1291,6 +1315,9 @@ EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_on); * @dsi: DSI peripheral device * @format: pixel format * + * This function is deprecated. Use mipi_dsi_dcs_set_pixel_format_multi() + * instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_set_pixel_format(struct mipi_dsi_device *dsi, u8 format) @@ -1334,6 +1361,9 @@ EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_scanline); * @dsi: DSI peripheral device * @brightness: brightness value * + * This function is deprecated. Use mipi_dsi_dcs_set_display_brightness_multi() + * instead. + * * Return: 0 on success or a negative error code on failure. */ int mipi_dsi_dcs_set_display_brightness(struct mipi_dsi_device *dsi, @@ -1639,6 +1669,170 @@ void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx, } EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_on_multi); +/** + * mipi_dsi_turn_on_peripheral_multi() - sends a Turn On Peripheral command + * @ctx: Context for multiple DSI transactions + * + * Like mipi_dsi_turn_on_peripheral() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_turn_on_peripheral_multi(struct mipi_dsi_multi_context *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_turn_on_peripheral(dsi); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "Failed to turn on peripheral: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_turn_on_peripheral_multi); + +/** + * mipi_dsi_dcs_soft_reset_multi() - perform a software reset of the display module + * @ctx: Context for multiple DSI transactions + * + * Like mipi_dsi_dcs_soft_reset() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_soft_reset_multi(struct mipi_dsi_multi_context *ctx) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_soft_reset(dsi); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "Failed to mipi_dsi_dcs_soft_reset: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_soft_reset_multi); + +/** + * mipi_dsi_dcs_set_display_brightness_multi() - sets the brightness value of + * the display + * @ctx: Context for multiple DSI transactions + * @brightness: brightness value + * + * Like mipi_dsi_dcs_set_display_brightness() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_display_brightness_multi(struct mipi_dsi_multi_context *ctx, + u16 brightness) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_display_brightness(dsi, brightness); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "Failed to write display brightness: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_display_brightness_multi); + +/** + * mipi_dsi_dcs_set_pixel_format_multi() - sets the pixel format for the RGB image + * data used by the interface + * @ctx: Context for multiple DSI transactions + * @format: pixel format + * + * Like mipi_dsi_dcs_set_pixel_format() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_pixel_format_multi(struct mipi_dsi_multi_context *ctx, + u8 format) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_pixel_format(dsi, format); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "Failed to set pixel format: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_pixel_format_multi); + +/** + * mipi_dsi_dcs_set_column_address_multi() - define the column extent of the + * frame memory accessed by the host processor + * @ctx: Context for multiple DSI transactions + * @start: first column of frame memory + * @end: last column of frame memory + * + * Like mipi_dsi_dcs_set_column_address() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_column_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_column_address(dsi, start, end); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "Failed to set column address: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_column_address_multi); + +/** + * mipi_dsi_dcs_set_page_address_multi() - define the page extent of the + * frame memory accessed by the host processor + * @ctx: Context for multiple DSI transactions + * @start: first page of frame memory + * @end: last page of frame memory + * + * Like mipi_dsi_dcs_set_page_address() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_page_address(dsi, start, end); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "Failed to set page address: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_page_address_multi); + static int mipi_dsi_drv_probe(struct device *dev) { struct mipi_dsi_driver *drv = to_mipi_dsi_driver(dev->driver); diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index 0f520eeeaa8e..b78aae45cae7 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -365,6 +365,16 @@ void mipi_dsi_dcs_set_display_off_multi(struct mipi_dsi_multi_context *ctx); void mipi_dsi_dcs_set_display_on_multi(struct mipi_dsi_multi_context *ctx); void mipi_dsi_dcs_set_tear_on_multi(struct mipi_dsi_multi_context *ctx, enum mipi_dsi_dcs_tear_mode mode); +void mipi_dsi_turn_on_peripheral_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_soft_reset_multi(struct mipi_dsi_multi_context *ctx); +void mipi_dsi_dcs_set_display_brightness_multi(struct mipi_dsi_multi_context *ctx, + u16 brightness); +void mipi_dsi_dcs_set_pixel_format_multi(struct mipi_dsi_multi_context *ctx, + u8 format); +void mipi_dsi_dcs_set_column_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end); +void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, + u16 start, u16 end); /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet -- cgit v1.3.1 From dfa5543193f303a7270ec7c725e656970faf7d57 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 17:28:49 +0300 Subject: drm/edid: make drm_edid_block_valid() static drm_edid_block_valid() is no longer used outside of drm_edid.c. Make it static. Acked-by: Zhi Wang Reviewed-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20240812142849.1588006-2-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/drm_edid.c | 17 ++++------------- include/drm/drm_edid.h | 2 -- 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_edid.c b/drivers/gpu/drm/drm_edid.c index 9fc7292f5382..ff1e47a9c83e 100644 --- a/drivers/gpu/drm/drm_edid.c +++ b/drivers/gpu/drm/drm_edid.c @@ -1966,22 +1966,14 @@ static void edid_block_dump(const char *level, const void *block, int block_num) block, EDID_LENGTH, false); } -/** - * drm_edid_block_valid - Sanity check the EDID block (base or extension) - * @_block: pointer to raw EDID block - * @block_num: type of block to validate (0 for base, extension otherwise) - * @print_bad_edid: if true, dump bad EDID blocks to the console - * @edid_corrupt: if true, the header or checksum is invalid - * +/* * Validate a base or extension EDID block and optionally dump bad blocks to * the console. - * - * Return: True if the block is valid, false otherwise. */ -bool drm_edid_block_valid(u8 *_block, int block_num, bool print_bad_edid, - bool *edid_corrupt) +static bool drm_edid_block_valid(void *_block, int block_num, bool print_bad_edid, + bool *edid_corrupt) { - struct edid *block = (struct edid *)_block; + struct edid *block = _block; enum edid_block_status status; bool is_base_block = block_num == 0; bool valid; @@ -2024,7 +2016,6 @@ bool drm_edid_block_valid(u8 *_block, int block_num, bool print_bad_edid, return valid; } -EXPORT_SYMBOL(drm_edid_block_valid); /** * drm_edid_is_valid - sanity check EDID data diff --git a/include/drm/drm_edid.h b/include/drm/drm_edid.h index 6bdfa254a1c1..eaac5e665892 100644 --- a/include/drm/drm_edid.h +++ b/include/drm/drm_edid.h @@ -440,8 +440,6 @@ int drm_add_modes_noedid(struct drm_connector *connector, int hdisplay, int vdisplay); int drm_edid_header_is_valid(const void *edid); -bool drm_edid_block_valid(u8 *raw_edid, int block, bool print_bad_edid, - bool *edid_corrupt); bool drm_edid_is_valid(struct edid *edid); void drm_edid_get_monitor_name(const struct edid *edid, char *name, int buflen); -- cgit v1.3.1 From 3942bb49728ad9e1f94d953a88af169a8f5d8099 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Wed, 14 Aug 2024 13:00:34 +0300 Subject: string: add mem_is_zero() helper to check if memory area is all zeros Almost two thirds of the memchr_inv() usages check if the memory area is all zeros, with no interest in where in the buffer the first non-zero byte is located. Checking for !memchr_inv(s, 0, n) is also not very intuitive or discoverable. Add an explicit mem_is_zero() helper for this use case. Reviewed-by: Kees Cook Reviewed-by: Andy Shevchenko Link: https://patchwork.freedesktop.org/patch/msgid/20240814100035.3100852-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/linux/string.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 9edace076ddb..5855c5626b4b 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -279,6 +279,18 @@ static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt) void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *str, char old, char new); +/** + * mem_is_zero - Check if an area of memory is all 0's. + * @s: The memory area + * @n: The size of the area + * + * Return: True if the area of memory is all 0's. + */ +static inline bool mem_is_zero(const void *s, size_t n) +{ + return !memchr_inv(s, 0, n); +} + extern void kfree_const(const void *x); extern char *kstrdup(const char *s, gfp_t gfp) __malloc; -- cgit v1.3.1 From decbfaf06db05fa1f9b33149ebb3c145b44e878f Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Fri, 16 Aug 2024 15:51:53 +0200 Subject: drm/ttm: Add a flag to allow drivers to skip clear-on-free MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add TTM_TT_FLAG_CLEARED_ON_FREE, which DRM drivers can set before releasing backing stores if they want to skip clear-on-free. Cc: Matthew Auld Cc: Thomas Hellström Suggested-by: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240816135154.19678-1-nirmoy.das@intel.com Signed-off-by: Nirmoy Das --- drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++++++------- include/drm/ttm/ttm_tt.h | 6 +++++- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 8504dbe19c1a..935ab3cfd046 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -222,15 +222,18 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr, } /* Give pages into a specific pool_type */ -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p) +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p, + bool cleared) { unsigned int i, num_pages = 1 << pt->order; - for (i = 0; i < num_pages; ++i) { - if (PageHighMem(p)) - clear_highpage(p + i); - else - clear_page(page_address(p + i)); + if (!cleared) { + for (i = 0; i < num_pages; ++i) { + if (PageHighMem(p)) + clear_highpage(p + i); + else + clear_page(page_address(p + i)); + } } spin_lock(&pt->lock); @@ -394,6 +397,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; + bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE; unsigned int order; pgoff_t i, nr; @@ -407,7 +411,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pt = ttm_pool_select_type(pool, caching, order); if (pt) - ttm_pool_type_give(pt, *pages); + ttm_pool_type_give(pt, *pages, cleared); else ttm_pool_free_page(pool, caching, order, *pages); } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index 2b9d856ff388..cfaf49de2419 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,6 +85,9 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * + * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles + * clearing backing store + * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -94,8 +97,9 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) +#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; -- cgit v1.3.1 From 051c86afc342aed1f84d66ff5d09dc9e1c1685a1 Mon Sep 17 00:00:00 2001 From: Tejas Vipin Date: Sun, 18 Aug 2024 11:38:15 +0530 Subject: drm/mipi-dsi: Add mipi_dsi_dcs_set_tear_scanline_multi mipi_dsi_dcs_set_tear_scanline_multi can heavily benefit from being converted to a multi style function as it is often called in the context of similar functions. Reviewed-by: Douglas Anderson Signed-off-by: Tejas Vipin Signed-off-by: Douglas Anderson Link: https://patchwork.freedesktop.org/patch/msgid/20240818060816.848784-2-tejasvipin76@gmail.com --- drivers/gpu/drm/drm_mipi_dsi.c | 31 +++++++++++++++++++++++++++++++ include/drm/drm_mipi_dsi.h | 2 ++ 2 files changed, 33 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/drm_mipi_dsi.c b/drivers/gpu/drm/drm_mipi_dsi.c index 2d96c0cd9b7e..2bc3973d35a1 100644 --- a/drivers/gpu/drm/drm_mipi_dsi.c +++ b/drivers/gpu/drm/drm_mipi_dsi.c @@ -1339,6 +1339,9 @@ EXPORT_SYMBOL(mipi_dsi_dcs_set_pixel_format); * @dsi: DSI peripheral device * @scanline: scanline to use as trigger * + * This function is deprecated. Use mipi_dsi_dcs_set_tear_scanline_multi() + * instead. + * * Return: 0 on success or a negative error code on failure */ int mipi_dsi_dcs_set_tear_scanline(struct mipi_dsi_device *dsi, u16 scanline) @@ -1833,6 +1836,34 @@ void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, } EXPORT_SYMBOL(mipi_dsi_dcs_set_page_address_multi); +/** + * mipi_dsi_dcs_set_tear_scanline_multi() - set the scanline to use as trigger for + * the Tearing Effect output signal of the display module + * @ctx: Context for multiple DSI transactions + * @scanline: scanline to use as trigger + * + * Like mipi_dsi_dcs_set_tear_scanline() but deals with errors in a way that + * makes it convenient to make several calls in a row. + */ +void mipi_dsi_dcs_set_tear_scanline_multi(struct mipi_dsi_multi_context *ctx, + u16 scanline) +{ + struct mipi_dsi_device *dsi = ctx->dsi; + struct device *dev = &dsi->dev; + int ret; + + if (ctx->accum_err) + return; + + ret = mipi_dsi_dcs_set_tear_scanline(dsi, scanline); + if (ret < 0) { + ctx->accum_err = ret; + dev_err(dev, "Failed to set tear scanline: %d\n", + ctx->accum_err); + } +} +EXPORT_SYMBOL(mipi_dsi_dcs_set_tear_scanline_multi); + static int mipi_dsi_drv_probe(struct device *dev) { struct mipi_dsi_driver *drv = to_mipi_dsi_driver(dev->driver); diff --git a/include/drm/drm_mipi_dsi.h b/include/drm/drm_mipi_dsi.h index b78aae45cae7..f725f8654611 100644 --- a/include/drm/drm_mipi_dsi.h +++ b/include/drm/drm_mipi_dsi.h @@ -375,6 +375,8 @@ void mipi_dsi_dcs_set_column_address_multi(struct mipi_dsi_multi_context *ctx, u16 start, u16 end); void mipi_dsi_dcs_set_page_address_multi(struct mipi_dsi_multi_context *ctx, u16 start, u16 end); +void mipi_dsi_dcs_set_tear_scanline_multi(struct mipi_dsi_multi_context *ctx, + u16 scanline); /** * mipi_dsi_generic_write_seq - transmit data using a generic write packet -- cgit v1.3.1 From 5151fa35ae5979821d091b80096b4c790b187bac Mon Sep 17 00:00:00 2001 From: Juha-Pekka Heikkila Date: Fri, 16 Aug 2024 14:52:28 +0300 Subject: drm/fourcc: define Intel Xe2 related tile4 ccs modifiers Add Tile4 type ccs modifiers to indicate presence of compression on Xe2. Here is defined I915_FORMAT_MOD_4_TILED_LNL_CCS which is meant for integrated graphics with igpu related limitations Here is also defined I915_FORMAT_MOD_4_TILED_BMG_CCS which is meant for discrete graphics with dgpu related limitations Signed-off-by: Juha-Pekka Heikkila Reviewed-by: Lucas De Marchi Acked-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20240816115229.531671-3-juhapekka.heikkila@gmail.com Signed-off-by: Rodrigo Vivi --- include/uapi/drm/drm_fourcc.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/uapi/drm/drm_fourcc.h b/include/uapi/drm/drm_fourcc.h index 2d84a8052b15..78abd819fd62 100644 --- a/include/uapi/drm/drm_fourcc.h +++ b/include/uapi/drm/drm_fourcc.h @@ -702,6 +702,31 @@ extern "C" { */ #define I915_FORMAT_MOD_4_TILED_MTL_RC_CCS_CC fourcc_mod_code(INTEL, 15) +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on integrated graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. + */ +#define I915_FORMAT_MOD_4_TILED_LNL_CCS fourcc_mod_code(INTEL, 16) + +/* + * Intel Color Control Surfaces (CCS) for graphics ver. 20 unified compression + * on discrete graphics + * + * The main surface is Tile 4 and at plane index 0. For semi-planar formats + * like NV12, the Y and UV planes are Tile 4 and are located at plane indices + * 0 and 1, respectively. The CCS for all planes are stored outside of the + * GEM object in a reserved memory area dedicated for the storage of the + * CCS data for all compressible GEM objects. The GEM object must be stored in + * contiguous memory with a size aligned to 64KB + */ +#define I915_FORMAT_MOD_4_TILED_BMG_CCS fourcc_mod_code(INTEL, 17) + /* * Tiled, NV12MT, grouped in 64 (pixels) x 32 (lines) -sized macroblocks * -- cgit v1.3.1 From 4b570ac2eb54f66ff64f2864be6303b8d67cc7f9 Mon Sep 17 00:00:00 2001 From: Jocelyn Falempe Date: Thu, 22 Aug 2024 09:33:55 +0200 Subject: drm/rect: Add drm_rect_overlap() Check if two rectangles overlap. It's a bit similar to drm_rect_intersect() but this won't modify the rectangle. Simplifies a bit drm_panic. Signed-off-by: Jocelyn Falempe Reviewed-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/20240822073852.562286-3-jfalempe@redhat.com --- drivers/gpu/drm/drm_panic.c | 3 +-- include/drm/drm_rect.h | 15 +++++++++++++++ 2 files changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_panic.c b/drivers/gpu/drm/drm_panic.c index 0a047152f88b..59fba23e5fd7 100644 --- a/drivers/gpu/drm/drm_panic.c +++ b/drivers/gpu/drm/drm_panic.c @@ -529,8 +529,7 @@ static void draw_panic_static_user(struct drm_scanout_buffer *sb) /* Fill with the background color, and draw text on top */ drm_panic_fill(sb, &r_screen, bg_color); - if ((r_msg.x1 >= logo_width || r_msg.y1 >= logo_height) && - logo_width <= sb->width && logo_height <= sb->height) { + if (!drm_rect_overlap(&r_logo, &r_msg)) { if (logo_mono) drm_panic_blit(sb, &r_logo, logo_mono->data, DIV_ROUND_UP(logo_width, 8), fg_color); diff --git a/include/drm/drm_rect.h b/include/drm/drm_rect.h index 73fcb899a01d..46f09cf68458 100644 --- a/include/drm/drm_rect.h +++ b/include/drm/drm_rect.h @@ -238,6 +238,21 @@ static inline void drm_rect_fp_to_int(struct drm_rect *dst, drm_rect_height(src) >> 16); } +/** + * drm_rect_overlap - Check if two rectangles overlap + * @a: first rectangle + * @b: second rectangle + * + * RETURNS: + * %true if the rectangles overlap, %false otherwise. + */ +static inline bool drm_rect_overlap(const struct drm_rect *a, + const struct drm_rect *b) +{ + return (a->x2 > b->x1 && b->x2 > a->x1 && + a->y2 > b->y1 && b->y2 > a->y1); +} + bool drm_rect_intersect(struct drm_rect *r, const struct drm_rect *clip); bool drm_rect_clip_scaled(struct drm_rect *src, struct drm_rect *dst, const struct drm_rect *clip); -- cgit v1.3.1 From 6729c73103bd7a0e60b0c980b51b5434010b4502 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Fri, 23 Aug 2024 17:11:09 +0300 Subject: drm/ttm: fix kernel-doc typo for @trylock_only MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit s/tryock_only/trylock_only/ Fixes: da966b82bf3d ("drm/ttm: Provide a generic LRU walker helper") Cc: Thomas Hellström Cc: Christian König Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240823141110.3431423-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- include/drm/ttm/ttm_bo.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo.h b/include/drm/ttm/ttm_bo.h index d1a732d56259..7294dde240fb 100644 --- a/include/drm/ttm/ttm_bo.h +++ b/include/drm/ttm/ttm_bo.h @@ -222,7 +222,7 @@ struct ttm_lru_walk { struct ttm_operation_ctx *ctx; /** @ticket: The struct ww_acquire_ctx if any. */ struct ww_acquire_ctx *ticket; - /** @tryock_only: Only use trylock for locking. */ + /** @trylock_only: Only use trylock for locking. */ bool trylock_only; }; -- cgit v1.3.1 From 67733d7a71503fd3e32eeada371f8aa2516c5c95 Mon Sep 17 00:00:00 2001 From: John Harrison Date: Thu, 1 Aug 2024 20:10:51 -0700 Subject: drm/i915: ARL requires a newer GSC firmware ARL and MTL share a single GSC firmware blob. However, ARL requires a newer version of it. So add differentiate of the PCI ids for ARL from MTL and create ARL as a sub-platform of MTL. That way, all the existing workarounds and such still treat ARL as MTL exactly as before. However, now the GSC code can check for ARL and do an extra version check on the firmware before committing to it. Also, the version extraction code has various ways of failing but the return code was being ignore and so the firmware load would attempt to continue anyway. Fix that by propagating the return code to the next level out. Signed-off-by: John Harrison Fixes: 213c43676beb ("drm/i915/mtl: Remove the 'force_probe' requirement for Meteor Lake") Reviewed-by: Daniele Ceraolo Spurio Acked-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/20240802031051.3816392-1-John.C.Harrison@Intel.com Signed-off-by: Rodrigo Vivi --- drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c | 31 +++++++++++++++++++++++++++++++ drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c | 10 ++++++++-- drivers/gpu/drm/i915/i915_drv.h | 2 ++ drivers/gpu/drm/i915/intel_device_info.c | 7 +++++++ drivers/gpu/drm/i915/intel_device_info.h | 3 +++ include/drm/intel/i915_pciids.h | 11 +++++++---- 6 files changed, 58 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c index 3b69bc6616bd..551b0d7974ff 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_gsc_fw.c @@ -212,6 +212,37 @@ int intel_gsc_fw_get_binary_info(struct intel_uc_fw *gsc_fw, const void *data, s } } + if (IS_ARROWLAKE(gt->i915)) { + bool too_old = false; + + /* + * ARL requires a newer firmware than MTL did (102.0.10.1878) but the + * firmware is actually common. So, need to do an explicit version check + * here rather than using a separate table entry. And if the older + * MTL-only version is found, then just don't use GSC rather than aborting + * the driver load. + */ + if (gsc->release.major < 102) { + too_old = true; + } else if (gsc->release.major == 102) { + if (gsc->release.minor == 0) { + if (gsc->release.patch < 10) { + too_old = true; + } else if (gsc->release.patch == 10) { + if (gsc->release.build < 1878) + too_old = true; + } + } + } + + if (too_old) { + gt_info(gt, "GSC firmware too old for ARL, got %d.%d.%d.%d but need at least 102.0.10.1878", + gsc->release.major, gsc->release.minor, + gsc->release.patch, gsc->release.build); + return -EINVAL; + } + } + return 0; } diff --git a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c index d80278eb45d7..ec33ad942115 100644 --- a/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c +++ b/drivers/gpu/drm/i915/gt/uc/intel_uc_fw.c @@ -698,12 +698,18 @@ static int check_gsc_manifest(struct intel_gt *gt, const struct firmware *fw, struct intel_uc_fw *uc_fw) { + int ret; + switch (uc_fw->type) { case INTEL_UC_FW_TYPE_HUC: - intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_huc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; case INTEL_UC_FW_TYPE_GSC: - intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + ret = intel_gsc_fw_get_binary_info(uc_fw, fw->data, fw->size); + if (ret) + return ret; break; default: MISSING_CASE(uc_fw->type); diff --git a/drivers/gpu/drm/i915/i915_drv.h b/drivers/gpu/drm/i915/i915_drv.h index eb4c33e83c7c..d772cbe15fec 100644 --- a/drivers/gpu/drm/i915/i915_drv.h +++ b/drivers/gpu/drm/i915/i915_drv.h @@ -536,6 +536,8 @@ IS_SUBPLATFORM(const struct drm_i915_private *i915, #define IS_LUNARLAKE(i915) (0 && i915) #define IS_BATTLEMAGE(i915) (0 && i915) +#define IS_ARROWLAKE(i915) \ + IS_SUBPLATFORM(i915, INTEL_METEORLAKE, INTEL_SUBPLATFORM_ARL) #define IS_DG2_G10(i915) \ IS_SUBPLATFORM(i915, INTEL_DG2, INTEL_SUBPLATFORM_G10) #define IS_DG2_G11(i915) \ diff --git a/drivers/gpu/drm/i915/intel_device_info.c b/drivers/gpu/drm/i915/intel_device_info.c index b485e959f064..3c47c625993e 100644 --- a/drivers/gpu/drm/i915/intel_device_info.c +++ b/drivers/gpu/drm/i915/intel_device_info.c @@ -200,6 +200,10 @@ static const u16 subplatform_g12_ids[] = { INTEL_DG2_G12_IDS(ID), }; +static const u16 subplatform_arl_ids[] = { + INTEL_ARL_IDS(ID), +}; + static bool find_devid(u16 id, const u16 *p, unsigned int num) { for (; num; num--, p++) { @@ -257,6 +261,9 @@ static void intel_device_info_subplatform_init(struct drm_i915_private *i915) } else if (find_devid(devid, subplatform_g12_ids, ARRAY_SIZE(subplatform_g12_ids))) { mask = BIT(INTEL_SUBPLATFORM_G12); + } else if (find_devid(devid, subplatform_arl_ids, + ARRAY_SIZE(subplatform_arl_ids))) { + mask = BIT(INTEL_SUBPLATFORM_ARL); } GEM_BUG_ON(mask & ~INTEL_SUBPLATFORM_MASK); diff --git a/drivers/gpu/drm/i915/intel_device_info.h b/drivers/gpu/drm/i915/intel_device_info.h index fb8a08623eb0..643ff1bf74ee 100644 --- a/drivers/gpu/drm/i915/intel_device_info.h +++ b/drivers/gpu/drm/i915/intel_device_info.h @@ -127,6 +127,9 @@ enum intel_platform { #define INTEL_SUBPLATFORM_N 1 #define INTEL_SUBPLATFORM_RPLU 2 +/* MTL */ +#define INTEL_SUBPLATFORM_ARL 0 + enum intel_ppgtt_type { INTEL_PPGTT_NONE = I915_GEM_PPGTT_NONE, INTEL_PPGTT_ALIASING = I915_GEM_PPGTT_ALIASING, diff --git a/include/drm/intel/i915_pciids.h b/include/drm/intel/i915_pciids.h index b21374f76df2..2bf03ebfcf73 100644 --- a/include/drm/intel/i915_pciids.h +++ b/include/drm/intel/i915_pciids.h @@ -772,15 +772,18 @@ INTEL_ATS_M75_IDS(MACRO__, ## __VA_ARGS__) /* MTL */ +#define INTEL_ARL_IDS(MACRO__, ...) \ + MACRO__(0x7D41, ## __VA_ARGS__), \ + MACRO__(0x7D51, ## __VA_ARGS__), \ + MACRO__(0x7D67, ## __VA_ARGS__), \ + MACRO__(0x7DD1, ## __VA_ARGS__) + #define INTEL_MTL_IDS(MACRO__, ...) \ + INTEL_ARL_IDS(MACRO__, ## __VA_ARGS__), \ MACRO__(0x7D40, ## __VA_ARGS__), \ - MACRO__(0x7D41, ## __VA_ARGS__), \ MACRO__(0x7D45, ## __VA_ARGS__), \ - MACRO__(0x7D51, ## __VA_ARGS__), \ MACRO__(0x7D55, ## __VA_ARGS__), \ MACRO__(0x7D60, ## __VA_ARGS__), \ - MACRO__(0x7D67, ## __VA_ARGS__), \ - MACRO__(0x7DD1, ## __VA_ARGS__), \ MACRO__(0x7DD5, ## __VA_ARGS__) /* LNL */ -- cgit v1.3.1 From cf4d37b8157ca085c17fdc1faad737465ff311b9 Mon Sep 17 00:00:00 2001 From: renjun wang Date: Sat, 24 Aug 2024 16:20:14 +0800 Subject: drm/atomic: fix kerneldoc for fake_commit field According to the context, the function description for fake_commit should be "prevent the atomic states from being freed too early" Signed-off-by: renjun wang Reviewed-by: Thomas Zimmermann Signed-off-by: Thomas Zimmermann Link: https://patchwork.freedesktop.org/patch/msgid/tencent_6EF2603DCCFAD6A8265F8AAD9D6D5BCB9309@qq.com --- include/drm/drm_atomic.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/drm_atomic.h b/include/drm/drm_atomic.h index 4d7f4c5f2001..31ca88deb10d 100644 --- a/include/drm/drm_atomic.h +++ b/include/drm/drm_atomic.h @@ -460,7 +460,7 @@ struct drm_atomic_state { * * Used for signaling unbound planes/connectors. * When a connector or plane is not bound to any CRTC, it's still important - * to preserve linearity to prevent the atomic states from being freed to early. + * to preserve linearity to prevent the atomic states from being freed too early. * * This commit (if set) is not bound to any CRTC, but will be completed when * drm_atomic_helper_commit_hw_done() is called. -- cgit v1.3.1 From 45c4d994b82b08f0ce5eb50f8da29379c92a391e Mon Sep 17 00:00:00 2001 From: Michał Winiarski Date: Fri, 23 Aug 2024 18:30:47 +0200 Subject: accel: Use XArray instead of IDR for minors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Accel minor management is based on DRM (and is also using struct drm_minor internally), since DRM is using XArray for minors, it makes sense to also convert accel. As the two implementations are identical (only difference being the underlying xarray), move the accel_minor_* functionality to DRM. Signed-off-by: Michał Winiarski Acked-by: James Zhu Acked-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240823163048.2676257-3-michal.winiarski@intel.com Signed-off-by: Christian König --- drivers/accel/drm_accel.c | 110 +++-------------------------------------- drivers/gpu/drm/drm_drv.c | 66 ++++++++++++------------- drivers/gpu/drm/drm_file.c | 2 +- drivers/gpu/drm/drm_internal.h | 4 -- include/drm/drm_accel.h | 18 +------ include/drm/drm_file.h | 5 ++ 6 files changed, 47 insertions(+), 158 deletions(-) (limited to 'include') diff --git a/drivers/accel/drm_accel.c b/drivers/accel/drm_accel.c index 16c3edb8c46e..aa826033b0ce 100644 --- a/drivers/accel/drm_accel.c +++ b/drivers/accel/drm_accel.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include @@ -18,8 +18,7 @@ #include #include -static DEFINE_SPINLOCK(accel_minor_lock); -static struct idr accel_minors_idr; +DEFINE_XARRAY_ALLOC(accel_minors_xa); static struct dentry *accel_debugfs_root; @@ -117,99 +116,6 @@ void accel_set_device_instance_params(struct device *kdev, int index) kdev->type = &accel_sysfs_device_minor; } -/** - * accel_minor_alloc() - Allocates a new accel minor - * - * This function access the accel minors idr and allocates from it - * a new id to represent a new accel minor - * - * Return: A new id on success or error code in case idr_alloc failed - */ -int accel_minor_alloc(void) -{ - unsigned long flags; - int r; - - spin_lock_irqsave(&accel_minor_lock, flags); - r = idr_alloc(&accel_minors_idr, NULL, 0, ACCEL_MAX_MINORS, GFP_NOWAIT); - spin_unlock_irqrestore(&accel_minor_lock, flags); - - return r; -} - -/** - * accel_minor_remove() - Remove an accel minor - * @index: The minor id to remove. - * - * This function access the accel minors idr and removes from - * it the member with the id that is passed to this function. - */ -void accel_minor_remove(int index) -{ - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - idr_remove(&accel_minors_idr, index); - spin_unlock_irqrestore(&accel_minor_lock, flags); -} - -/** - * accel_minor_replace() - Replace minor pointer in accel minors idr. - * @minor: Pointer to the new minor. - * @index: The minor id to replace. - * - * This function access the accel minors idr structure and replaces the pointer - * that is associated with an existing id. Because the minor pointer can be - * NULL, we need to explicitly pass the index. - * - * Return: 0 for success, negative value for error - */ -void accel_minor_replace(struct drm_minor *minor, int index) -{ - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - idr_replace(&accel_minors_idr, minor, index); - spin_unlock_irqrestore(&accel_minor_lock, flags); -} - -/* - * Looks up the given minor-ID and returns the respective DRM-minor object. The - * refence-count of the underlying device is increased so you must release this - * object with accel_minor_release(). - * - * The object can be only a drm_minor that represents an accel device. - * - * As long as you hold this minor, it is guaranteed that the object and the - * minor->dev pointer will stay valid! However, the device may get unplugged and - * unregistered while you hold the minor. - */ -static struct drm_minor *accel_minor_acquire(unsigned int minor_id) -{ - struct drm_minor *minor; - unsigned long flags; - - spin_lock_irqsave(&accel_minor_lock, flags); - minor = idr_find(&accel_minors_idr, minor_id); - if (minor) - drm_dev_get(minor->dev); - spin_unlock_irqrestore(&accel_minor_lock, flags); - - if (!minor) { - return ERR_PTR(-ENODEV); - } else if (drm_dev_is_unplugged(minor->dev)) { - drm_dev_put(minor->dev); - return ERR_PTR(-ENODEV); - } - - return minor; -} - -static void accel_minor_release(struct drm_minor *minor) -{ - drm_dev_put(minor->dev); -} - /** * accel_open - open method for ACCEL file * @inode: device inode @@ -227,7 +133,7 @@ int accel_open(struct inode *inode, struct file *filp) struct drm_minor *minor; int retcode; - minor = accel_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&accel_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); @@ -246,7 +152,7 @@ int accel_open(struct inode *inode, struct file *filp) err_undo: atomic_dec(&dev->open_count); - accel_minor_release(minor); + drm_minor_release(minor); return retcode; } EXPORT_SYMBOL_GPL(accel_open); @@ -257,7 +163,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp) struct drm_minor *minor; int err; - minor = accel_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&accel_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); @@ -274,7 +180,7 @@ static int accel_stub_open(struct inode *inode, struct file *filp) err = 0; out: - accel_minor_release(minor); + drm_minor_release(minor); return err; } @@ -290,15 +196,13 @@ void accel_core_exit(void) unregister_chrdev(ACCEL_MAJOR, "accel"); debugfs_remove(accel_debugfs_root); accel_sysfs_destroy(); - idr_destroy(&accel_minors_idr); + WARN_ON(!xa_empty(&accel_minors_xa)); } int __init accel_core_init(void) { int ret; - idr_init(&accel_minors_idr); - ret = accel_sysfs_init(); if (ret < 0) { DRM_ERROR("Cannot create ACCEL class: %d\n", ret); diff --git a/drivers/gpu/drm/drm_drv.c b/drivers/gpu/drm/drm_drv.c index c760dc0f814a..0feaaf74abc3 100644 --- a/drivers/gpu/drm/drm_drv.c +++ b/drivers/gpu/drm/drm_drv.c @@ -55,7 +55,7 @@ MODULE_AUTHOR("Gareth Hughes, Leif Delgass, José Fonseca, Jon Smirl"); MODULE_DESCRIPTION("DRM shared core routines"); MODULE_LICENSE("GPL and additional rights"); -static DEFINE_XARRAY_ALLOC(drm_minors_xa); +DEFINE_XARRAY_ALLOC(drm_minors_xa); /* * If the drm core fails to init for whatever reason, @@ -83,6 +83,18 @@ DEFINE_STATIC_SRCU(drm_unplug_srcu); * registered and unregistered dynamically according to device-state. */ +static struct xarray *drm_minor_get_xa(enum drm_minor_type type) +{ + if (type == DRM_MINOR_PRIMARY || type == DRM_MINOR_RENDER) + return &drm_minors_xa; +#if IS_ENABLED(CONFIG_DRM_ACCEL) + else if (type == DRM_MINOR_ACCEL) + return &accel_minors_xa; +#endif + else + return ERR_PTR(-EOPNOTSUPP); +} + static struct drm_minor **drm_minor_get_slot(struct drm_device *dev, enum drm_minor_type type) { @@ -106,18 +118,18 @@ static void drm_minor_alloc_release(struct drm_device *dev, void *data) put_device(minor->kdev); - if (minor->type == DRM_MINOR_ACCEL) - accel_minor_remove(minor->index); - else - xa_erase(&drm_minors_xa, minor->index); + xa_erase(drm_minor_get_xa(minor->type), minor->index); } -#define DRM_MINOR_LIMIT(t) ({ typeof(t) _t = (t); XA_LIMIT(64 * _t, 64 * _t + 63); }) +#define DRM_MINOR_LIMIT(t) ({ \ + typeof(t) _t = (t); \ + _t == DRM_MINOR_ACCEL ? XA_LIMIT(0, ACCEL_MAX_MINORS) : XA_LIMIT(64 * _t, 64 * _t + 63); \ +}) static int drm_minor_alloc(struct drm_device *dev, enum drm_minor_type type) { struct drm_minor *minor; - int index, r; + int r; minor = drmm_kzalloc(dev, sizeof(*minor), GFP_KERNEL); if (!minor) @@ -126,18 +138,11 @@ static int drm_minor_alloc(struct drm_device *dev, enum drm_minor_type type) minor->type = type; minor->dev = dev; - if (type == DRM_MINOR_ACCEL) { - r = accel_minor_alloc(); - index = r; - } else { - r = xa_alloc(&drm_minors_xa, &index, NULL, DRM_MINOR_LIMIT(type), GFP_KERNEL); - } - + r = xa_alloc(drm_minor_get_xa(type), &minor->index, + NULL, DRM_MINOR_LIMIT(type), GFP_KERNEL); if (r < 0) return r; - minor->index = index; - r = drmm_add_action_or_reset(dev, drm_minor_alloc_release, minor); if (r) return r; @@ -176,16 +181,12 @@ static int drm_minor_register(struct drm_device *dev, enum drm_minor_type type) goto err_debugfs; /* replace NULL with @minor so lookups will succeed from now on */ - if (minor->type == DRM_MINOR_ACCEL) { - accel_minor_replace(minor, minor->index); - } else { - entry = xa_store(&drm_minors_xa, minor->index, minor, GFP_KERNEL); - if (xa_is_err(entry)) { - ret = xa_err(entry); - goto err_debugfs; - } - WARN_ON(entry); + entry = xa_store(drm_minor_get_xa(type), minor->index, minor, GFP_KERNEL); + if (xa_is_err(entry)) { + ret = xa_err(entry); + goto err_debugfs; } + WARN_ON(entry); DRM_DEBUG("new minor registered %d\n", minor->index); return 0; @@ -204,10 +205,7 @@ static void drm_minor_unregister(struct drm_device *dev, enum drm_minor_type typ return; /* replace @minor with NULL so lookups will fail from now on */ - if (minor->type == DRM_MINOR_ACCEL) - accel_minor_replace(NULL, minor->index); - else - xa_store(&drm_minors_xa, minor->index, NULL, GFP_KERNEL); + xa_store(drm_minor_get_xa(type), minor->index, NULL, GFP_KERNEL); device_del(minor->kdev); dev_set_drvdata(minor->kdev, NULL); /* safety belt */ @@ -223,15 +221,15 @@ static void drm_minor_unregister(struct drm_device *dev, enum drm_minor_type typ * minor->dev pointer will stay valid! However, the device may get unplugged and * unregistered while you hold the minor. */ -struct drm_minor *drm_minor_acquire(unsigned int minor_id) +struct drm_minor *drm_minor_acquire(struct xarray *minor_xa, unsigned int minor_id) { struct drm_minor *minor; - xa_lock(&drm_minors_xa); - minor = xa_load(&drm_minors_xa, minor_id); + xa_lock(minor_xa); + minor = xa_load(minor_xa, minor_id); if (minor) drm_dev_get(minor->dev); - xa_unlock(&drm_minors_xa); + xa_unlock(minor_xa); if (!minor) { return ERR_PTR(-ENODEV); @@ -1024,7 +1022,7 @@ static int drm_stub_open(struct inode *inode, struct file *filp) DRM_DEBUG("\n"); - minor = drm_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&drm_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); diff --git a/drivers/gpu/drm/drm_file.c b/drivers/gpu/drm/drm_file.c index 351591f3a27a..01fde94fe2a9 100644 --- a/drivers/gpu/drm/drm_file.c +++ b/drivers/gpu/drm/drm_file.c @@ -355,7 +355,7 @@ int drm_open(struct inode *inode, struct file *filp) struct drm_minor *minor; int retcode; - minor = drm_minor_acquire(iminor(inode)); + minor = drm_minor_acquire(&drm_minors_xa, iminor(inode)); if (IS_ERR(minor)) return PTR_ERR(minor); diff --git a/drivers/gpu/drm/drm_internal.h b/drivers/gpu/drm/drm_internal.h index 23c99803af44..1705bfc90b1e 100644 --- a/drivers/gpu/drm/drm_internal.h +++ b/drivers/gpu/drm/drm_internal.h @@ -80,10 +80,6 @@ void drm_prime_destroy_file_private(struct drm_prime_file_private *prime_fpriv); void drm_prime_remove_buf_handle(struct drm_prime_file_private *prime_fpriv, uint32_t handle); -/* drm_drv.c */ -struct drm_minor *drm_minor_acquire(unsigned int minor_id); -void drm_minor_release(struct drm_minor *minor); - /* drm_managed.c */ void drm_managed_release(struct drm_device *dev); void drmm_add_final_kfree(struct drm_device *dev, void *container); diff --git a/include/drm/drm_accel.h b/include/drm/drm_accel.h index f4d3784b1dce..8867ce0be94c 100644 --- a/include/drm/drm_accel.h +++ b/include/drm/drm_accel.h @@ -51,11 +51,10 @@ #if IS_ENABLED(CONFIG_DRM_ACCEL) +extern struct xarray accel_minors_xa; + void accel_core_exit(void); int accel_core_init(void); -void accel_minor_remove(int index); -int accel_minor_alloc(void); -void accel_minor_replace(struct drm_minor *minor, int index); void accel_set_device_instance_params(struct device *kdev, int index); int accel_open(struct inode *inode, struct file *filp); void accel_debugfs_init(struct drm_device *dev); @@ -73,19 +72,6 @@ static inline int __init accel_core_init(void) return 0; } -static inline void accel_minor_remove(int index) -{ -} - -static inline int accel_minor_alloc(void) -{ - return -EOPNOTSUPP; -} - -static inline void accel_minor_replace(struct drm_minor *minor, int index) -{ -} - static inline void accel_set_device_instance_params(struct device *kdev, int index) { } diff --git a/include/drm/drm_file.h b/include/drm/drm_file.h index ab230d3af138..8c0030c77308 100644 --- a/include/drm/drm_file.h +++ b/include/drm/drm_file.h @@ -45,6 +45,8 @@ struct drm_printer; struct device; struct file; +extern struct xarray drm_minors_xa; + /* * FIXME: Not sure we want to have drm_minor here in the end, but to avoid * header include loops we need it here for now. @@ -434,6 +436,9 @@ static inline bool drm_is_accel_client(const struct drm_file *file_priv) void drm_file_update_pid(struct drm_file *); +struct drm_minor *drm_minor_acquire(struct xarray *minors_xa, unsigned int minor_id); +void drm_minor_release(struct drm_minor *minor); + int drm_open(struct inode *inode, struct file *filp); int drm_open_helper(struct file *filp, struct drm_minor *minor); ssize_t drm_read(struct file *filp, char __user *buffer, -- cgit v1.3.1 From 789e51597d33ec0053b029127d797d86c0d857eb Mon Sep 17 00:00:00 2001 From: Nirmoy Das Date: Wed, 28 Aug 2024 10:36:35 +0200 Subject: Revert "drm/ttm: Add a flag to allow drivers to skip clear-on-free" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Remove TTM_TT_FLAG_CLEARED_ON_FREE now that XE stopped using this flag. This reverts commit decbfaf06db05fa1f9b33149ebb3c145b44e878f. Cc: Christian König Cc: Himal Prasad Ghimiray Cc: Lucas De Marchi Cc: Matthew Auld Cc: Matthew Brost Cc: Thomas Hellström Signed-off-by: Nirmoy Das Reviewed-by: Thomas Hellström Link: https://patchwork.freedesktop.org/patch/msgid/20240828083635.23601-2-nirmoy.das@intel.com Signed-off-by: Lucas De Marchi --- drivers/gpu/drm/ttm/ttm_pool.c | 18 +++++++----------- include/drm/ttm/ttm_tt.h | 6 +----- 2 files changed, 8 insertions(+), 16 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/ttm/ttm_pool.c b/drivers/gpu/drm/ttm/ttm_pool.c index 935ab3cfd046..8504dbe19c1a 100644 --- a/drivers/gpu/drm/ttm/ttm_pool.c +++ b/drivers/gpu/drm/ttm/ttm_pool.c @@ -222,18 +222,15 @@ static void ttm_pool_unmap(struct ttm_pool *pool, dma_addr_t dma_addr, } /* Give pages into a specific pool_type */ -static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p, - bool cleared) +static void ttm_pool_type_give(struct ttm_pool_type *pt, struct page *p) { unsigned int i, num_pages = 1 << pt->order; - if (!cleared) { - for (i = 0; i < num_pages; ++i) { - if (PageHighMem(p)) - clear_highpage(p + i); - else - clear_page(page_address(p + i)); - } + for (i = 0; i < num_pages; ++i) { + if (PageHighMem(p)) + clear_highpage(p + i); + else + clear_page(page_address(p + i)); } spin_lock(&pt->lock); @@ -397,7 +394,6 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pgoff_t start_page, pgoff_t end_page) { struct page **pages = &tt->pages[start_page]; - bool cleared = tt->page_flags & TTM_TT_FLAG_CLEARED_ON_FREE; unsigned int order; pgoff_t i, nr; @@ -411,7 +407,7 @@ static void ttm_pool_free_range(struct ttm_pool *pool, struct ttm_tt *tt, pt = ttm_pool_select_type(pool, caching, order); if (pt) - ttm_pool_type_give(pt, *pages, cleared); + ttm_pool_type_give(pt, *pages); else ttm_pool_free_page(pool, caching, order, *pages); } diff --git a/include/drm/ttm/ttm_tt.h b/include/drm/ttm/ttm_tt.h index cfaf49de2419..2b9d856ff388 100644 --- a/include/drm/ttm/ttm_tt.h +++ b/include/drm/ttm/ttm_tt.h @@ -85,9 +85,6 @@ struct ttm_tt { * fault handling abuses the DMA api a bit and dma_map_attrs can't be * used to assure pgprot always matches. * - * TTM_TT_FLAG_CLEARED_ON_FREE: Set this if a drm driver handles - * clearing backing store - * * TTM_TT_FLAG_PRIV_POPULATED: TTM internal only. DO NOT USE. This is * set by TTM after ttm_tt_populate() has successfully returned, and is * then unset when TTM calls ttm_tt_unpopulate(). @@ -97,9 +94,8 @@ struct ttm_tt { #define TTM_TT_FLAG_EXTERNAL BIT(2) #define TTM_TT_FLAG_EXTERNAL_MAPPABLE BIT(3) #define TTM_TT_FLAG_DECRYPTED BIT(4) -#define TTM_TT_FLAG_CLEARED_ON_FREE BIT(5) -#define TTM_TT_FLAG_PRIV_POPULATED BIT(6) +#define TTM_TT_FLAG_PRIV_POPULATED BIT(5) uint32_t page_flags; /** @num_pages: Number of pages in the page array. */ uint32_t num_pages; -- cgit v1.3.1 From 33929707b808ba7839c40c15d3e68cbc51070b31 Mon Sep 17 00:00:00 2001 From: Jani Nikula Date: Mon, 12 Aug 2024 15:23:10 +0300 Subject: drm/mst: switch to guid_t type for GUID The kernel has a guid_t type for GUIDs. Switch to using it, but avoid any functional changes here. Reviewed-by: Daniel Vetter Acked-by: Harry Wentland Acked-by: Alex Deucher Acked-by: Hamza Mahfooz Link: https://patchwork.freedesktop.org/patch/msgid/20240812122312.1567046-1-jani.nikula@intel.com Signed-off-by: Jani Nikula --- drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c | 2 +- drivers/gpu/drm/display/drm_dp_mst_topology.c | 67 +++++++++++++---------- include/drm/display/drm_dp_mst_helper.h | 12 ++-- 3 files changed, 45 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c index 91490d5d106b..f3f74c6d0e1a 100644 --- a/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c +++ b/drivers/gpu/drm/amd/display/amdgpu_dm/amdgpu_dm.c @@ -2610,7 +2610,7 @@ static void resume_mst_branch_status(struct drm_dp_mst_topology_mgr *mgr) } } - memcpy(mgr->mst_primary->guid, guid, 16); + import_guid(&mgr->mst_primary->guid, guid); out_fail: mutex_unlock(&mgr->lock); diff --git a/drivers/gpu/drm/display/drm_dp_mst_topology.c b/drivers/gpu/drm/display/drm_dp_mst_topology.c index f54d0cbfb94d..382e05cb18d7 100644 --- a/drivers/gpu/drm/display/drm_dp_mst_topology.c +++ b/drivers/gpu/drm/display/drm_dp_mst_topology.c @@ -89,7 +89,7 @@ static int drm_dp_send_enum_path_resources(struct drm_dp_mst_topology_mgr *mgr, struct drm_dp_mst_branch *mstb, struct drm_dp_mst_port *port); static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, - u8 *guid); + guid_t *guid); static int drm_dp_mst_register_i2c_bus(struct drm_dp_mst_port *port); static void drm_dp_mst_unregister_i2c_bus(struct drm_dp_mst_port *port); @@ -801,7 +801,7 @@ static bool drm_dp_sideband_parse_link_address(const struct drm_dp_mst_topology_ int idx = 1; int i; - memcpy(repmsg->u.link_addr.guid, &raw->msg[idx], 16); + import_guid(&repmsg->u.link_addr.guid, &raw->msg[idx]); idx += 16; repmsg->u.link_addr.nports = raw->msg[idx] & 0xf; idx++; @@ -829,7 +829,7 @@ static bool drm_dp_sideband_parse_link_address(const struct drm_dp_mst_topology_ idx++; if (idx > raw->curlen) goto fail_len; - memcpy(repmsg->u.link_addr.ports[i].peer_guid, &raw->msg[idx], 16); + import_guid(&repmsg->u.link_addr.ports[i].peer_guid, &raw->msg[idx]); idx += 16; if (idx > raw->curlen) goto fail_len; @@ -1029,7 +1029,7 @@ static bool drm_dp_sideband_parse_reply(const struct drm_dp_mst_topology_mgr *mg msg->req_type = (raw->msg[0] & 0x7f); if (msg->reply_type == DP_SIDEBAND_REPLY_NAK) { - memcpy(msg->u.nak.guid, &raw->msg[1], 16); + import_guid(&msg->u.nak.guid, &raw->msg[1]); msg->u.nak.reason = raw->msg[17]; msg->u.nak.nak_data = raw->msg[18]; return false; @@ -1078,7 +1078,7 @@ drm_dp_sideband_parse_connection_status_notify(const struct drm_dp_mst_topology_ if (idx > raw->curlen) goto fail_len; - memcpy(msg->u.conn_stat.guid, &raw->msg[idx], 16); + import_guid(&msg->u.conn_stat.guid, &raw->msg[idx]); idx += 16; if (idx > raw->curlen) goto fail_len; @@ -1107,7 +1107,7 @@ static bool drm_dp_sideband_parse_resource_status_notify(const struct drm_dp_mst if (idx > raw->curlen) goto fail_len; - memcpy(msg->u.resource_stat.guid, &raw->msg[idx], 16); + import_guid(&msg->u.resource_stat.guid, &raw->msg[idx]); idx += 16; if (idx > raw->curlen) goto fail_len; @@ -2174,20 +2174,24 @@ ssize_t drm_dp_mst_dpcd_write(struct drm_dp_aux *aux, offset, size, buffer); } -static int drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, u8 *guid) +static int drm_dp_check_mstb_guid(struct drm_dp_mst_branch *mstb, guid_t *guid) { int ret = 0; - memcpy(mstb->guid, guid, 16); + guid_copy(&mstb->guid, guid); + + if (!drm_dp_validate_guid(mstb->mgr, &mstb->guid)) { + u8 buf[UUID_SIZE]; + + export_guid(buf, &mstb->guid); - if (!drm_dp_validate_guid(mstb->mgr, mstb->guid)) { if (mstb->port_parent) { ret = drm_dp_send_dpcd_write(mstb->mgr, mstb->port_parent, - DP_GUID, 16, mstb->guid); + DP_GUID, sizeof(buf), buf); } else { ret = drm_dp_dpcd_write(mstb->mgr->aux, - DP_GUID, mstb->guid, 16); + DP_GUID, buf, sizeof(buf)); } } @@ -2570,9 +2574,9 @@ out: return mstb; } -static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( - struct drm_dp_mst_branch *mstb, - const uint8_t *guid) +static struct drm_dp_mst_branch * +get_mst_branch_device_by_guid_helper(struct drm_dp_mst_branch *mstb, + const guid_t *guid) { struct drm_dp_mst_branch *found_mstb; struct drm_dp_mst_port *port; @@ -2580,10 +2584,9 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( if (!mstb) return NULL; - if (memcmp(mstb->guid, guid, 16) == 0) + if (guid_equal(&mstb->guid, guid)) return mstb; - list_for_each_entry(port, &mstb->ports, next) { found_mstb = get_mst_branch_device_by_guid_helper(port->mstb, guid); @@ -2596,7 +2599,7 @@ static struct drm_dp_mst_branch *get_mst_branch_device_by_guid_helper( static struct drm_dp_mst_branch * drm_dp_get_mst_branch_device_by_guid(struct drm_dp_mst_topology_mgr *mgr, - const uint8_t *guid) + const guid_t *guid) { struct drm_dp_mst_branch *mstb; int ret; @@ -2693,17 +2696,20 @@ static void drm_dp_mst_link_probe_work(struct work_struct *work) } static bool drm_dp_validate_guid(struct drm_dp_mst_topology_mgr *mgr, - u8 *guid) + guid_t *guid) { u64 salt; + u8 buf[UUID_SIZE]; - if (memchr_inv(guid, 0, 16)) + if (!guid_is_null(guid)) return true; salt = get_jiffies_64(); - memcpy(&guid[0], &salt, sizeof(u64)); - memcpy(&guid[8], &salt, sizeof(u64)); + memcpy(&buf[0], &salt, sizeof(u64)); + memcpy(&buf[8], &salt, sizeof(u64)); + + import_guid(guid, buf); return false; } @@ -2943,7 +2949,7 @@ static int drm_dp_send_link_address(struct drm_dp_mst_topology_mgr *mgr, drm_dbg_kms(mgr->dev, "link address reply: %d\n", reply->nports); drm_dp_dump_link_address(mgr, reply); - ret = drm_dp_check_mstb_guid(mstb, reply->guid); + ret = drm_dp_check_mstb_guid(mstb, &reply->guid); if (ret) { char buf[64]; @@ -3770,8 +3776,9 @@ EXPORT_SYMBOL(drm_dp_mst_topology_mgr_suspend); int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, bool sync) { + u8 buf[UUID_SIZE]; + guid_t guid; int ret; - u8 guid[16]; mutex_lock(&mgr->lock); if (!mgr->mst_primary) @@ -3792,13 +3799,15 @@ int drm_dp_mst_topology_mgr_resume(struct drm_dp_mst_topology_mgr *mgr, } /* Some hubs forget their guids after they resume */ - ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, guid, 16); - if (ret != 16) { + ret = drm_dp_dpcd_read(mgr->aux, DP_GUID, buf, sizeof(buf)); + if (ret != sizeof(buf)) { drm_dbg_kms(mgr->dev, "dpcd read failed - undocked during suspend?\n"); goto out_fail; } - ret = drm_dp_check_mstb_guid(mgr->mst_primary, guid); + import_guid(&guid, buf); + + ret = drm_dp_check_mstb_guid(mgr->mst_primary, &guid); if (ret) { drm_dbg_kms(mgr->dev, "check mstb failed - undocked during suspend?\n"); goto out_fail; @@ -3976,12 +3985,12 @@ drm_dp_mst_process_up_req(struct drm_dp_mst_topology_mgr *mgr, bool hotplug = false, dowork = false; if (hdr->broadcast) { - const u8 *guid = NULL; + const guid_t *guid = NULL; if (msg->req_type == DP_CONNECTION_STATUS_NOTIFY) - guid = msg->u.conn_stat.guid; + guid = &msg->u.conn_stat.guid; else if (msg->req_type == DP_RESOURCE_STATUS_NOTIFY) - guid = msg->u.resource_stat.guid; + guid = &msg->u.resource_stat.guid; if (guid) mstb = drm_dp_get_mst_branch_device_by_guid(mgr, guid); diff --git a/include/drm/display/drm_dp_mst_helper.h b/include/drm/display/drm_dp_mst_helper.h index cfe096389d94..dd466631f174 100644 --- a/include/drm/display/drm_dp_mst_helper.h +++ b/include/drm/display/drm_dp_mst_helper.h @@ -244,18 +244,18 @@ struct drm_dp_mst_branch { bool link_address_sent; /* global unique identifier to identify branch devices */ - u8 guid[16]; + guid_t guid; }; struct drm_dp_nak_reply { - u8 guid[16]; + guid_t guid; u8 reason; u8 nak_data; }; struct drm_dp_link_address_ack_reply { - u8 guid[16]; + guid_t guid; u8 nports; struct drm_dp_link_addr_reply_port { bool input_port; @@ -265,7 +265,7 @@ struct drm_dp_link_address_ack_reply { bool ddps; bool legacy_device_plug_status; u8 dpcd_revision; - u8 peer_guid[16]; + guid_t peer_guid; u8 num_sdp_streams; u8 num_sdp_stream_sinks; } ports[16]; @@ -348,7 +348,7 @@ struct drm_dp_allocate_payload_ack_reply { }; struct drm_dp_connection_status_notify { - u8 guid[16]; + guid_t guid; u8 port_number; bool legacy_device_plug_status; bool displayport_device_plug_status; @@ -425,7 +425,7 @@ struct drm_dp_query_payload { struct drm_dp_resource_status_notify { u8 port_number; - u8 guid[16]; + guid_t guid; u16 available_pbn; }; -- cgit v1.3.1 From d7eafed3223af19add14b67a390ec2b983d890e0 Mon Sep 17 00:00:00 2001 From: Connor Abbott Date: Wed, 7 Aug 2024 14:04:58 +0100 Subject: drm/msm: Expose expanded UBWC config uapi This adds extra parameters that affect UBWC tiling that will be used by the Mesa implementation of VK_EXT_host_image_copy. Signed-off-by: Connor Abbott Patchwork: https://patchwork.freedesktop.org/patch/607401/ Signed-off-by: Rob Clark --- drivers/gpu/drm/msm/adreno/adreno_gpu.c | 6 ++++++ include/uapi/drm/msm_drm.h | 2 ++ 2 files changed, 8 insertions(+) (limited to 'include') diff --git a/drivers/gpu/drm/msm/adreno/adreno_gpu.c b/drivers/gpu/drm/msm/adreno/adreno_gpu.c index 120b23542a95..f742ebefb769 100644 --- a/drivers/gpu/drm/msm/adreno/adreno_gpu.c +++ b/drivers/gpu/drm/msm/adreno/adreno_gpu.c @@ -379,6 +379,12 @@ int adreno_get_param(struct msm_gpu *gpu, struct msm_file_private *ctx, case MSM_PARAM_RAYTRACING: *value = adreno_gpu->has_ray_tracing; return 0; + case MSM_PARAM_UBWC_SWIZZLE: + *value = adreno_gpu->ubwc_config.ubwc_swizzle; + return 0; + case MSM_PARAM_MACROTILE_MODE: + *value = adreno_gpu->ubwc_config.macrotile_mode; + return 0; default: DBG("%s: invalid param: %u", gpu->name, param); return -EINVAL; diff --git a/include/uapi/drm/msm_drm.h b/include/uapi/drm/msm_drm.h index 3fca72f73861..2377147b6af0 100644 --- a/include/uapi/drm/msm_drm.h +++ b/include/uapi/drm/msm_drm.h @@ -88,6 +88,8 @@ struct drm_msm_timespec { #define MSM_PARAM_VA_SIZE 0x0f /* RO: size of valid GPU iova range (bytes) */ #define MSM_PARAM_HIGHEST_BANK_BIT 0x10 /* RO */ #define MSM_PARAM_RAYTRACING 0x11 /* RO */ +#define MSM_PARAM_UBWC_SWIZZLE 0x12 /* RO */ +#define MSM_PARAM_MACROTILE_MODE 0x13 /* RO */ /* For backwards compat. The original support for preemption was based on * a single ring per priority level so # of priority levels equals the # -- cgit v1.3.1 From ddc94d0b17e8ea8179ecbbefacac3fba0fb77265 Mon Sep 17 00:00:00 2001 From: Matthew Brost Date: Mon, 26 Aug 2024 10:01:43 -0700 Subject: dma-buf: Split out dma fence array create into alloc and arm functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Useful to preallocate dma fence array and then arm in path of reclaim or a dma fence. v2: - s/arm/init (Christian) - Drop !array warn (Christian) v3: - Fix kernel doc typos (dim) Cc: Sumit Semwal Cc: Christian König Signed-off-by: Matthew Brost Reviewed-by: Christian König Link: https://patchwork.freedesktop.org/patch/msgid/20240826170144.2492062-2-matthew.brost@intel.com --- drivers/dma-buf/dma-fence-array.c | 78 ++++++++++++++++++++++++++++----------- include/linux/dma-fence-array.h | 6 +++ 2 files changed, 63 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/drivers/dma-buf/dma-fence-array.c b/drivers/dma-buf/dma-fence-array.c index c74ac197d5fe..8a08ffde31e7 100644 --- a/drivers/dma-buf/dma-fence-array.c +++ b/drivers/dma-buf/dma-fence-array.c @@ -144,37 +144,38 @@ const struct dma_fence_ops dma_fence_array_ops = { EXPORT_SYMBOL(dma_fence_array_ops); /** - * dma_fence_array_create - Create a custom fence array + * dma_fence_array_alloc - Allocate a custom fence array + * @num_fences: [in] number of fences to add in the array + * + * Return dma fence array on success, NULL on failure + */ +struct dma_fence_array *dma_fence_array_alloc(int num_fences) +{ + struct dma_fence_array *array; + + return kzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); +} +EXPORT_SYMBOL(dma_fence_array_alloc); + +/** + * dma_fence_array_init - Init a custom fence array + * @array: [in] dma fence array to arm * @num_fences: [in] number of fences to add in the array * @fences: [in] array containing the fences * @context: [in] fence context to use * @seqno: [in] sequence number to use * @signal_on_any: [in] signal on any fence in the array * - * Allocate a dma_fence_array object and initialize the base fence with - * dma_fence_init(). - * In case of error it returns NULL. - * - * The caller should allocate the fences array with num_fences size - * and fill it with the fences it wants to add to the object. Ownership of this - * array is taken and dma_fence_put() is used on each fence on release. - * - * If @signal_on_any is true the fence array signals if any fence in the array - * signals, otherwise it signals when all fences in the array signal. + * Implementation of @dma_fence_array_create without allocation. Useful to init + * a preallocated dma fence array in the path of reclaim or dma fence signaling. */ -struct dma_fence_array *dma_fence_array_create(int num_fences, - struct dma_fence **fences, - u64 context, unsigned seqno, - bool signal_on_any) +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any) { - struct dma_fence_array *array; - WARN_ON(!num_fences || !fences); - array = kzalloc(struct_size(array, callbacks, num_fences), GFP_KERNEL); - if (!array) - return NULL; - array->num_fences = num_fences; spin_lock_init(&array->lock); @@ -200,6 +201,41 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, */ while (num_fences--) WARN_ON(dma_fence_is_container(fences[num_fences])); +} +EXPORT_SYMBOL(dma_fence_array_init); + +/** + * dma_fence_array_create - Create a custom fence array + * @num_fences: [in] number of fences to add in the array + * @fences: [in] array containing the fences + * @context: [in] fence context to use + * @seqno: [in] sequence number to use + * @signal_on_any: [in] signal on any fence in the array + * + * Allocate a dma_fence_array object and initialize the base fence with + * dma_fence_init(). + * In case of error it returns NULL. + * + * The caller should allocate the fences array with num_fences size + * and fill it with the fences it wants to add to the object. Ownership of this + * array is taken and dma_fence_put() is used on each fence on release. + * + * If @signal_on_any is true the fence array signals if any fence in the array + * signals, otherwise it signals when all fences in the array signal. + */ +struct dma_fence_array *dma_fence_array_create(int num_fences, + struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any) +{ + struct dma_fence_array *array; + + array = dma_fence_array_alloc(num_fences); + if (!array) + return NULL; + + dma_fence_array_init(array, num_fences, fences, + context, seqno, signal_on_any); return array; } diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 29c5650c1038..079b3dec0a16 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -79,6 +79,12 @@ to_dma_fence_array(struct dma_fence *fence) for (index = 0, fence = dma_fence_array_first(head); fence; \ ++(index), fence = dma_fence_array_next(head, index)) +struct dma_fence_array *dma_fence_array_alloc(int num_fences); +void dma_fence_array_init(struct dma_fence_array *array, + int num_fences, struct dma_fence **fences, + u64 context, unsigned seqno, + bool signal_on_any); + struct dma_fence_array *dma_fence_array_create(int num_fences, struct dma_fence **fences, u64 context, unsigned seqno, -- cgit v1.3.1 From 663b0f1e141dc60ce6c09ae6afc5f213b22d13ca Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Fri, 16 Feb 2024 11:00:10 -0500 Subject: drm/amdkfd: Document and define SVM events message macro Document how to use SMI system management interface to enable and receive SVM events. Document SVM event triggers. Define SVM events message string format macro that could be used by user mode for sscanf to parse the event. Add it to uAPI header file to make it obvious that is changing uAPI in future. No functional changes. Signed-off-by: Philip Yang Reviewed-by: James Zhu Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c | 45 ++++++------- include/uapi/linux/kfd_ioctl.h | 100 ++++++++++++++++++++++++---- 2 files changed, 109 insertions(+), 36 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c index ea6a8e43bd5b..de8b9abf7afc 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_smi_events.c @@ -235,17 +235,16 @@ void kfd_smi_event_update_gpu_reset(struct kfd_node *dev, bool post_reset, amdgpu_reset_get_desc(reset_context, reset_cause, sizeof(reset_cause)); - kfd_smi_event_add(0, dev, event, "%x %s\n", - dev->reset_seq_num, - reset_cause); + kfd_smi_event_add(0, dev, event, KFD_EVENT_FMT_UPDATE_GPU_RESET( + dev->reset_seq_num, reset_cause)); } void kfd_smi_event_update_thermal_throttling(struct kfd_node *dev, uint64_t throttle_bitmask) { - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, "%llx:%llx\n", + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_THERMAL_THROTTLE, KFD_EVENT_FMT_THERMAL_THROTTLING( throttle_bitmask, - amdgpu_dpm_get_thermal_throttling_counter(dev->adev)); + amdgpu_dpm_get_thermal_throttling_counter(dev->adev))); } void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) @@ -256,8 +255,8 @@ void kfd_smi_event_update_vmfault(struct kfd_node *dev, uint16_t pasid) if (task_info) { /* Report VM faults from user applications, not retry from kernel */ if (task_info->pid) - kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, "%x:%s\n", - task_info->pid, task_info->task_name); + kfd_smi_event_add(0, dev, KFD_SMI_EVENT_VMFAULT, KFD_EVENT_FMT_VMFAULT( + task_info->pid, task_info->task_name)); amdgpu_vm_put_task_info(task_info); } } @@ -267,16 +266,16 @@ void kfd_smi_event_page_fault_start(struct kfd_node *node, pid_t pid, ktime_t ts) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_START, - "%lld -%d @%lx(%x) %c\n", ktime_to_ns(ts), pid, - address, node->id, write_fault ? 'W' : 'R'); + KFD_EVENT_FMT_PAGEFAULT_START(ktime_to_ns(ts), pid, + address, node->id, write_fault ? 'W' : 'R')); } void kfd_smi_event_page_fault_end(struct kfd_node *node, pid_t pid, unsigned long address, bool migration) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_PAGE_FAULT_END, - "%lld -%d @%lx(%x) %c\n", ktime_get_boottime_ns(), - pid, address, node->id, migration ? 'M' : 'U'); + KFD_EVENT_FMT_PAGEFAULT_END(ktime_get_boottime_ns(), + pid, address, node->id, migration ? 'M' : 'U')); } void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, @@ -286,9 +285,9 @@ void kfd_smi_event_migration_start(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_START, - "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", + KFD_EVENT_FMT_MIGRATE_START( ktime_get_boottime_ns(), pid, start, end - start, - from, to, prefetch_loc, preferred_loc, trigger); + from, to, prefetch_loc, preferred_loc, trigger)); } void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, @@ -296,24 +295,24 @@ void kfd_smi_event_migration_end(struct kfd_node *node, pid_t pid, uint32_t from, uint32_t to, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_MIGRATE_END, - "%lld -%d @%lx(%lx) %x->%x %d\n", + KFD_EVENT_FMT_MIGRATE_END( ktime_get_boottime_ns(), pid, start, end - start, - from, to, trigger); + from, to, trigger)); } void kfd_smi_event_queue_eviction(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_EVICTION, - "%lld -%d %x %d\n", ktime_get_boottime_ns(), pid, - node->id, trigger); + KFD_EVENT_FMT_QUEUE_EVICTION(ktime_get_boottime_ns(), pid, + node->id, trigger)); } void kfd_smi_event_queue_restore(struct kfd_node *node, pid_t pid) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_QUEUE_RESTORE, - "%lld -%d %x\n", ktime_get_boottime_ns(), pid, - node->id); + KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), pid, + node->id, 0)); } void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) @@ -330,8 +329,8 @@ void kfd_smi_event_queue_restore_rescheduled(struct mm_struct *mm) kfd_smi_event_add(p->lead_thread->pid, pdd->dev, KFD_SMI_EVENT_QUEUE_RESTORE, - "%lld -%d %x %c\n", ktime_get_boottime_ns(), - p->lead_thread->pid, pdd->dev->id, 'R'); + KFD_EVENT_FMT_QUEUE_RESTORE(ktime_get_boottime_ns(), + p->lead_thread->pid, pdd->dev->id, 'R')); } kfd_unref_process(p); } @@ -341,8 +340,8 @@ void kfd_smi_event_unmap_from_gpu(struct kfd_node *node, pid_t pid, uint32_t trigger) { kfd_smi_event_add(pid, node, KFD_SMI_EVENT_UNMAP_FROM_GPU, - "%lld -%d @%lx(%lx) %x %d\n", ktime_get_boottime_ns(), - pid, address, last - address + 1, node->id, trigger); + KFD_EVENT_FMT_UNMAP_FROM_GPU(ktime_get_boottime_ns(), + pid, address, last - address + 1, node->id, trigger)); } int kfd_smi_event_open(struct kfd_node *dev, uint32_t *fd) diff --git a/include/uapi/linux/kfd_ioctl.h b/include/uapi/linux/kfd_ioctl.h index 71a7ce5f2d4c..717307d6b5b7 100644 --- a/include/uapi/linux/kfd_ioctl.h +++ b/include/uapi/linux/kfd_ioctl.h @@ -540,26 +540,29 @@ enum kfd_smi_event { KFD_SMI_EVENT_ALL_PROCESS = 64 }; +/* The reason of the page migration event */ enum KFD_MIGRATE_TRIGGERS { - KFD_MIGRATE_TRIGGER_PREFETCH, - KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, - KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, - KFD_MIGRATE_TRIGGER_TTM_EVICTION + KFD_MIGRATE_TRIGGER_PREFETCH, /* Prefetch to GPU VRAM or system memory */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_GPU, /* GPU page fault recover */ + KFD_MIGRATE_TRIGGER_PAGEFAULT_CPU, /* CPU page fault recover */ + KFD_MIGRATE_TRIGGER_TTM_EVICTION /* TTM eviction */ }; +/* The reason of user queue evition event */ enum KFD_QUEUE_EVICTION_TRIGGERS { - KFD_QUEUE_EVICTION_TRIGGER_SVM, - KFD_QUEUE_EVICTION_TRIGGER_USERPTR, - KFD_QUEUE_EVICTION_TRIGGER_TTM, - KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, - KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, - KFD_QUEUE_EVICTION_CRIU_RESTORE + KFD_QUEUE_EVICTION_TRIGGER_SVM, /* SVM buffer migration */ + KFD_QUEUE_EVICTION_TRIGGER_USERPTR, /* userptr movement */ + KFD_QUEUE_EVICTION_TRIGGER_TTM, /* TTM move buffer */ + KFD_QUEUE_EVICTION_TRIGGER_SUSPEND, /* GPU suspend */ + KFD_QUEUE_EVICTION_CRIU_CHECKPOINT, /* CRIU checkpoint */ + KFD_QUEUE_EVICTION_CRIU_RESTORE /* CRIU restore */ }; +/* The reason of unmap buffer from GPU event */ enum KFD_SVM_UNMAP_TRIGGERS { - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, - KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE, - KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY, /* MMU notifier CPU buffer movement */ + KFD_SVM_UNMAP_TRIGGER_MMU_NOTIFY_MIGRATE,/* MMU notifier page migration */ + KFD_SVM_UNMAP_TRIGGER_UNMAP_FROM_CPU /* Unmap to free the buffer */ }; #define KFD_SMI_EVENT_MASK_FROM_INDEX(i) (1ULL << ((i) - 1)) @@ -570,6 +573,77 @@ struct kfd_ioctl_smi_events_args { __u32 anon_fd; /* from KFD */ }; +/* + * SVM event tracing via SMI system management interface + * + * Open event file descriptor + * use ioctl AMDKFD_IOC_SMI_EVENTS, pass in gpuid and return a anonymous file + * descriptor to receive SMI events. + * If calling with sudo permission, then file descriptor can be used to receive + * SVM events from all processes, otherwise, to only receive SVM events of same + * process. + * + * To enable the SVM event + * Write event file descriptor with KFD_SMI_EVENT_MASK_FROM_INDEX(event) bitmap + * mask to start record the event to the kfifo, use bitmap mask combination + * for multiple events. New event mask will overwrite the previous event mask. + * KFD_SMI_EVENT_MASK_FROM_INDEX(KFD_SMI_EVENT_ALL_PROCESS) bit requires sudo + * permisson to receive SVM events from all process. + * + * To receive the event + * Application can poll file descriptor to wait for the events, then read event + * from the file into a buffer. Each event is one line string message, starting + * with the event id, then the event specific information. + * + * To decode event information + * The following event format string macro can be used with sscanf to decode + * the specific event information. + * event triggers: the reason to generate the event, defined as enum for unmap, + * eviction and migrate events. + * node, from, to, prefetch_loc, preferred_loc: GPU ID, or 0 for system memory. + * addr: user mode address, in pages + * size: in pages + * pid: the process ID to generate the event + * ns: timestamp in nanosecond-resolution, starts at system boot time but + * stops during suspend + * migrate_update: GPU page fault is recovered by 'M' for migrate, 'U' for update + * rw: 'W' for write page fault, 'R' for read page fault + * rescheduled: 'R' if the queue restore failed and rescheduled to try again + */ +#define KFD_EVENT_FMT_UPDATE_GPU_RESET(reset_seq_num, reset_cause)\ + "%x %s\n", (reset_seq_num), (reset_cause) + +#define KFD_EVENT_FMT_THERMAL_THROTTLING(bitmask, counter)\ + "%llx:%llx\n", (bitmask), (counter) + +#define KFD_EVENT_FMT_VMFAULT(pid, task_name)\ + "%x:%s\n", (pid), (task_name) + +#define KFD_EVENT_FMT_PAGEFAULT_START(ns, pid, addr, node, rw)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (rw) + +#define KFD_EVENT_FMT_PAGEFAULT_END(ns, pid, addr, node, migrate_update)\ + "%lld -%d @%lx(%x) %c\n", (ns), (pid), (addr), (node), (migrate_update) + +#define KFD_EVENT_FMT_MIGRATE_START(ns, pid, start, size, from, to, prefetch_loc,\ + preferred_loc, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %x:%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (prefetch_loc), (preferred_loc), (migrate_trigger) + +#define KFD_EVENT_FMT_MIGRATE_END(ns, pid, start, size, from, to, migrate_trigger)\ + "%lld -%d @%lx(%lx) %x->%x %d\n", (ns), (pid), (start), (size),\ + (from), (to), (migrate_trigger) + +#define KFD_EVENT_FMT_QUEUE_EVICTION(ns, pid, node, evict_trigger)\ + "%lld -%d %x %d\n", (ns), (pid), (node), (evict_trigger) + +#define KFD_EVENT_FMT_QUEUE_RESTORE(ns, pid, node, rescheduled)\ + "%lld -%d %x %c\n", (ns), (pid), (node), (rescheduled) + +#define KFD_EVENT_FMT_UNMAP_FROM_GPU(ns, pid, addr, size, node, unmap_trigger)\ + "%lld -%d @%lx(%lx) %x %d\n", (ns), (pid), (addr), (size),\ + (node), (unmap_trigger) + /************************************************************************************************** * CRIU IOCTLs (Checkpoint Restore In Userspace) * -- cgit v1.3.1 From b2d4da31a1f40b05a61076efd4c79b88439003b7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 2 Aug 2024 09:56:28 -0400 Subject: drm: new helper: drm_gem_prime_handle_to_dmabuf() Once something had been put into descriptor table, the only thing you can do with it is returning descriptor to userland - you can't withdraw it on subsequent failure exit, etc. You certainly can't count upon it staying in the same slot of descriptor table - another thread could've played with close(2)/dup2(2)/whatnot. drm_gem_prime_handle_to_fd() creates a dmabuf, allocates a descriptor and attaches dmabuf's file to it (the last two steps are done in dma_buf_fd()). That's nice when all you are going to do is passing a descriptor to userland. If you just need to work with the resulting object or have something else to be done that might fail, drm_gem_prime_handle_to_fd() is racy. The problem is analogous to one with anon_inode_getfd(), and solution is similar to what anon_inode_getfile() provides. Add drm_gem_prime_handle_to_dmabuf() - the "set dmabuf up" parts of drm_gem_prime_handle_to_fd() without the descriptor-related ones. Instead of inserting into descriptor table and returning the file descriptor it just returns the struct file. drm_gem_prime_handle_to_fd() becomes a wrapper for it. Other users will be introduced in the next commit. Acked-by: Thomas Zimmermann Signed-off-by: Al Viro Signed-off-by: Alex Deucher --- drivers/gpu/drm/drm_prime.c | 84 +++++++++++++++++++++++++++++---------------- include/drm/drm_prime.h | 3 ++ 2 files changed, 57 insertions(+), 30 deletions(-) (limited to 'include') diff --git a/drivers/gpu/drm/drm_prime.c b/drivers/gpu/drm/drm_prime.c index 03bd3c7bd0dc..0e3f8adf162f 100644 --- a/drivers/gpu/drm/drm_prime.c +++ b/drivers/gpu/drm/drm_prime.c @@ -410,22 +410,30 @@ static struct dma_buf *export_and_register_object(struct drm_device *dev, } /** - * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers + * drm_gem_prime_handle_to_dmabuf - PRIME export function for GEM drivers * @dev: dev to export the buffer from * @file_priv: drm file-private structure * @handle: buffer handle to export * @flags: flags like DRM_CLOEXEC - * @prime_fd: pointer to storage for the fd id of the create dma-buf * * This is the PRIME export function which must be used mandatorily by GEM * drivers to ensure correct lifetime management of the underlying GEM object. * The actual exporting from GEM object to a dma-buf is done through the * &drm_gem_object_funcs.export callback. + * + * Unlike drm_gem_prime_handle_to_fd(), it returns the struct dma_buf it + * has created, without attaching it to any file descriptors. The difference + * between those two is similar to that between anon_inode_getfile() and + * anon_inode_getfd(); insertion into descriptor table is something you + * can not revert if any cleanup is needed, so the descriptor-returning + * variants should only be used when you are past the last failure exit + * and the only thing left is passing the new file descriptor to userland. + * When all you need is the object itself or when you need to do something + * else that might fail, use that one instead. */ -int drm_gem_prime_handle_to_fd(struct drm_device *dev, +struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, - uint32_t flags, - int *prime_fd) + uint32_t flags) { struct drm_gem_object *obj; int ret = 0; @@ -434,14 +442,14 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, mutex_lock(&file_priv->prime.lock); obj = drm_gem_object_lookup(file_priv, handle); if (!obj) { - ret = -ENOENT; + dmabuf = ERR_PTR(-ENOENT); goto out_unlock; } dmabuf = drm_prime_lookup_buf_by_handle(&file_priv->prime, handle); if (dmabuf) { get_dma_buf(dmabuf); - goto out_have_handle; + goto out; } mutex_lock(&dev->object_name_lock); @@ -463,7 +471,6 @@ int drm_gem_prime_handle_to_fd(struct drm_device *dev, /* normally the created dma-buf takes ownership of the ref, * but if that fails then drop the ref */ - ret = PTR_ERR(dmabuf); mutex_unlock(&dev->object_name_lock); goto out; } @@ -478,34 +485,51 @@ out_have_obj: ret = drm_prime_add_buf_handle(&file_priv->prime, dmabuf, handle); mutex_unlock(&dev->object_name_lock); - if (ret) - goto fail_put_dmabuf; - -out_have_handle: - ret = dma_buf_fd(dmabuf, flags); - /* - * We must _not_ remove the buffer from the handle cache since the newly - * created dma buf is already linked in the global obj->dma_buf pointer, - * and that is invariant as long as a userspace gem handle exists. - * Closing the handle will clean out the cache anyway, so we don't leak. - */ - if (ret < 0) { - goto fail_put_dmabuf; - } else { - *prime_fd = ret; - ret = 0; + if (ret) { + dma_buf_put(dmabuf); + dmabuf = ERR_PTR(ret); } - - goto out; - -fail_put_dmabuf: - dma_buf_put(dmabuf); out: drm_gem_object_put(obj); out_unlock: mutex_unlock(&file_priv->prime.lock); + return dmabuf; +} +EXPORT_SYMBOL(drm_gem_prime_handle_to_dmabuf); - return ret; +/** + * drm_gem_prime_handle_to_fd - PRIME export function for GEM drivers + * @dev: dev to export the buffer from + * @file_priv: drm file-private structure + * @handle: buffer handle to export + * @flags: flags like DRM_CLOEXEC + * @prime_fd: pointer to storage for the fd id of the create dma-buf + * + * This is the PRIME export function which must be used mandatorily by GEM + * drivers to ensure correct lifetime management of the underlying GEM object. + * The actual exporting from GEM object to a dma-buf is done through the + * &drm_gem_object_funcs.export callback. + */ +int drm_gem_prime_handle_to_fd(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags, + int *prime_fd) +{ + struct dma_buf *dmabuf; + int fd = get_unused_fd_flags(flags); + + if (fd < 0) + return fd; + + dmabuf = drm_gem_prime_handle_to_dmabuf(dev, file_priv, handle, flags); + if (IS_ERR(dmabuf)) { + put_unused_fd(fd); + return PTR_ERR(dmabuf); + } + + fd_install(fd, dmabuf->file); + *prime_fd = fd; + return 0; } EXPORT_SYMBOL(drm_gem_prime_handle_to_fd); diff --git a/include/drm/drm_prime.h b/include/drm/drm_prime.h index 2a1d01e5b56b..fa085c44d4ca 100644 --- a/include/drm/drm_prime.h +++ b/include/drm/drm_prime.h @@ -69,6 +69,9 @@ void drm_gem_dmabuf_release(struct dma_buf *dma_buf); int drm_gem_prime_fd_to_handle(struct drm_device *dev, struct drm_file *file_priv, int prime_fd, uint32_t *handle); +struct dma_buf *drm_gem_prime_handle_to_dmabuf(struct drm_device *dev, + struct drm_file *file_priv, uint32_t handle, + uint32_t flags); int drm_gem_prime_handle_to_fd(struct drm_device *dev, struct drm_file *file_priv, uint32_t handle, uint32_t flags, int *prime_fd); -- cgit v1.3.1