From 9095e5544061b16d1b331aca3f32c76cbd656d72 Mon Sep 17 00:00:00 2001 From: Philip Yang Date: Tue, 30 Apr 2024 13:51:51 -0400 Subject: drm/amdkfd: Remove arbitrary timeout for hmm_range_fault On system with khugepaged enabled and user cases with THP buffer, the hmm_range_fault may takes > 15 seconds to return -EBUSY, the arbitrary timeout value is not accurate, cause memory allocation failure. Remove the arbitrary timeout value, return EAGAIN to application if hmm_range_fault return EBUSY, then userspace libdrm and Thunk will call ioctl again. Change EAGAIN to debug message as this is not error. Signed-off-by: Philip Yang Reviewed-by: Felix Kuehling Signed-off-by: Alex Deucher --- drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c | 12 +++--------- 1 file changed, 3 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c') diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c index 431ec72655ec..e36fede7f74c 100644 --- a/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c +++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_hmm.c @@ -202,20 +202,12 @@ int amdgpu_hmm_range_get_pages(struct mmu_interval_notifier *notifier, pr_debug("hmm range: start = 0x%lx, end = 0x%lx", hmm_range->start, hmm_range->end); - /* Assuming 64MB takes maximum 1 second to fault page address */ - timeout = max((hmm_range->end - hmm_range->start) >> 26, 1UL); - timeout *= HMM_RANGE_DEFAULT_TIMEOUT; - timeout = jiffies + msecs_to_jiffies(timeout); + timeout = jiffies + msecs_to_jiffies(HMM_RANGE_DEFAULT_TIMEOUT); retry: hmm_range->notifier_seq = mmu_interval_read_begin(notifier); r = hmm_range_fault(hmm_range); if (unlikely(r)) { - schedule(); - /* - * FIXME: This timeout should encompass the retry from - * mmu_interval_read_retry() as well. - */ if (r == -EBUSY && !time_after(jiffies, timeout)) goto retry; goto out_free_pfns; @@ -247,6 +239,8 @@ out_free_pfns: out_free_range: kfree(hmm_range); + if (r == -EBUSY) + r = -EAGAIN; return r; } -- cgit v1.2.3-70-g09d2