Diffstat (limited to 'drivers/gpu/drm/i915/gem')
20 files changed, 675 insertions, 251 deletions
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
index 0512afdd20d8..b3b398fe689c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_clflush.c
@@ -113,7 +113,7 @@ bool i915_gem_clflush_object(struct drm_i915_gem_object *obj,
 	clflush = clflush_work_create(obj);
 	if (clflush) {
 		i915_sw_fence_await_reservation(&clflush->base.chain,
-						obj->base.resv, NULL, true,
+						obj->base.resv, true,
 						i915_fence_timeout(i915),
 						I915_FENCE_GFP);
 		dma_resv_add_fence(obj->base.resv, &clflush->base.dma,
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_context.c b/drivers/gpu/drm/i915/gem/i915_gem_context.c
index 1e29b1e6d186..01402f3c58f6 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_context.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_context.c
@@ -1452,7 +1452,7 @@ static void engines_idle_release(struct i915_gem_context *ctx,
 		int err;
 
 		/* serialises with execbuf */
-		set_bit(CONTEXT_CLOSED_BIT, &ce->flags);
+		intel_context_close(ce);
 		if (!intel_context_pin_if_active(ce))
 			continue;
 
@@ -2298,7 +2298,6 @@ int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
 	}
 
 	args->ctx_id = id;
-	drm_dbg(&i915->drm, "HW context %d created\n", args->ctx_id);
 
 	return 0;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
index 07eee1c09aaf..ec6f7ae47783 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_dmabuf.c
@@ -25,43 +25,44 @@ static struct drm_i915_gem_object *dma_buf_to_obj(struct dma_buf *buf)
 	return to_intel_bo(buf->priv);
 }
 
-static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attachment,
+static struct sg_table *i915_gem_map_dma_buf(struct dma_buf_attachment *attach,
 					     enum dma_data_direction dir)
 {
-	struct drm_i915_gem_object *obj = dma_buf_to_obj(attachment->dmabuf);
-	struct sg_table *st;
+	struct drm_i915_gem_object *obj = dma_buf_to_obj(attach->dmabuf);
+	struct sg_table *sgt;
 	struct scatterlist *src, *dst;
 	int ret, i;
 
-	/* Copy sg so that we make an independent mapping */
-	st = kmalloc(sizeof(struct sg_table), GFP_KERNEL);
-	if (st == NULL) {
+	/*
+	 * Make a copy of the object's sgt, so that we can make an independent
+	 * mapping
+	 */
+	sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
+	if (!sgt) {
 		ret = -ENOMEM;
 		goto err;
 	}
 
-	ret = sg_alloc_table(st, obj->mm.pages->nents, GFP_KERNEL);
+	ret = sg_alloc_table(sgt, obj->mm.pages->orig_nents, GFP_KERNEL);
 	if (ret)
 		goto err_free;
 
-	src = obj->mm.pages->sgl;
-	dst = st->sgl;
-	for (i = 0; i < obj->mm.pages->nents; i++) {
+	dst = sgt->sgl;
+	for_each_sg(obj->mm.pages->sgl, src, obj->mm.pages->orig_nents, i) {
 		sg_set_page(dst, sg_page(src), src->length, 0);
 		dst = sg_next(dst);
-		src = sg_next(src);
 	}
 
-	ret = dma_map_sgtable(attachment->dev, st, dir, DMA_ATTR_SKIP_CPU_SYNC);
+	ret = dma_map_sgtable(attach->dev, sgt, dir, DMA_ATTR_SKIP_CPU_SYNC);
 	if (ret)
 		goto err_free_sg;
 
-	return st;
+	return sgt;
 
 err_free_sg:
-	sg_free_table(st);
+	sg_free_table(sgt);
 err_free:
-	kfree(st);
+	kfree(sgt);
 err:
 	return ERR_PTR(ret);
 }
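[Editor's note] The copy loop above walks orig_nents rather than nents: nents only counts DMA-mapped segments, while orig_nents is the number of entries actually allocated in the table, which is what a CPU-side duplicate must cover. A minimal sketch of the pattern in isolation (clone_sgt is a hypothetical helper, not part of the patch):

	#include <linux/scatterlist.h>
	#include <linux/slab.h>

	/* Hypothetical helper: duplicate an sg_table's page entries. */
	static struct sg_table *clone_sgt(struct sg_table *src_sgt)
	{
		struct scatterlist *src, *dst;
		struct sg_table *sgt;
		int i;

		sgt = kmalloc(sizeof(*sgt), GFP_KERNEL);
		if (!sgt)
			return ERR_PTR(-ENOMEM);

		/* Size the copy by orig_nents, not the DMA-mapped nents. */
		if (sg_alloc_table(sgt, src_sgt->orig_nents, GFP_KERNEL)) {
			kfree(sgt);
			return ERR_PTR(-ENOMEM);
		}

		dst = sgt->sgl;
		for_each_sg(src_sgt->sgl, src, src_sgt->orig_nents, i) {
			sg_set_page(dst, sg_page(src), src->length, 0);
			dst = sg_next(dst);
		}

		return sgt;
	}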
@@ -236,15 +237,15 @@ struct dma_buf *i915_gem_prime_export(struct drm_gem_object *gem_obj, int flags)
 static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
 {
 	struct drm_i915_private *i915 = to_i915(obj->base.dev);
-	struct sg_table *pages;
+	struct sg_table *sgt;
 	unsigned int sg_page_sizes;
 
 	assert_object_held(obj);
 
-	pages = dma_buf_map_attachment(obj->base.import_attach,
-				       DMA_BIDIRECTIONAL);
-	if (IS_ERR(pages))
-		return PTR_ERR(pages);
+	sgt = dma_buf_map_attachment(obj->base.import_attach,
+				     DMA_BIDIRECTIONAL);
+	if (IS_ERR(sgt))
+		return PTR_ERR(sgt);
 
 	/*
 	 * DG1 is special here since it still snoops transactions even with
@@ -261,16 +262,16 @@ static int i915_gem_object_get_pages_dmabuf(struct drm_i915_gem_object *obj)
 	    (!HAS_LLC(i915) && !IS_DG1(i915)))
 		wbinvd_on_all_cpus();
 
-	sg_page_sizes = i915_sg_dma_sizes(pages->sgl);
-	__i915_gem_object_set_pages(obj, pages, sg_page_sizes);
+	sg_page_sizes = i915_sg_dma_sizes(sgt->sgl);
+	__i915_gem_object_set_pages(obj, sgt, sg_page_sizes);
 
 	return 0;
 }
 
 static void i915_gem_object_put_pages_dmabuf(struct drm_i915_gem_object *obj,
-					     struct sg_table *pages)
+					     struct sg_table *sgt)
 {
-	dma_buf_unmap_attachment(obj->base.import_attach, pages,
+	dma_buf_unmap_attachment(obj->base.import_attach, sgt,
 				 DMA_BIDIRECTIONAL);
 }
 
@@ -313,7 +314,7 @@ struct drm_gem_object *i915_gem_prime_import(struct drm_device *dev,
 	get_dma_buf(dma_buf);
 
 	obj = i915_gem_object_alloc();
-	if (obj == NULL) {
+	if (!obj) {
 		ret = -ENOMEM;
 		goto fail_detach;
 	}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
index 845023c14eb3..1160723c9d2d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_execbuffer.c
@@ -2954,11 +2954,6 @@ await_fence_array(struct i915_execbuffer *eb,
 	int err;
 
 	for (n = 0; n < eb->num_fences; n++) {
-		struct drm_syncobj *syncobj;
-		unsigned int flags;
-
-		syncobj = ptr_unpack_bits(eb->fences[n].syncobj, &flags, 2);
-
 		if (!eb->fences[n].dma_fence)
 			continue;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_internal.c b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
index c698f95af15f..629acb403a2c 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_internal.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_internal.c
@@ -6,7 +6,6 @@
 
 #include <linux/scatterlist.h>
 #include <linux/slab.h>
-#include <linux/swiotlb.h>
 
 #include "i915_drv.h"
 #include "i915_gem.h"
@@ -38,22 +37,12 @@ static int i915_gem_object_get_pages_internal(struct drm_i915_gem_object *obj)
 	struct scatterlist *sg;
 	unsigned int sg_page_sizes;
 	unsigned int npages;
-	int max_order;
+	int max_order = MAX_ORDER;
+	unsigned int max_segment;
 	gfp_t gfp;
 
-	max_order = MAX_ORDER;
-#ifdef CONFIG_SWIOTLB
-	if (is_swiotlb_active(obj->base.dev->dev)) {
-		unsigned int max_segment;
-
-		max_segment = swiotlb_max_segment();
-		if (max_segment) {
-			max_segment = max_t(unsigned int, max_segment,
-					    PAGE_SIZE) >> PAGE_SHIFT;
-			max_order = min(max_order, ilog2(max_segment));
-		}
-	}
-#endif
+	max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
+	max_order = min(max_order, get_order(max_segment));
 
 	gfp = GFP_KERNEL | __GFP_HIGHMEM | __GFP_RECLAIMABLE;
 	if (IS_I965GM(i915) || IS_I965G(i915)) {
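[Editor's note] The i915_gem_internal.c hunk drops the open-coded swiotlb probe in favour of the generic per-device DMA limit. Condensed, the new bound is just the following (names as in the hunk; comments are mine):

	/*
	 * i915_sg_segment_size() reports the device's maximum DMA segment
	 * size in bytes; shift it down to pages and clamp the allocation
	 * order so a single block never exceeds one DMA segment.
	 */
	unsigned int max_segment = i915_sg_segment_size(i915->drm.dev) >> PAGE_SHIFT;
	int max_order = min(MAX_ORDER, get_order(max_segment));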
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
index 73d9eda1d6b7..e63329bc8065 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_mman.c
@@ -413,7 +413,7 @@ retry:
 		vma->mmo = mmo;
 
 		if (CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
-			intel_wakeref_auto(&to_gt(i915)->userfault_wakeref,
+			intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref,
 					   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
 	if (write) {
@@ -557,11 +557,13 @@ void i915_gem_object_runtime_pm_release_mmap_offset(struct drm_i915_gem_object *
 
 	drm_vma_node_unmap(&bo->base.vma_node, bdev->dev_mapping);
 
-	if (obj->userfault_count) {
-		/* rpm wakeref provide exclusive access */
-		list_del(&obj->userfault_link);
-		obj->userfault_count = 0;
-	}
+	/*
+	 * We have exclusive access here via runtime suspend. All other callers
+	 * must first grab the rpm wakeref.
+	 */
+	GEM_BUG_ON(!obj->userfault_count);
+	list_del(&obj->userfault_link);
+	obj->userfault_count = 0;
 }
 
 void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
@@ -587,13 +589,6 @@ void i915_gem_object_release_mmap_offset(struct drm_i915_gem_object *obj)
 		spin_lock(&obj->mmo.lock);
 	}
 	spin_unlock(&obj->mmo.lock);
-
-	if (obj->userfault_count) {
-		mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
-		list_del(&obj->userfault_link);
-		mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
-		obj->userfault_count = 0;
-	}
 }
 
 static struct i915_mmap_offset *
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.c b/drivers/gpu/drm/i915/gem/i915_gem_object.c
index 6b8710ba8ded..733696057761 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.c
@@ -458,6 +458,16 @@ i915_gem_object_read_from_page_iomap(struct drm_i915_gem_object *obj, u64 offset
 	io_mapping_unmap(src_map);
 }
 
+static bool object_has_mappable_iomem(struct drm_i915_gem_object *obj)
+{
+	GEM_BUG_ON(!i915_gem_object_has_iomem(obj));
+
+	if (IS_DGFX(to_i915(obj->base.dev)))
+		return i915_ttm_resource_mappable(i915_gem_to_ttm(obj)->resource);
+
+	return true;
+}
+
 /**
  * i915_gem_object_read_from_page - read data from the page of a GEM object
  * @obj: GEM object to read from
@@ -480,7 +490,7 @@ int i915_gem_object_read_from_page(struct drm_i915_gem_object *obj, u64 offset,
 
 	if (i915_gem_object_has_struct_page(obj))
 		i915_gem_object_read_from_page_kmap(obj, offset, dst, size);
-	else if (i915_gem_object_has_iomem(obj))
+	else if (i915_gem_object_has_iomem(obj) && object_has_mappable_iomem(obj))
 		i915_gem_object_read_from_page_iomap(obj, offset, dst, size);
 	else
 		return -ENODEV;
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_object.h b/drivers/gpu/drm/i915/gem/i915_gem_object.h
index 1723af9b0f6a..6b9ecff42bb5 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_object.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_object.h
@@ -482,6 +482,10 @@ void *__must_check i915_gem_object_pin_map(struct drm_i915_gem_object *obj,
 void *__must_check i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
 						    enum i915_map_type type);
 
+enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
+					  struct drm_i915_gem_object *obj,
+					  bool always_coherent);
+
 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
 				 unsigned long offset,
 				 unsigned long size);
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pages.c b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
index 4df50b049cea..16f845663ff2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pages.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pages.c
@@ -466,6 +466,18 @@ void *i915_gem_object_pin_map_unlocked(struct drm_i915_gem_object *obj,
 	return ret;
 }
 
+enum i915_map_type i915_coherent_map_type(struct drm_i915_private *i915,
+					  struct drm_i915_gem_object *obj,
+					  bool always_coherent)
+{
+	if (i915_gem_object_is_lmem(obj))
+		return I915_MAP_WC;
+	if (HAS_LLC(i915) || always_coherent)
+		return I915_MAP_WB;
+	else
+		return I915_MAP_WC;
+}
+
 void __i915_gem_object_flush_map(struct drm_i915_gem_object *obj,
 				 unsigned long offset,
 				 unsigned long size)
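[Editor's note] A sketch of how a caller might use the new i915_coherent_map_type() helper when mapping an object: lmem is always write-combined, and write-back is picked only when it is coherent (LLC) or explicitly requested. The call site below is illustrative, not from the patch:

	/* Illustrative only: map an object using the helper (hypothetical caller). */
	static void *map_for_cpu(struct drm_i915_private *i915,
				 struct drm_i915_gem_object *obj)
	{
		enum i915_map_type type;

		/* WB only when coherent (LLC) or explicitly required; lmem is WC. */
		type = i915_coherent_map_type(i915, obj, false);

		return i915_gem_object_pin_map_unlocked(obj, type);
	}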
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_pm.c b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
index 3428f735e786..0d812f4d787d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_pm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_pm.c
@@ -22,9 +22,12 @@
 
 void i915_gem_suspend(struct drm_i915_private *i915)
 {
+	struct intel_gt *gt;
+	unsigned int i;
+
 	GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 
-	intel_wakeref_auto(&to_gt(i915)->userfault_wakeref, 0);
+	intel_wakeref_auto(&i915->runtime_pm.userfault_wakeref, 0);
 	flush_workqueue(i915->wq);
 
 	/*
@@ -36,7 +39,8 @@ void i915_gem_suspend(struct drm_i915_private *i915)
 	 * state. Fortunately, the kernel_context is disposable and we do
 	 * not rely on its state.
 	 */
-	intel_gt_suspend_prepare(to_gt(i915));
+	for_each_gt(gt, i915, i)
+		intel_gt_suspend_prepare(gt);
 
 	i915_gem_drain_freed_objects(i915);
 }
@@ -131,7 +135,9 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 		&i915->mm.purge_list,
 		NULL
 	}, **phase;
+	struct intel_gt *gt;
 	unsigned long flags;
+	unsigned int i;
 	bool flush = false;
 
 	/*
@@ -154,7 +160,8 @@ void i915_gem_suspend_late(struct drm_i915_private *i915)
 	 * machine in an unusable condition.
 	 */
 
-	intel_gt_suspend_late(to_gt(i915));
+	for_each_gt(gt, i915, i)
+		intel_gt_suspend_late(gt);
 
 	spin_lock_irqsave(&i915->mm.obj_lock, flags);
 	for (phase = phases; *phase; phase++) {
@@ -212,7 +219,8 @@ int i915_gem_freeze_late(struct drm_i915_private *i915)
 
 void i915_gem_resume(struct drm_i915_private *i915)
 {
-	int ret;
+	struct intel_gt *gt;
+	int ret, i, j;
 
 	GEM_TRACE("%s\n", dev_name(i915->drm.dev));
 
@@ -224,8 +232,25 @@ void i915_gem_resume(struct drm_i915_private *i915)
 	 * guarantee that the context image is complete. So let's just reset
 	 * it and start again.
 	 */
-	intel_gt_resume(to_gt(i915));
+	for_each_gt(gt, i915, i)
+		if (intel_gt_resume(gt))
+			goto err_wedged;
 
 	ret = lmem_restore(i915, I915_TTM_BACKUP_ALLOW_GPU);
 	GEM_WARN_ON(ret);
+
+	return;
+
+err_wedged:
+	for_each_gt(gt, i915, j) {
+		if (!intel_gt_is_wedged(gt)) {
+			dev_err(i915->drm.dev,
+				"Failed to re-initialize GPU[%u], declaring it wedged!\n",
+				j);
+			intel_gt_set_wedged(gt);
+		}
+
+		if (j == i)
+			break;
+	}
 }
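[Editor's note] The suspend/resume paths above now iterate every GT on the device, and the resume unwind wedges exactly the GTs that were already brought up (the `j == i` break). A schematic of that pattern with the i915 specifics stripped away — init_one()/teardown_one() are hypothetical stand-ins for intel_gt_resume()/intel_gt_set_wedged(), shown for shape only, not buildable standalone:

	struct unit { int id; };
	static int init_one(struct unit *u);       /* e.g. intel_gt_resume() */
	static void teardown_one(struct unit *u);  /* e.g. intel_gt_set_wedged() */

	/* Bring up units 0..count-1; on failure, unwind only the ones touched. */
	static int init_all(struct unit *units, unsigned int count)
	{
		unsigned int i, j;

		for (i = 0; i < count; i++)
			if (init_one(&units[i]))
				goto err;

		return 0;

	err:
		/* Tear down units 0..i inclusive, matching the patch's break on j == i. */
		for (j = 0; j <= i; j++)
			teardown_one(&units[j]);

		return -EIO;
	}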
 	 */
-	if (!HAS_LLC(i915))
+	if (!HAS_LLC(i915) && !IS_DGFX(i915))
 		obj->cache_dirty = true;
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
index acc561c0f0aa..0c70711818ed 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_stolen.c
@@ -77,22 +77,26 @@ void i915_gem_stolen_remove_node(struct drm_i915_private *i915,
 	mutex_unlock(&i915->mm.stolen_lock);
 }
 
-static int i915_adjust_stolen(struct drm_i915_private *i915,
-			      struct resource *dsm)
+static bool valid_stolen_size(struct drm_i915_private *i915, struct resource *dsm)
+{
+	return (dsm->start != 0 || HAS_LMEMBAR_SMEM_STOLEN(i915)) && dsm->end > dsm->start;
+}
+
+static int adjust_stolen(struct drm_i915_private *i915,
+			 struct resource *dsm)
 {
 	struct i915_ggtt *ggtt = to_gt(i915)->ggtt;
 	struct intel_uncore *uncore = ggtt->vm.gt->uncore;
-	struct resource *r;
 
-	if (dsm->start == 0 || dsm->end <= dsm->start)
+	if (!valid_stolen_size(i915, dsm))
 		return -EINVAL;
 
 	/*
+	 * Make sure we don't clobber the GTT if it's within stolen memory
+	 *
 	 * TODO: We have yet too encounter the case where the GTT wasn't at the
 	 * end of stolen. With that assumption we could simplify this.
 	 */
-
-	/* Make sure we don't clobber the GTT if it's within stolen memory */
 	if (GRAPHICS_VER(i915) <= 4 &&
 	    !IS_G33(i915) && !IS_PINEVIEW(i915) && !IS_G4X(i915)) {
 		struct resource stolen[2] = {*dsm, *dsm};
@@ -131,12 +135,25 @@ static int i915_adjust_stolen(struct drm_i915_private *i915,
 		}
 	}
 
+	if (!valid_stolen_size(i915, dsm))
+		return -EINVAL;
+
+	return 0;
+}
+
+static int request_smem_stolen(struct drm_i915_private *i915,
+			       struct resource *dsm)
+{
+	struct resource *r;
+
 	/*
-	 * With stolen lmem, we don't need to check if the address range
-	 * overlaps with the non-stolen system memory range, since lmem is local
-	 * to the gpu.
+	 * With stolen lmem, we don't need to request system memory for the
+	 * address range since it's local to the gpu.
+	 *
+	 * Starting MTL, in IGFX devices the stolen memory is exposed via
+	 * LMEMBAR and shall be considered similar to stolen lmem.
 	 */
-	if (HAS_LMEM(i915))
+	if (HAS_LMEM(i915) || HAS_LMEMBAR_SMEM_STOLEN(i915))
 		return 0;
 
 	/*
@@ -371,8 +388,6 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 
 	drm_dbg(&i915->drm, "GEN6_STOLEN_RESERVED = 0x%016llx\n", reg_val);
 
-	*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
-
 	switch (reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK) {
 	case GEN8_STOLEN_RESERVED_1M:
 		*size = 1024 * 1024;
@@ -390,41 +405,30 @@ static void icl_get_stolen_reserved(struct drm_i915_private *i915,
 		*size = 8 * 1024 * 1024;
 		MISSING_CASE(reg_val & GEN8_STOLEN_RESERVED_SIZE_MASK);
 	}
+
+	if (HAS_LMEMBAR_SMEM_STOLEN(i915))
+		/* the base is initialized to stolen top so subtract size to get base */
+		*base -= *size;
+	else
+		*base = reg_val & GEN11_STOLEN_RESERVED_ADDR_MASK;
 }
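[Editor's note] Worked example for the icl_get_stolen_reserved() change: on LMEMBAR-stolen platforms the register no longer carries a usable base, so the caller pre-seeds *base with the top of stolen (see init_reserved_stolen() below, which sets reserved_base = stolen_top) and the decoded size is subtracted from it. Assuming a hypothetical 64 MiB DSM ending at 0x4000000 and a 1 MiB reservation:

	/* Caller seeds base with stolen_top (one past the end of DSM). */
	resource_size_t stolen_top = 0x4000000;	/* example value, 64 MiB DSM */
	resource_size_t base = stolen_top;
	resource_size_t size = 1024 * 1024;	/* GEN8_STOLEN_RESERVED_1M */

	base -= size;	/* 0x3f00000: the reservation sits at the top of stolen */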
 
-static int i915_gem_init_stolen(struct intel_memory_region *mem)
+/*
+ * Initialize i915->dsm_reserved to contain the reserved space within the Data
+ * Stolen Memory. This is a range on the top of DSM that is reserved, not to
+ * be used by driver, so must be excluded from the region passed to the
+ * allocator later. In the spec this is also called as WOPCM.
+ *
+ * Our expectation is that the reserved space is at the top of the stolen
+ * region, as it has been the case for every platform, and *never* at the
+ * bottom, so the calculation here can be simplified.
+ */
+static int init_reserved_stolen(struct drm_i915_private *i915)
 {
-	struct drm_i915_private *i915 = mem->i915;
 	struct intel_uncore *uncore = &i915->uncore;
 	resource_size_t reserved_base, stolen_top;
-	resource_size_t reserved_total, reserved_size;
-
-	mutex_init(&i915->mm.stolen_lock);
-
-	if (intel_vgpu_active(i915)) {
-		drm_notice(&i915->drm,
-			   "%s, disabling use of stolen memory\n",
-			   "iGVT-g active");
-		return 0;
-	}
-
-	if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
-		drm_notice(&i915->drm,
-			   "%s, disabling use of stolen memory\n",
-			   "DMAR active");
-		return 0;
-	}
-
-	if (resource_size(&mem->region) == 0)
-		return 0;
-
-	i915->dsm = mem->region;
-
-	if (i915_adjust_stolen(i915, &i915->dsm))
-		return 0;
-
-	GEM_BUG_ON(i915->dsm.start == 0);
-	GEM_BUG_ON(i915->dsm.end <= i915->dsm.start);
+	resource_size_t reserved_size;
+	int ret = 0;
 
 	stolen_top = i915->dsm.end + 1;
 	reserved_base = stolen_top;
@@ -455,17 +459,16 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
 					&reserved_base, &reserved_size);
 	}
 
-	/*
-	 * Our expectation is that the reserved space is at the top of the
-	 * stolen region and *never* at the bottom. If we see !reserved_base,
-	 * it likely means we failed to read the registers correctly.
-	 */
+	/* No reserved stolen */
+	if (reserved_base == stolen_top)
+		goto bail_out;
+
 	if (!reserved_base) {
 		drm_err(&i915->drm,
 			"inconsistent reservation %pa + %pa; ignoring\n",
 			&reserved_base, &reserved_size);
-		reserved_base = stolen_top;
-		reserved_size = 0;
+		ret = -EINVAL;
+		goto bail_out;
 	}
 
 	i915->dsm_reserved =
@@ -475,19 +478,55 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
 		drm_err(&i915->drm,
 			"Stolen reserved area %pR outside stolen memory %pR\n",
 			&i915->dsm_reserved, &i915->dsm);
-		return 0;
+		ret = -EINVAL;
+		goto bail_out;
 	}
 
+	return 0;
+
+bail_out:
+	i915->dsm_reserved =
+		(struct resource)DEFINE_RES_MEM(reserved_base, 0);
+
+	return ret;
+}
+
+static int i915_gem_init_stolen(struct intel_memory_region *mem)
+{
+	struct drm_i915_private *i915 = mem->i915;
+
+	mutex_init(&i915->mm.stolen_lock);
+
+	if (intel_vgpu_active(i915)) {
+		drm_notice(&i915->drm,
+			   "%s, disabling use of stolen memory\n",
+			   "iGVT-g active");
+		return -ENOSPC;
+	}
+
+	if (i915_vtd_active(i915) && GRAPHICS_VER(i915) < 8) {
+		drm_notice(&i915->drm,
+			   "%s, disabling use of stolen memory\n",
+			   "DMAR active");
+		return -ENOSPC;
+	}
+
+	if (adjust_stolen(i915, &mem->region))
+		return -ENOSPC;
+
+	if (request_smem_stolen(i915, &mem->region))
+		return -ENOSPC;
+
+	i915->dsm = mem->region;
+
+	if (init_reserved_stolen(i915))
+		return -ENOSPC;
+
 	/* Exclude the reserved region from driver use */
-	mem->region.end = reserved_base - 1;
+	mem->region.end = i915->dsm_reserved.start - 1;
 	mem->io_size = min(mem->io_size, resource_size(&mem->region));
 
-	/* It is possible for the reserved area to end before the end of stolen
-	 * memory, so just consider the start. */
-	reserved_total = stolen_top - reserved_base;
-
-	i915->stolen_usable_size =
-		resource_size(&i915->dsm) - reserved_total;
+	i915->stolen_usable_size = resource_size(&mem->region);
 
 	drm_dbg(&i915->drm,
 		"Memory reserved for graphics device: %lluK, usable: %lluK\n",
@@ -495,7 +534,7 @@ static int i915_gem_init_stolen(struct intel_memory_region *mem)
 	       (u64)i915->stolen_usable_size >> 10);
 
 	if (i915->stolen_usable_size == 0)
-		return 0;
+		return -ENOSPC;
 
 	/* Basic memrange allocator for stolen space. */
 	drm_mm_init(&i915->mm.stolen, 0, i915->stolen_usable_size);
@@ -733,11 +772,17 @@ i915_gem_object_create_stolen(struct drm_i915_private *i915,
 
 static int init_stolen_smem(struct intel_memory_region *mem)
 {
+	int err;
+
 	/*
 	 * Initialise stolen early so that we may reserve preallocated
 	 * objects for the BIOS to KMS transition.
 	 */
-	return i915_gem_init_stolen(mem);
+	err = i915_gem_init_stolen(mem);
+	if (err)
+		drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+
+	return 0;
 }
 
 static int release_stolen_smem(struct intel_memory_region *mem)
@@ -754,26 +799,25 @@ static const struct intel_memory_region_ops i915_region_stolen_smem_ops = {
 
 static int init_stolen_lmem(struct intel_memory_region *mem)
 {
+	struct drm_i915_private *i915 = mem->i915;
 	int err;
 
 	if (GEM_WARN_ON(resource_size(&mem->region) == 0))
-		return -ENODEV;
+		return 0;
 
-	/*
-	 * TODO: For stolen lmem we mostly just care about populating the dsm
-	 * related bits and setting up the drm_mm allocator for the range.
-	 * Perhaps split up i915_gem_init_stolen() for this.
-	 */
 	err = i915_gem_init_stolen(mem);
-	if (err)
-		return err;
+	if (err) {
+		drm_dbg(&mem->i915->drm, "Skip stolen region: failed to setup\n");
+		return 0;
+	}
 
-	if (mem->io_size && !io_mapping_init_wc(&mem->iomap,
-						mem->io_start,
-						mem->io_size)) {
-		err = -EIO;
+	if (mem->io_size &&
+	    !io_mapping_init_wc(&mem->iomap, mem->io_start, mem->io_size))
 		goto err_cleanup;
-	}
+
+	drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
+		&mem->io_start);
+	drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &mem->region.start);
 
 	return 0;
 
@@ -796,6 +840,29 @@ static const struct intel_memory_region_ops i915_region_stolen_lmem_ops = {
 	.init_object = _i915_gem_object_stolen_init,
 };
 
+static int mtl_get_gms_size(struct intel_uncore *uncore)
+{
+	u16 ggc, gms;
+
+	ggc = intel_uncore_read16(uncore, GGC);
+
+	/* check GGMS, should be fixed 0x3 (8MB) */
+	if ((ggc & GGMS_MASK) != GGMS_MASK)
+		return -EIO;
+
+	/* return valid GMS value, -EIO if invalid */
+	gms = REG_FIELD_GET(GMS_MASK, ggc);
+	switch (gms) {
+	case 0x0 ... 0x04:
+		return gms * 32;
+	case 0xf0 ... 0xfe:
+		return (gms - 0xf0 + 1) * 4;
+	default:
+		MISSING_CASE(gms);
+		return -EIO;
+	}
+}
+
 struct intel_memory_region *
 i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 			   u16 instance)
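[Editor's note] The GMS field decode above is piecewise: encodings 0x0-0x04 are multiples of 32 MB, while 0xf0-0xfe step in 4 MB increments. A standalone userspace model of just the decode, for sanity-checking values (mtl_gms_to_mb is hypothetical; the real function also validates the GGMS bits):

	#include <stdio.h>

	/* Model of the GMS decode in mtl_get_gms_size(); returns MB or -1. */
	static int mtl_gms_to_mb(unsigned int gms)
	{
		if (gms <= 0x04)
			return gms * 32;		/* 0, 32, 64, 96, 128 MB */
		if (gms >= 0xf0 && gms <= 0xfe)
			return (gms - 0xf0 + 1) * 4;	/* 4..60 MB in 4 MB steps */
		return -1;				/* invalid encoding */
	}

	int main(void)
	{
		printf("gms=0x02 -> %d MB\n", mtl_gms_to_mb(0x02));	/* 64 */
		printf("gms=0xf1 -> %d MB\n", mtl_gms_to_mb(0xf1));	/* 8 */
		return 0;
	}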
@@ -806,6 +873,7 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 	struct intel_memory_region *mem;
 	resource_size_t io_start, io_size;
 	resource_size_t min_page_size;
+	int ret;
 
 	if (WARN_ON_ONCE(instance))
 		return ERR_PTR(-ENODEV);
@@ -813,12 +881,8 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 	if (!i915_pci_resource_valid(pdev, GEN12_LMEM_BAR))
 		return ERR_PTR(-ENXIO);
 
-	/* Use DSM base address instead for stolen memory */
-	dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE);
-	if (IS_DG1(uncore->i915)) {
+	if (HAS_LMEMBAR_SMEM_STOLEN(i915) || IS_DG1(i915)) {
 		lmem_size = pci_resource_len(pdev, GEN12_LMEM_BAR);
-		if (WARN_ON(lmem_size < dsm_base))
-			return ERR_PTR(-ENODEV);
 	} else {
 		resource_size_t lmem_range;
 
@@ -827,13 +891,39 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 		lmem_size *= SZ_1G;
 	}
 
-	dsm_size = lmem_size - dsm_base;
-	if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
+	if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+		/*
+		 * MTL dsm size is in GGC register.
+		 * Also MTL uses offset to DSMBASE in ptes, so i915
+		 * uses dsm_base = 0 to setup stolen region.
+		 */
+		ret = mtl_get_gms_size(uncore);
+		if (ret < 0) {
+			drm_err(&i915->drm, "invalid MTL GGC register setting\n");
+			return ERR_PTR(ret);
+		}
+
+		dsm_base = 0;
+		dsm_size = (resource_size_t)(ret * SZ_1M);
+
+		GEM_BUG_ON(pci_resource_len(pdev, GEN12_LMEM_BAR) != SZ_256M);
+		GEM_BUG_ON((dsm_size + SZ_8M) > lmem_size);
+	} else {
+		/* Use DSM base address instead for stolen memory */
+		dsm_base = intel_uncore_read64(uncore, GEN12_DSMBASE) & GEN12_BDSM_MASK;
+		if (WARN_ON(lmem_size < dsm_base))
+			return ERR_PTR(-ENODEV);
+		dsm_size = lmem_size - dsm_base;
+	}
+
+	io_size = dsm_size;
+	if (HAS_LMEMBAR_SMEM_STOLEN(i915)) {
+		io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + SZ_8M;
+	} else if (pci_resource_len(pdev, GEN12_LMEM_BAR) < lmem_size) {
 		io_start = 0;
 		io_size = 0;
 	} else {
 		io_start = pci_resource_start(pdev, GEN12_LMEM_BAR) + dsm_base;
-		io_size = dsm_size;
 	}
 
 	min_page_size = HAS_64K_PAGES(i915) ? I915_GTT_PAGE_SIZE_64K :
@@ -847,16 +937,6 @@ i915_gem_stolen_lmem_setup(struct drm_i915_private *i915, u16 type,
 	if (IS_ERR(mem))
 		return mem;
 
-	/*
-	 * TODO: consider creating common helper to just print all the
-	 * interesting stuff from intel_memory_region, which we can use for all
-	 * our probed regions.
-	 */
-
-	drm_dbg(&i915->drm, "Stolen Local memory IO start: %pa\n",
-		&mem->io_start);
-	drm_dbg(&i915->drm, "Stolen Local DSM base: %pa\n", &dsm_base);
-
 	intel_memory_region_set_name(mem, "stolen-local");
 
 	mem->private = true;
@@ -881,6 +961,7 @@ i915_gem_stolen_smem_setup(struct drm_i915_private *i915, u16 type,
 	intel_memory_region_set_name(mem, "stolen-system");
 
 	mem->private = true;
+
 	return mem;
 }
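[Editor's note] Putting the MTL numbers together: dsm_base is 0 because the PTEs hold offsets from DSMBASE, CPU access goes through LMEMBAR past an 8 MiB reserved chunk, and the BAR is asserted to be exactly 256 MiB. A worked sketch with a hypothetical GMS of 0x02 (64 MB):

	/* Hypothetical MTL stolen layout, mirroring the hunk above. */
	unsigned long long lmem_size = 256ULL << 20;	/* LMEMBAR length (asserted) */
	unsigned long long dsm_base  = 0;		/* PTEs are offsets from DSMBASE */
	unsigned long long dsm_size  = 64ULL << 20;	/* e.g. GMS == 0x02 -> 64 MB */

	/* CPU window sits behind LMEMBAR, skipping the reserved 8 MiB: */
	unsigned long long io_start = /* BAR start + */ 8ULL << 20;
	unsigned long long io_size  = dsm_size;
	/* The patch asserts dsm_size + 8 MiB <= lmem_size, i.e. it fits the BAR. */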
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
index 7a1e92c11946..25129af70f70 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.c
@@ -189,7 +189,7 @@ static int i915_ttm_tt_shmem_populate(struct ttm_device *bdev,
 	struct drm_i915_private *i915 = container_of(bdev, typeof(*i915), bdev);
 	struct intel_memory_region *mr = i915->mm.regions[INTEL_MEMORY_SYSTEM];
 	struct i915_ttm_tt *i915_tt = container_of(ttm, typeof(*i915_tt), ttm);
-	const unsigned int max_segment = i915_sg_segment_size();
+	const unsigned int max_segment = i915_sg_segment_size(i915->drm.dev);
 	const size_t size = (size_t)ttm->num_pages << PAGE_SHIFT;
 	struct file *filp = i915_tt->filp;
 	struct sgt_iter sgt_iter;
@@ -279,7 +279,7 @@ static struct ttm_tt *i915_ttm_tt_create(struct ttm_buffer_object *bo,
 	struct i915_ttm_tt *i915_tt;
 	int ret;
 
-	if (!obj)
+	if (i915_ttm_is_ghost_object(bo))
 		return NULL;
 
 	i915_tt = kzalloc(sizeof(*i915_tt), GFP_KERNEL);
@@ -362,7 +362,7 @@ static bool i915_ttm_eviction_valuable(struct ttm_buffer_object *bo,
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 
-	if (!obj)
+	if (i915_ttm_is_ghost_object(bo))
 		return false;
 
 	/*
@@ -509,18 +509,9 @@ static int i915_ttm_shrink(struct drm_i915_gem_object *obj, unsigned int flags)
 static void i915_ttm_delete_mem_notify(struct ttm_buffer_object *bo)
 {
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
-	intel_wakeref_t wakeref = 0;
-
-	if (bo->resource && likely(obj)) {
-		/* ttm_bo_release() already has dma_resv_lock */
-		if (i915_ttm_cpu_maps_iomem(bo->resource))
-			wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
 
+	if (bo->resource && !i915_ttm_is_ghost_object(bo)) {
 		__i915_gem_object_pages_fini(obj);
-
-		if (wakeref)
-			intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
-
 		i915_ttm_free_cached_io_rsgt(obj);
 	}
 }
@@ -538,7 +529,7 @@ static struct i915_refct_sgt *i915_ttm_tt_get_st(struct ttm_tt *ttm)
 	ret = sg_alloc_table_from_pages_segment(st,
 			ttm->pages, ttm->num_pages,
 			0, (unsigned long)ttm->num_pages << PAGE_SHIFT,
-			i915_sg_segment_size(), GFP_KERNEL);
+			i915_sg_segment_size(i915_tt->dev), GFP_KERNEL);
 	if (ret) {
 		st->sgl = NULL;
 		return ERR_PTR(ret);
@@ -624,7 +615,7 @@ static void i915_ttm_swap_notify(struct ttm_buffer_object *bo)
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 	int ret;
 
-	if (!obj)
+	if (i915_ttm_is_ghost_object(bo))
 		return;
 
 	ret = i915_ttm_move_notify(bo);
@@ -657,7 +648,7 @@ static int i915_ttm_io_mem_reserve(struct ttm_device *bdev, struct ttm_resource
 	struct drm_i915_gem_object *obj = i915_ttm_to_gem(mem->bo);
 	bool unknown_state;
 
-	if (!obj)
+	if (i915_ttm_is_ghost_object(mem->bo))
 		return -EINVAL;
 
 	if (!kref_get_unless_zero(&obj->base.refcount))
@@ -690,7 +681,7 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 	unsigned long base;
 	unsigned int ofs;
 
-	GEM_BUG_ON(!obj);
+	GEM_BUG_ON(i915_ttm_is_ghost_object(bo));
 	GEM_WARN_ON(bo->ttm);
 
 	base = obj->mm.region->iomap.base - obj->mm.region->region.start;
@@ -699,6 +690,50 @@ static unsigned long i915_ttm_io_mem_pfn(struct ttm_buffer_object *bo,
 	return ((base + sg_dma_address(sg)) >> PAGE_SHIFT) + ofs;
 }
 
+static int i915_ttm_access_memory(struct ttm_buffer_object *bo,
+				  unsigned long offset, void *buf,
+				  int len, int write)
+{
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
+	resource_size_t iomap = obj->mm.region->iomap.base -
+		obj->mm.region->region.start;
+	unsigned long page = offset >> PAGE_SHIFT;
+	unsigned long bytes_left = len;
+
+	/*
+	 * TODO: For now just let it fail if the resource is non-mappable,
+	 * otherwise we need to perform the memcpy from the gpu here, without
+	 * interfering with the object (like moving the entire thing).
+	 */
+	if (!i915_ttm_resource_mappable(bo->resource))
+		return -EIO;
+
+	offset -= page << PAGE_SHIFT;
+	do {
+		unsigned long bytes = min(bytes_left, PAGE_SIZE - offset);
+		void __iomem *ptr;
+		dma_addr_t daddr;
+
+		daddr = i915_gem_object_get_dma_address(obj, page);
+		ptr = ioremap_wc(iomap + daddr + offset, bytes);
+		if (!ptr)
+			return -EIO;
+
+		if (write)
+			memcpy_toio(ptr, buf, bytes);
+		else
+			memcpy_fromio(buf, ptr, bytes);
+		iounmap(ptr);
+
+		page++;
+		buf += bytes;
+		bytes_left -= bytes;
+		offset = 0;
+	} while (bytes_left);
+
+	return len;
+}
+
 /*
  * All callbacks need to take care not to downcast a struct ttm_buffer_object
  * without checking its subclass, since it might be a TTM ghost object.
 */
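[Editor's note] The access_memory loop above copies through a per-page ioremap, splitting the request at page boundaries so each chunk never crosses a page. A userspace model of just the chunking arithmetic, with a callback standing in for the ioremap_wc + memcpy_toio/fromio step:

	#include <stddef.h>

	#define MODEL_PAGE_SIZE  4096UL
	#define MODEL_PAGE_SHIFT 12

	/* Model: walk [offset, offset + len) in page-bounded chunks. */
	static void for_each_chunk(unsigned long offset, size_t len,
				   void (*copy)(unsigned long page,
						unsigned long off, size_t bytes))
	{
		unsigned long page = offset >> MODEL_PAGE_SHIFT;
		size_t bytes_left = len;

		offset -= page << MODEL_PAGE_SHIFT;	/* offset within first page */
		do {
			size_t bytes = bytes_left < MODEL_PAGE_SIZE - offset ?
				       bytes_left : MODEL_PAGE_SIZE - offset;

			copy(page, offset, bytes);	/* ioremap + memcpy_*io here */

			page++;
			bytes_left -= bytes;
			offset = 0;	/* subsequent chunks start page-aligned */
		} while (bytes_left);
	}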
@@ -715,6 +750,7 @@ static struct ttm_device_funcs i915_ttm_bo_driver = {
 	.delete_mem_notify = i915_ttm_delete_mem_notify,
 	.io_mem_reserve = i915_ttm_io_mem_reserve,
 	.io_mem_pfn = i915_ttm_io_mem_pfn,
+	.access_memory = i915_ttm_access_memory,
 };
 
 /**
@@ -990,13 +1026,12 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 	struct vm_area_struct *area = vmf->vma;
 	struct ttm_buffer_object *bo = area->vm_private_data;
 	struct drm_device *dev = bo->base.dev;
-	struct drm_i915_gem_object *obj;
+	struct drm_i915_gem_object *obj = i915_ttm_to_gem(bo);
 	intel_wakeref_t wakeref = 0;
 	vm_fault_t ret;
 	int idx;
 
-	obj = i915_ttm_to_gem(bo);
-	if (!obj)
+	if (i915_ttm_is_ghost_object(bo))
 		return VM_FAULT_SIGBUS;
 
 	/* Sanity check that we allow writing into this object */
@@ -1035,7 +1070,8 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 		}
 
 		if (err) {
-			drm_dbg(dev, "Unable to make resource CPU accessible\n");
+			drm_dbg(dev, "Unable to make resource CPU accessible(err = %pe)\n",
+				ERR_PTR(err));
 			dma_resv_unlock(bo->base.resv);
 			ret = VM_FAULT_SIGBUS;
 			goto out_rpm;
@@ -1053,16 +1089,19 @@ static vm_fault_t vm_fault_ttm(struct vm_fault *vmf)
 	if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
 		goto out_rpm;
 
-	/* ttm_bo_vm_reserve() already has dma_resv_lock */
+	/*
+	 * ttm_bo_vm_reserve() already has dma_resv_lock.
+	 * userfault_count is protected by dma_resv lock and rpm wakeref.
+	 */
 	if (ret == VM_FAULT_NOPAGE && wakeref && !obj->userfault_count) {
 		obj->userfault_count = 1;
-		mutex_lock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
-		list_add(&obj->userfault_link, &to_gt(to_i915(obj->base.dev))->lmem_userfault_list);
-		mutex_unlock(&to_gt(to_i915(obj->base.dev))->lmem_userfault_lock);
+		spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+		list_add(&obj->userfault_link, &to_i915(obj->base.dev)->runtime_pm.lmem_userfault_list);
+		spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
 	}
 
 	if (wakeref & CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND)
-		intel_wakeref_auto(&to_gt(to_i915(obj->base.dev))->userfault_wakeref,
+		intel_wakeref_auto(&to_i915(obj->base.dev)->runtime_pm.userfault_wakeref,
 				   msecs_to_jiffies_timeout(CONFIG_DRM_I915_USERFAULT_AUTOSUSPEND));
 
 	i915_ttm_adjust_lru(obj);
@@ -1094,7 +1133,7 @@ static void ttm_vm_open(struct vm_area_struct *vma)
 	struct drm_i915_gem_object *obj =
 		i915_ttm_to_gem(vma->vm_private_data);
 
-	GEM_BUG_ON(!obj);
+	GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
 	i915_gem_object_get(obj);
 }
 
@@ -1103,7 +1142,7 @@ static void ttm_vm_close(struct vm_area_struct *vma)
 	struct drm_i915_gem_object *obj =
 		i915_ttm_to_gem(vma->vm_private_data);
 
-	GEM_BUG_ON(!obj);
+	GEM_BUG_ON(i915_ttm_is_ghost_object(vma->vm_private_data));
 	i915_gem_object_put(obj);
 }
 
@@ -1124,7 +1163,27 @@ static u64 i915_ttm_mmap_offset(struct drm_i915_gem_object *obj)
 
 static void i915_ttm_unmap_virtual(struct drm_i915_gem_object *obj)
 {
+	struct ttm_buffer_object *bo = i915_gem_to_ttm(obj);
+	intel_wakeref_t wakeref = 0;
+
+	assert_object_held_shared(obj);
+
+	if (i915_ttm_cpu_maps_iomem(bo->resource)) {
+		wakeref = intel_runtime_pm_get(&to_i915(obj->base.dev)->runtime_pm);
+
+		/* userfault_count is protected by obj lock and rpm wakeref. */
+		if (obj->userfault_count) {
+			spin_lock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+			list_del(&obj->userfault_link);
+			spin_unlock(&to_i915(obj->base.dev)->runtime_pm.lmem_userfault_lock);
+			obj->userfault_count = 0;
+		}
+	}
+
 	ttm_bo_unmap_virtual(i915_gem_to_ttm(obj));
+
+	if (wakeref)
+		intel_runtime_pm_put(&to_i915(obj->base.dev)->runtime_pm, wakeref);
 }
 
 static const struct drm_i915_gem_object_ops i915_gem_ttm_obj_ops = {
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
index e4842b4296fc..2a94a99ef76b 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm.h
@@ -28,18 +28,26 @@ i915_gem_to_ttm(struct drm_i915_gem_object *obj)
 void i915_ttm_bo_destroy(struct ttm_buffer_object *bo);
 
 /**
+ * i915_ttm_is_ghost_object - Check if the ttm bo is a ghost object.
+ * @bo: Pointer to the ttm buffer object
+ *
+ * Return: True if the ttm bo is not a i915 object but a ghost ttm object,
+ * False otherwise.
+ */
+static inline bool i915_ttm_is_ghost_object(struct ttm_buffer_object *bo)
+{
+	return bo->destroy != i915_ttm_bo_destroy;
+}
+
+/**
  * i915_ttm_to_gem - Convert a struct ttm_buffer_object to an embedding
  * struct drm_i915_gem_object.
  *
- * Return: Pointer to the embedding struct ttm_buffer_object, or NULL
- * if the object was not an i915 ttm object.
+ * Return: Pointer to the embedding struct ttm_buffer_object.
  */
 static inline struct drm_i915_gem_object *
 i915_ttm_to_gem(struct ttm_buffer_object *bo)
 {
-	if (bo->destroy != i915_ttm_bo_destroy)
-		return NULL;
-
 	return container_of(bo, struct drm_i915_gem_object, __do_not_access);
 }
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
index 9a7e50534b84..f59f812dc6d2 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_ttm_move.c
@@ -560,7 +560,7 @@ int i915_ttm_move(struct ttm_buffer_object *bo, bool evict,
 	bool clear;
 	int ret;
 
-	if (GEM_WARN_ON(!obj)) {
+	if (GEM_WARN_ON(i915_ttm_is_ghost_object(bo))) {
 		ttm_bo_move_null(bo, dst_mem);
 		return 0;
 	}
diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index d4398948f016..1b1a22716722 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -129,7 +129,7 @@ static void i915_gem_object_userptr_drop_ref(struct drm_i915_gem_object *obj)
 static int i915_gem_userptr_get_pages(struct drm_i915_gem_object *obj)
 {
 	const unsigned long num_pages = obj->base.size >> PAGE_SHIFT;
-	unsigned int max_segment = i915_sg_segment_size();
+	unsigned int max_segment = i915_sg_segment_size(obj->base.dev->dev);
 	struct sg_table *st;
 	unsigned int sg_page_sizes;
 	struct page **pvec;
@@ -292,7 +292,7 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
 	if (!i915_gem_object_is_readonly(obj))
 		gup_flags |= FOLL_WRITE;
 
-	pinned = ret = 0;
+	pinned = 0;
 	while (pinned < num_pages) {
 		ret = pin_user_pages_fast(obj->userptr.ptr + pinned * PAGE_SIZE,
 					  num_pages - pinned, gup_flags,
@@ -302,7 +302,6 @@ int i915_gem_object_userptr_submit_init(struct drm_i915_gem_object *obj)
 		pinned += ret;
 	}
 
-	ret = 0;
 
 	ret = i915_gem_object_lock_interruptible(obj, NULL);
 	if (ret)
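[Editor's note] With i915_ttm_to_gem() no longer returning NULL, every call site must filter TTM "ghost" objects first via the new predicate. The expected calling pattern, sketched with the names from the header above (the callback itself is hypothetical):

	/*
	 * TTM creates "ghost" buffer objects while moving data around; they do
	 * not embed a drm_i915_gem_object, so downcasting one would be invalid.
	 */
	static void example_bo_callback(struct ttm_buffer_object *bo)
	{
		struct drm_i915_gem_object *obj;

		if (i915_ttm_is_ghost_object(bo))
			return;			/* not ours: skip it */

		obj = i915_ttm_to_gem(bo);	/* now unconditionally valid */
		/* ... operate on obj ... */
	}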
diff --git a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
index c570cf780079..0cb99e75b0bc 100644
--- a/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
+++ b/drivers/gpu/drm/i915/gem/selftests/huge_pages.c
@@ -1161,7 +1161,8 @@ static int igt_write_huge(struct drm_i915_private *i915,
 	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(obj));
 
 	size = obj->base.size;
-	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
+	if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K &&
+	    !HAS_64K_PAGES(i915))
 		size = round_up(size, I915_GTT_PAGE_SIZE_2M);
 
 	n = 0;
@@ -1214,6 +1215,10 @@ static int igt_write_huge(struct drm_i915_private *i915,
 		 * size and ensure the vma offset is at the start of the pt
 		 * boundary, however to improve coverage we opt for testing both
 		 * aligned and unaligned offsets.
+		 *
+		 * With PS64 this is no longer the case, but to ensure we
+		 * sometimes get the compact layout for smaller objects, apply
+		 * the round_up anyway.
 		 */
 		if (obj->mm.page_sizes.sg & I915_GTT_PAGE_SIZE_64K)
 			offset_low = round_down(offset_low,
@@ -1411,6 +1416,7 @@ static int igt_ppgtt_sanity_check(void *arg)
 		{ SZ_2M + SZ_4K,	SZ_64K | SZ_4K },
 		{ SZ_2M + SZ_4K,	SZ_2M | SZ_4K },
 		{ SZ_2M + SZ_64K,	SZ_2M | SZ_64K },
+		{ SZ_2M + SZ_64K,	SZ_64K },
 	};
 	int i, j;
 	int err;
@@ -1540,6 +1546,154 @@ out_put:
 	return err;
 }
 
+static int igt_ppgtt_mixed(void *arg)
+{
+	struct drm_i915_private *i915 = arg;
+	const unsigned long flags = PIN_OFFSET_FIXED | PIN_USER;
+	struct drm_i915_gem_object *obj, *on;
+	struct i915_gem_engines *engines;
+	struct i915_gem_engines_iter it;
+	struct i915_address_space *vm;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct file *file;
+	I915_RND_STATE(prng);
+	LIST_HEAD(objects);
+	struct intel_memory_region *mr;
+	struct i915_vma *vma;
+	unsigned int count;
+	u32 i, addr;
+	int *order;
+	int n, err;
+
+	/*
+	 * Sanity check mixing 4K and 64K pages within the same page-table via
+	 * the new PS64 TLB hint.
+	 */
+
+	if (!HAS_64K_PAGES(i915)) {
+		pr_info("device lacks PS64, skipping\n");
+		return 0;
+	}
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ctx = hugepage_ctx(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out;
+	}
+	vm = i915_gem_context_get_eb_vm(ctx);
+
+	i = 0;
+	addr = 0;
+	do {
+		u32 sz;
+
+		sz = i915_prandom_u32_max_state(SZ_4M, &prng);
+		sz = max_t(u32, sz, SZ_4K);
+
+		mr = i915->mm.regions[INTEL_REGION_LMEM_0];
+		if (i & 1)
+			mr = i915->mm.regions[INTEL_REGION_SMEM];
+
+		obj = i915_gem_object_create_region(mr, sz, 0, 0);
+		if (IS_ERR(obj)) {
+			err = PTR_ERR(obj);
+			goto out_vm;
+		}
+
+		list_add_tail(&obj->st_link, &objects);
+
+		vma = i915_vma_instance(obj, vm, NULL);
+		if (IS_ERR(vma)) {
+			err = PTR_ERR(vma);
+			goto err_put;
+		}
+
+		addr = round_up(addr, mr->min_page_size);
+		err = i915_vma_pin(vma, 0, 0, addr | flags);
+		if (err)
+			goto err_put;
+
+		if (mr->type == INTEL_MEMORY_LOCAL &&
+		    (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) {
+			err = -EINVAL;
+			goto err_put;
+		}
+
+		addr += obj->base.size;
+		i++;
+	} while (addr <= SZ_16M);
+
+	n = 0;
+	count = 0;
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		count++;
+		if (!intel_engine_can_store_dword(ce->engine))
+			continue;
+
+		n++;
+	}
+	i915_gem_context_unlock_engines(ctx);
+	if (!n)
+		goto err_put;
+
+	order = i915_random_order(count * count, &prng);
+	if (!order) {
+		err = -ENOMEM;
+		goto err_put;
+	}
+
+	i = 0;
+	addr = 0;
+	engines = i915_gem_context_lock_engines(ctx);
+	list_for_each_entry(obj, &objects, st_link) {
+		u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng);
+
+		addr = round_up(addr, obj->mm.region->min_page_size);
+
+		ce = engines->engines[order[i] % engines->num_engines];
+		i = (i + 1) % (count * count);
+		if (!ce || !intel_engine_can_store_dword(ce->engine))
+			continue;
+
+		err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd);
+		if (err)
+			break;
+
+		err = __igt_write_huge(ce, obj, obj->base.size, addr,
+				       offset_in_page(rnd) / sizeof(u32), rnd + 1);
+		if (err)
+			break;
+
+		err = __igt_write_huge(ce, obj, obj->base.size, addr,
+				       (PAGE_SIZE / sizeof(u32)) - 1,
+				       rnd + 2);
+		if (err)
+			break;
+
+		addr += obj->base.size;
+
+		cond_resched();
+	}
+
+	i915_gem_context_unlock_engines(ctx);
+	kfree(order);
+err_put:
+	list_for_each_entry_safe(obj, on, &objects, st_link) {
+		list_del(&obj->st_link);
+		i915_gem_object_put(obj);
+	}
+out_vm:
+	i915_vm_put(vm);
+out:
+	fput(file);
+	return err;
+}
+
 static int igt_tmpfs_fallback(void *arg)
 {
 	struct drm_i915_private *i915 = arg;
@@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915)
 		SUBTEST(igt_ppgtt_smoke_huge),
 		SUBTEST(igt_ppgtt_sanity_check),
 		SUBTEST(igt_ppgtt_compact),
+		SUBTEST(igt_ppgtt_mixed),
 	};
 
 	if (!HAS_PPGTT(i915)) {
+ */ + + if (!HAS_64K_PAGES(i915)) { + pr_info("device lacks PS64, skipping\n"); + return 0; + } + + file = mock_file(i915); + if (IS_ERR(file)) + return PTR_ERR(file); + + ctx = hugepage_ctx(i915, file); + if (IS_ERR(ctx)) { + err = PTR_ERR(ctx); + goto out; + } + vm = i915_gem_context_get_eb_vm(ctx); + + i = 0; + addr = 0; + do { + u32 sz; + + sz = i915_prandom_u32_max_state(SZ_4M, &prng); + sz = max_t(u32, sz, SZ_4K); + + mr = i915->mm.regions[INTEL_REGION_LMEM_0]; + if (i & 1) + mr = i915->mm.regions[INTEL_REGION_SMEM]; + + obj = i915_gem_object_create_region(mr, sz, 0, 0); + if (IS_ERR(obj)) { + err = PTR_ERR(obj); + goto out_vm; + } + + list_add_tail(&obj->st_link, &objects); + + vma = i915_vma_instance(obj, vm, NULL); + if (IS_ERR(vma)) { + err = PTR_ERR(vma); + goto err_put; + } + + addr = round_up(addr, mr->min_page_size); + err = i915_vma_pin(vma, 0, 0, addr | flags); + if (err) + goto err_put; + + if (mr->type == INTEL_MEMORY_LOCAL && + (vma->resource->page_sizes_gtt & I915_GTT_PAGE_SIZE_4K)) { + err = -EINVAL; + goto err_put; + } + + addr += obj->base.size; + i++; + } while (addr <= SZ_16M); + + n = 0; + count = 0; + for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) { + count++; + if (!intel_engine_can_store_dword(ce->engine)) + continue; + + n++; + } + i915_gem_context_unlock_engines(ctx); + if (!n) + goto err_put; + + order = i915_random_order(count * count, &prng); + if (!order) { + err = -ENOMEM; + goto err_put; + } + + i = 0; + addr = 0; + engines = i915_gem_context_lock_engines(ctx); + list_for_each_entry(obj, &objects, st_link) { + u32 rnd = i915_prandom_u32_max_state(UINT_MAX, &prng); + + addr = round_up(addr, obj->mm.region->min_page_size); + + ce = engines->engines[order[i] % engines->num_engines]; + i = (i + 1) % (count * count); + if (!ce || !intel_engine_can_store_dword(ce->engine)) + continue; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, 0, rnd); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + offset_in_page(rnd) / sizeof(u32), rnd + 1); + if (err) + break; + + err = __igt_write_huge(ce, obj, obj->base.size, addr, + (PAGE_SIZE / sizeof(u32)) - 1, + rnd + 2); + if (err) + break; + + addr += obj->base.size; + + cond_resched(); + } + + i915_gem_context_unlock_engines(ctx); + kfree(order); +err_put: + list_for_each_entry_safe(obj, on, &objects, st_link) { + list_del(&obj->st_link); + i915_gem_object_put(obj); + } +out_vm: + i915_vm_put(vm); +out: + fput(file); + return err; +} + static int igt_tmpfs_fallback(void *arg) { struct drm_i915_private *i915 = arg; @@ -1803,6 +1957,7 @@ int i915_gem_huge_page_live_selftests(struct drm_i915_private *i915) SUBTEST(igt_ppgtt_smoke_huge), SUBTEST(igt_ppgtt_sanity_check), SUBTEST(igt_ppgtt_compact), + SUBTEST(igt_ppgtt_mixed), }; if (!HAS_PPGTT(i915)) { diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c index c6ad67b90e8a..d8864444432b 100644 --- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c +++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_context.c @@ -179,97 +179,108 @@ out_file: } struct parallel_switch { - struct task_struct *tsk; + struct kthread_worker *worker; + struct kthread_work work; struct intel_context *ce[2]; + int result; }; -static int __live_parallel_switch1(void *data) +static void __live_parallel_switch1(struct kthread_work *work) { - struct parallel_switch *arg = data; + struct parallel_switch *arg = + container_of(work, typeof(*arg), work); IGT_TIMEOUT(end_time); 
@@ -277,7 +288,7 @@ static int live_parallel_switch(void *arg)
 	struct parallel_switch *data = NULL;
 	struct i915_gem_engines *engines;
 	struct i915_gem_engines_iter it;
-	int (* const *fn)(void *arg);
+	void (* const *fn)(struct kthread_work *);
 	struct i915_gem_context *ctx;
 	struct intel_context *ce;
 	struct file *file;
@@ -348,9 +359,22 @@ static int live_parallel_switch(void *arg)
 		}
 	}
 
+	for (n = 0; n < count; n++) {
+		struct kthread_worker *worker;
+
+		if (!data[n].ce[0])
+			continue;
+
+		worker = kthread_create_worker(0, "igt/parallel:%s",
+					       data[n].ce[0]->engine->name);
+		if (IS_ERR(worker))
+			goto out;
+
+		data[n].worker = worker;
+	}
+
 	for (fn = func; !err && *fn; fn++) {
 		struct igt_live_test t;
-		int n;
 
 		err = igt_live_test_begin(&t, i915, __func__, "");
 		if (err)
@@ -360,30 +384,17 @@ static int live_parallel_switch(void *arg)
 			if (!data[n].ce[0])
 				continue;
 
-			data[n].tsk = kthread_run(*fn, &data[n],
-						  "igt/parallel:%s",
-						  data[n].ce[0]->engine->name);
-			if (IS_ERR(data[n].tsk)) {
-				err = PTR_ERR(data[n].tsk);
-				break;
-			}
-			get_task_struct(data[n].tsk);
+			data[n].result = 0;
+			kthread_init_work(&data[n].work, *fn);
+			kthread_queue_work(data[n].worker, &data[n].work);
 		}
 
-		yield(); /* start all threads before we kthread_stop() */
-
 		for (n = 0; n < count; n++) {
-			int status;
-
-			if (IS_ERR_OR_NULL(data[n].tsk))
-				continue;
-
-			status = kthread_stop(data[n].tsk);
-			if (status && !err)
-				err = status;
-
-			put_task_struct(data[n].tsk);
-			data[n].tsk = NULL;
+			if (data[n].ce[0]) {
+				kthread_flush_work(&data[n].work);
+				if (data[n].result && !err)
+					err = data[n].result;
+			}
 		}
 
 		if (igt_live_test_end(&t))
@@ -399,6 +410,9 @@ out:
 			intel_context_unpin(data[n].ce[m]);
 			intel_context_put(data[n].ce[m]);
 		}
+
+		if (data[n].worker)
+			kthread_destroy_worker(data[n].worker);
 	}
 	kfree(data);
 out_file:
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
index f2f3cfad807b..e57f9390076c 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_dmabuf.c
@@ -6,8 +6,12 @@
 
 #include "i915_drv.h"
 #include "i915_selftest.h"
+#include "gem/i915_gem_context.h"
 
+#include "mock_context.h"
 #include "mock_dmabuf.h"
+#include "igt_gem_utils.h"
+#include "selftests/mock_drm.h"
 #include "selftests/mock_gem_device.h"
 
 static int igt_dmabuf_export(void *arg)
@@ -140,6 +144,75 @@ out_ret:
 	return err;
 }
 
+static int verify_access(struct drm_i915_private *i915,
+			 struct drm_i915_gem_object *native_obj,
+			 struct drm_i915_gem_object *import_obj)
+{
+	struct i915_gem_engines_iter it;
+	struct i915_gem_context *ctx;
+	struct intel_context *ce;
+	struct i915_vma *vma;
+	struct file *file;
+	u32 *vaddr;
+	int err = 0, i;
+
+	file = mock_file(i915);
+	if (IS_ERR(file))
+		return PTR_ERR(file);
+
+	ctx = live_context(i915, file);
+	if (IS_ERR(ctx)) {
+		err = PTR_ERR(ctx);
+		goto out_file;
+	}
+
+	for_each_gem_engine(ce, i915_gem_context_lock_engines(ctx), it) {
+		if (intel_engine_can_store_dword(ce->engine))
+			break;
+	}
+	i915_gem_context_unlock_engines(ctx);
+	if (!ce)
+		goto out_file;
+
+	vma = i915_vma_instance(import_obj, ce->vm, NULL);
+	if (IS_ERR(vma)) {
+		err = PTR_ERR(vma);
+		goto out_file;
+	}
+
+	err = i915_vma_pin(vma, 0, 0, PIN_USER);
+	if (err)
+		goto out_file;
+
+	err = igt_gpu_fill_dw(ce, vma, 0,
+			      vma->size >> PAGE_SHIFT, 0xdeadbeaf);
+	i915_vma_unpin(vma);
+	if (err)
+		goto out_file;
+
+	err = i915_gem_object_wait(import_obj, 0, MAX_SCHEDULE_TIMEOUT);
+	if (err)
+		goto out_file;
+
+	vaddr = i915_gem_object_pin_map_unlocked(native_obj, I915_MAP_WB);
+	if (IS_ERR(vaddr)) {
+		err = PTR_ERR(vaddr);
+		goto out_file;
+	}
+
+	for (i = 0; i < native_obj->base.size / sizeof(u32); i += PAGE_SIZE / sizeof(u32)) {
+		if (vaddr[i] != 0xdeadbeaf) {
+			pr_err("Data mismatch [%d]=%u\n", i, vaddr[i]);
+			err = -EINVAL;
+			goto out_file;
+		}
+	}
+
+out_file:
+	fput(file);
+	return err;
+}
+
 static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
 					 struct intel_memory_region **regions,
 					 unsigned int num_regions)
@@ -154,7 +227,7 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
 
 	force_different_devices = true;
 
-	obj = __i915_gem_object_create_user(i915, PAGE_SIZE,
+	obj = __i915_gem_object_create_user(i915, SZ_8M,
 					    regions, num_regions);
 	if (IS_ERR(obj)) {
 		pr_err("__i915_gem_object_create_user failed with err=%ld\n",
@@ -206,6 +279,10 @@ static int igt_dmabuf_import_same_driver(struct drm_i915_private *i915,
 
 	i915_gem_object_unlock(import_obj);
 
+	err = verify_access(i915, obj, import_obj);
+	if (err)
+		goto out_import;
+
 	/* Now try a fake an importer */
 	import_attach = dma_buf_attach(dmabuf, obj->base.dev->dev);
 	if (IS_ERR(import_attach)) {
diff --git a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
index b73c91aa5450..1cae24349a96 100644
--- a/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
+++ b/drivers/gpu/drm/i915/gem/selftests/i915_gem_mman.c
@@ -8,6 +8,7 @@
 #include <linux/prime_numbers.h>
 
 #include "gem/i915_gem_internal.h"
+#include "gem/i915_gem_lmem.h"
 #include "gem/i915_gem_region.h"
 #include "gem/i915_gem_ttm.h"
 #include "gem/i915_gem_ttm_move.h"