diff options
-rw-r--r-- | drivers/misc/habanalabs/common/device.c | 15 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/habanalabs.h | 27 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/memory.c | 31 | ||||
-rw-r--r-- | drivers/misc/habanalabs/common/mmu/mmu.c | 50 | ||||
-rw-r--r-- | drivers/misc/habanalabs/gaudi/gaudi.c | 8 | ||||
-rw-r--r-- | drivers/misc/habanalabs/goya/goya.c | 8 |
6 files changed, 103 insertions, 36 deletions
diff --git a/drivers/misc/habanalabs/common/device.c b/drivers/misc/habanalabs/common/device.c index 0908ac301c70..b4f14c6d3970 100644 --- a/drivers/misc/habanalabs/common/device.c +++ b/drivers/misc/habanalabs/common/device.c @@ -612,11 +612,18 @@ static int device_early_init(struct hl_device *hdev) goto free_eq_wq; } + hdev->pf_wq = alloc_workqueue("hl-prefetch", WQ_UNBOUND, 0); + if (!hdev->pf_wq) { + dev_err(hdev->dev, "Failed to allocate MMU prefetch workqueue\n"); + rc = -ENOMEM; + goto free_ts_free_wq; + } + hdev->hl_chip_info = kzalloc(sizeof(struct hwmon_chip_info), GFP_KERNEL); if (!hdev->hl_chip_info) { rc = -ENOMEM; - goto free_ts_free_wq; + goto free_pf_wq; } rc = hl_mmu_if_set_funcs(hdev); @@ -655,6 +662,8 @@ free_cb_mgr: hl_mem_mgr_fini(&hdev->kernel_mem_mgr); free_chip_info: kfree(hdev->hl_chip_info); +free_pf_wq: + destroy_workqueue(hdev->pf_wq); free_ts_free_wq: destroy_workqueue(hdev->ts_free_obj_wq); free_eq_wq: @@ -695,6 +704,7 @@ static void device_early_fini(struct hl_device *hdev) kfree(hdev->hl_chip_info); + destroy_workqueue(hdev->pf_wq); destroy_workqueue(hdev->ts_free_obj_wq); destroy_workqueue(hdev->eq_wq); destroy_workqueue(hdev->device_reset_work.wq); @@ -891,6 +901,9 @@ static void cleanup_resources(struct hl_device *hdev, bool hard_reset, bool fw_r /* Go over all the queues, release all CS and their jobs */ hl_cs_rollback_all(hdev, skip_wq_flush); + /* flush the MMU prefetch workqueue */ + flush_workqueue(hdev->pf_wq); + /* Release all pending user interrupts, each pending user interrupt * holds a reference to user context */ diff --git a/drivers/misc/habanalabs/common/habanalabs.h b/drivers/misc/habanalabs/common/habanalabs.h index 8977ec67dba7..632037b29922 100644 --- a/drivers/misc/habanalabs/common/habanalabs.h +++ b/drivers/misc/habanalabs/common/habanalabs.h @@ -1402,8 +1402,7 @@ struct hl_asic_funcs { u32 flags); int (*mmu_invalidate_cache_range)(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); - int (*mmu_prefetch_cache_range)(struct hl_device *hdev, u32 flags, u32 asid, u64 va, - u64 size); + int (*mmu_prefetch_cache_range)(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); int (*send_heartbeat)(struct hl_device *hdev); int (*debug_coresight)(struct hl_device *hdev, struct hl_ctx *ctx, void *data); bool (*is_device_idle)(struct hl_device *hdev, u64 *mask_arr, @@ -2476,6 +2475,24 @@ struct hl_mmu_funcs { }; /** + * struct hl_prefetch_work - prefetch work structure handler + * @pf_work: actual work struct. + * @ctx: compute context. + * @va: virtual address to pre-fetch. + * @size: pre-fetch size. + * @flags: operation flags. + * @asid: ASID for maintenance operation. + */ +struct hl_prefetch_work { + struct work_struct pf_work; + struct hl_ctx *ctx; + u64 va; + u64 size; + u32 flags; + u32 asid; +}; + +/* * number of user contexts allowed to call wait_for_multi_cs ioctl in * parallel */ @@ -2648,6 +2665,7 @@ struct hl_reset_info { * context. * @eq_wq: work queue of event queue for executing work in process context. * @ts_free_obj_wq: work queue for timestamp registration objects release. + * @pf_wq: work queue for MMU pre-fetch operations. * @kernel_ctx: Kernel driver context structure. * @kernel_queues: array of hl_hw_queue. * @cs_mirror_list: CS mirror list for TDR. @@ -2760,6 +2778,7 @@ struct hl_reset_info { * @supports_wait_for_multi_cs: true if wait for multi CS is supported * @is_compute_ctx_active: Whether there is an active compute context executing. * @compute_ctx_in_release: true if the current compute context is being released. + * @supports_mmu_prefetch: true if prefetch is supported, otherwise false. */ struct hl_device { struct pci_dev *pdev; @@ -2781,6 +2800,7 @@ struct hl_device { struct workqueue_struct **cq_wq; struct workqueue_struct *eq_wq; struct workqueue_struct *ts_free_obj_wq; + struct workqueue_struct *pf_wq; struct hl_ctx *kernel_ctx; struct hl_hw_queue *kernel_queues; struct list_head cs_mirror_list; @@ -2882,6 +2902,7 @@ struct hl_device { u8 stream_master_qid_arr_size; u8 is_compute_ctx_active; u8 compute_ctx_in_release; + u8 supports_mmu_prefetch; /* Parameters for bring-up */ u64 nic_ports_mask; @@ -3163,7 +3184,7 @@ int hl_mmu_unmap_contiguous(struct hl_ctx *ctx, u64 virt_addr, u32 size); int hl_mmu_invalidate_cache(struct hl_device *hdev, bool is_hard, u32 flags); int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, u32 flags, u32 asid, u64 va, u64 size); -int hl_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, u64 size); +int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size); u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte); u64 hl_mmu_get_hop_pte_phys_addr(struct hl_ctx *ctx, struct hl_mmu_properties *mmu_prop, u8 hop_idx, u64 hop_addr, u64 virt_addr); diff --git a/drivers/misc/habanalabs/common/memory.c b/drivers/misc/habanalabs/common/memory.c index ecf3c094242a..087a55654a4d 100644 --- a/drivers/misc/habanalabs/common/memory.c +++ b/drivers/misc/habanalabs/common/memory.c @@ -1102,21 +1102,24 @@ static int get_paddr_from_handle(struct hl_ctx *ctx, struct hl_mem_in *args, * map a device virtual block to this pages and return the start address of * this block. */ -static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, - u64 *device_addr) +static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, u64 *device_addr) { - struct hl_device *hdev = ctx->hdev; - struct hl_vm *vm = &hdev->vm; struct hl_vm_phys_pg_pack *phys_pg_pack; + enum hl_va_range_type va_range_type = 0; + struct hl_device *hdev = ctx->hdev; struct hl_userptr *userptr = NULL; + u32 handle = 0, va_block_align; struct hl_vm_hash_node *hnode; + struct hl_vm *vm = &hdev->vm; struct hl_va_range *va_range; - enum vm_type *vm_type; + bool is_userptr, do_prefetch; u64 ret_vaddr, hint_addr; - u32 handle = 0, va_block_align; + enum vm_type *vm_type; int rc; - bool is_userptr = args->flags & HL_MEM_USERPTR; - enum hl_va_range_type va_range_type = 0; + + /* set map flags */ + is_userptr = args->flags & HL_MEM_USERPTR; + do_prefetch = hdev->supports_mmu_prefetch && (args->flags & HL_MEM_PREFETCH); /* Assume failure */ *device_addr = 0; @@ -1250,15 +1253,19 @@ static int map_device_va(struct hl_ctx *ctx, struct hl_mem_in *args, if (rc) goto map_err; - if (args->flags & HL_MEM_PREFETCH) { - rc = hl_mmu_prefetch_cache_range(hdev, *vm_type, ctx->asid, ret_vaddr, + mutex_unlock(&ctx->mmu_lock); + + /* + * prefetch is done upon user's request. it is performed in WQ as and so can + * be outside the MMU lock. the operation itself is already protected by the mmu lock + */ + if (do_prefetch) { + rc = hl_mmu_prefetch_cache_range(ctx, *vm_type, ctx->asid, ret_vaddr, phys_pg_pack->total_size); if (rc) goto map_err; } - mutex_unlock(&ctx->mmu_lock); - ret_vaddr += phys_pg_pack->offset; hnode->ptr = vm_type; diff --git a/drivers/misc/habanalabs/common/mmu/mmu.c b/drivers/misc/habanalabs/common/mmu/mmu.c index 04e53af4c67f..ae9b4923c32b 100644 --- a/drivers/misc/habanalabs/common/mmu/mmu.c +++ b/drivers/misc/habanalabs/common/mmu/mmu.c @@ -665,15 +665,53 @@ int hl_mmu_invalidate_cache_range(struct hl_device *hdev, bool is_hard, return rc; } -int hl_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, u64 size) +static void hl_mmu_prefetch_work_function(struct work_struct *work) { - int rc; + struct hl_prefetch_work *pfw = container_of(work, struct hl_prefetch_work, pf_work); + struct hl_ctx *ctx = pfw->ctx; - rc = hdev->asic_funcs->mmu_prefetch_cache_range(hdev, flags, asid, va, size); - if (rc) - dev_err_ratelimited(hdev->dev, "MMU cache range prefetch failed\n"); + if (!hl_device_operational(ctx->hdev, NULL)) + goto put_ctx; - return rc; + mutex_lock(&ctx->mmu_lock); + + ctx->hdev->asic_funcs->mmu_prefetch_cache_range(ctx, pfw->flags, pfw->asid, + pfw->va, pfw->size); + + mutex_unlock(&ctx->mmu_lock); + +put_ctx: + /* + * context was taken in the common mmu prefetch function- see comment there about + * context handling. + */ + hl_ctx_put(ctx); + kfree(pfw); +} + +int hl_mmu_prefetch_cache_range(struct hl_ctx *ctx, u32 flags, u32 asid, u64 va, u64 size) +{ + struct hl_prefetch_work *handle_pf_work; + + handle_pf_work = kmalloc(sizeof(*handle_pf_work), GFP_KERNEL); + if (!handle_pf_work) + return -ENOMEM; + + INIT_WORK(&handle_pf_work->pf_work, hl_mmu_prefetch_work_function); + handle_pf_work->ctx = ctx; + handle_pf_work->va = va; + handle_pf_work->size = size; + handle_pf_work->flags = flags; + handle_pf_work->asid = asid; + + /* + * as actual prefetch is done in a WQ we must get the context (and put it + * at the end of the work function) + */ + hl_ctx_get(ctx->hdev, ctx); + queue_work(ctx->hdev->pf_wq, &handle_pf_work->pf_work); + + return 0; } u64 hl_mmu_get_next_hop_addr(struct hl_ctx *ctx, u64 curr_pte) diff --git a/drivers/misc/habanalabs/gaudi/gaudi.c b/drivers/misc/habanalabs/gaudi/gaudi.c index 1c388537de33..96a83317b302 100644 --- a/drivers/misc/habanalabs/gaudi/gaudi.c +++ b/drivers/misc/habanalabs/gaudi/gaudi.c @@ -9174,12 +9174,6 @@ static void gaudi_get_valid_dram_page_orders(struct hl_info_dev_memalloc_page_si info->page_order_bitmask = 0; } -static int gaudi_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, - u64 size) -{ - return 0; -} - static ssize_t infineon_ver_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hl_device *hdev = dev_get_drvdata(dev); @@ -9244,7 +9238,7 @@ static const struct hl_asic_funcs gaudi_funcs = { .write_pte = gaudi_write_pte, .mmu_invalidate_cache = gaudi_mmu_invalidate_cache, .mmu_invalidate_cache_range = gaudi_mmu_invalidate_cache_range, - .mmu_prefetch_cache_range = gaudi_mmu_prefetch_cache_range, + .mmu_prefetch_cache_range = NULL, .send_heartbeat = gaudi_send_heartbeat, .debug_coresight = gaudi_debug_coresight, .is_device_idle = gaudi_is_device_idle, diff --git a/drivers/misc/habanalabs/goya/goya.c b/drivers/misc/habanalabs/goya/goya.c index f2d4362f6a46..4cde505a7416 100644 --- a/drivers/misc/habanalabs/goya/goya.c +++ b/drivers/misc/habanalabs/goya/goya.c @@ -5428,12 +5428,6 @@ static int goya_get_monitor_dump(struct hl_device *hdev, void *data) return -EOPNOTSUPP; } -static int goya_mmu_prefetch_cache_range(struct hl_device *hdev, u32 flags, u32 asid, u64 va, - u64 size) -{ - return 0; -} - static int goya_scrub_device_dram(struct hl_device *hdev, u64 val) { return -EOPNOTSUPP; @@ -5480,7 +5474,7 @@ static const struct hl_asic_funcs goya_funcs = { .write_pte = goya_write_pte, .mmu_invalidate_cache = goya_mmu_invalidate_cache, .mmu_invalidate_cache_range = goya_mmu_invalidate_cache_range, - .mmu_prefetch_cache_range = goya_mmu_prefetch_cache_range, + .mmu_prefetch_cache_range = NULL, .send_heartbeat = goya_send_heartbeat, .debug_coresight = goya_debug_coresight, .is_device_idle = goya_is_device_idle, |