Diffstat (limited to 'drivers/gpu/drm/amd/amdkfd')
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_chardev.c               | 42
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_device.c                | 40
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c  | 43
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h  |  1
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c          |  1
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c        |  6
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c     |  2
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_priv.h                  | 16
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_process.c               |  1
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c |  9
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_topology.c              |  6
-rw-r--r--	drivers/gpu/drm/amd/amdkfd/kfd_topology.h              |  5
12 files changed, 155 insertions, 17 deletions
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
index 0ec5f25adf56..ff47b1f69b68 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_chardev.c
@@ -215,6 +215,7 @@ static int set_queue_properties_from_user(struct queue_properties *q_properties,
 	}
 
 	q_properties->is_interop = false;
+	q_properties->is_gws = false;
 	q_properties->queue_percent = args->queue_percentage;
 	q_properties->priority = args->queue_priority;
 	q_properties->queue_address = args->ring_base_address;
@@ -1584,6 +1585,45 @@ copy_from_user_failed:
 	return err;
 }
 
+static int kfd_ioctl_alloc_queue_gws(struct file *filep,
+		struct kfd_process *p, void *data)
+{
+	int retval;
+	struct kfd_ioctl_alloc_queue_gws_args *args = data;
+	struct queue *q;
+	struct kfd_dev *dev;
+
+	mutex_lock(&p->mutex);
+	q = pqm_get_user_queue(&p->pqm, args->queue_id);
+
+	if (q) {
+		dev = q->device;
+	} else {
+		retval = -EINVAL;
+		goto out_unlock;
+	}
+
+	if (!dev->gws) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+
+	if (dev->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) {
+		retval = -ENODEV;
+		goto out_unlock;
+	}
+
+	retval = pqm_set_gws(&p->pqm, args->queue_id, args->num_gws ? dev->gws : NULL);
+	mutex_unlock(&p->mutex);
+
+	args->first_gws = 0;
+	return retval;
+
+out_unlock:
+	mutex_unlock(&p->mutex);
+	return retval;
+}
+
 static int kfd_ioctl_get_dmabuf_info(struct file *filep,
 		struct kfd_process *p, void *data)
 {
@@ -1786,6 +1826,8 @@ static const struct amdkfd_ioctl_desc amdkfd_ioctls[] = {
 
 	AMDKFD_IOCTL_DEF(AMDKFD_IOC_IMPORT_DMABUF,
 				kfd_ioctl_import_dmabuf, 0),
+
+	AMDKFD_IOCTL_DEF(AMDKFD_IOC_ALLOC_QUEUE_GWS,
+			kfd_ioctl_alloc_queue_gws, 0),
 };
 
 #define AMDKFD_CORE_IOCTL_COUNT	ARRAY_SIZE(amdkfd_ioctls)
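
For reference, a userspace caller would exercise the new ioctl roughly as follows. This is a minimal sketch, not part of the patch: it assumes the matching uapi additions in include/uapi/linux/kfd_ioctl.h (AMDKFD_IOC_ALLOC_QUEUE_GWS and struct kfd_ioctl_alloc_queue_gws_args, whose queue_id, num_gws, and first_gws fields the handler above reads and writes), and a queue id obtained from an earlier AMDKFD_IOC_CREATE_QUEUE call.

	/* Minimal sketch: bind the device's GWS block to an existing user queue.
	 * Assumes <linux/kfd_ioctl.h> provides AMDKFD_IOC_ALLOC_QUEUE_GWS and
	 * struct kfd_ioctl_alloc_queue_gws_args, matching the handler above.
	 */
	#include <fcntl.h>
	#include <stdio.h>
	#include <string.h>
	#include <unistd.h>
	#include <sys/ioctl.h>
	#include <linux/kfd_ioctl.h>

	static int alloc_queue_gws(int kfd_fd, unsigned int queue_id,
				   unsigned int num_gws)
	{
		struct kfd_ioctl_alloc_queue_gws_args args;

		memset(&args, 0, sizeof(args));
		args.queue_id = queue_id;	/* queue to make GWS-capable */
		args.num_gws = num_gws;		/* 0 releases the GWS binding */

		if (ioctl(kfd_fd, AMDKFD_IOC_ALLOC_QUEUE_GWS, &args) < 0) {
			perror("AMDKFD_IOC_ALLOC_QUEUE_GWS");
			return -1;
		}
		/* The handler currently always reports the first GWS index as 0 */
		return (int)args.first_gws;
	}

	int main(void)
	{
		int fd = open("/dev/kfd", O_RDWR | O_CLOEXEC);

		if (fd < 0) {
			perror("open /dev/kfd");
			return 1;
		}
		/* Queue id 0 and count 64 are placeholders; a real queue id
		 * comes from a prior AMDKFD_IOC_CREATE_QUEUE call.
		 */
		alloc_queue_gws(fd, 0, 64);
		close(fd);
		return 0;
	}

Passing num_gws == 0 unbinds the queue from GWS, mirroring the pqm_set_gws(..., NULL) path in the handler.
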
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device.c b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
index 05bc6d96ec52..0491ab2b4a9b 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device.c
@@ -569,6 +569,23 @@ static void kfd_cwsr_init(struct kfd_dev *kfd)
 	}
 }
 
+static int kfd_gws_init(struct kfd_dev *kfd)
+{
+	int ret = 0;
+
+	if (kfd->dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS)
+		return 0;
+
+	if (hws_gws_support
+		|| (kfd->device_info->asic_family >= CHIP_VEGA10
+			&& kfd->device_info->asic_family <= CHIP_RAVEN
+			&& kfd->mec2_fw_version >= 0x1b3))
+		ret = amdgpu_amdkfd_alloc_gws(kfd->kgd,
+				amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws);
+
+	return ret;
+}
+
 bool kgd2kfd_device_init(struct kfd_dev *kfd,
 			 struct drm_device *ddev,
 			 const struct kgd2kfd_shared_resources *gpu_resources)
@@ -578,6 +595,8 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	kfd->ddev = ddev;
 	kfd->mec_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_MEC1);
+	kfd->mec2_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
+			KGD_ENGINE_MEC2);
 	kfd->sdma_fw_version = amdgpu_amdkfd_get_fw_version(kfd->kgd,
 			KGD_ENGINE_SDMA1);
 	kfd->shared_resources = *gpu_resources;
@@ -598,13 +617,6 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 	} else
 		kfd->max_proc_per_quantum = hws_max_conc_proc;
 
-	/* Allocate global GWS that is shared by all KFD processes */
-	if (hws_gws_support && amdgpu_amdkfd_alloc_gws(kfd->kgd,
-			amdgpu_amdkfd_get_num_gws(kfd->kgd), &kfd->gws)) {
-		dev_err(kfd_device, "Could not allocate %d gws\n",
-			amdgpu_amdkfd_get_num_gws(kfd->kgd));
-		goto out;
-	}
-
 	/* calculate max size of mqds needed for queues */
 	size = max_num_of_queues_per_device *
 			kfd->device_info->mqd_size_aligned;
@@ -662,6 +674,15 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 		goto device_queue_manager_error;
 	}
 
+	/* If supported on this device, allocate global GWS that is shared
+	 * by all KFD processes
+	 */
+	if (kfd_gws_init(kfd)) {
+		dev_err(kfd_device, "Could not allocate %d gws\n",
+			amdgpu_amdkfd_get_num_gws(kfd->kgd));
+		goto gws_error;
+	}
+
 	if (kfd_iommu_device_init(kfd)) {
 		dev_err(kfd_device, "Error initializing iommuv2\n");
 		goto device_iommu_error;
@@ -691,6 +712,7 @@ bool kgd2kfd_device_init(struct kfd_dev *kfd,
 kfd_topology_add_device_error:
 kfd_resume_error:
 device_iommu_error:
+gws_error:
 	device_queue_manager_uninit(kfd->dqm);
 device_queue_manager_error:
 	kfd_interrupt_exit(kfd);
@@ -701,7 +723,7 @@ kfd_doorbell_error:
 kfd_gtt_sa_init_error:
 	amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
 alloc_gtt_mem_failure:
-	if (hws_gws_support)
+	if (kfd->gws)
 		amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	dev_err(kfd_device,
 		"device %x:%x NOT added due to errors\n",
@@ -720,7 +742,7 @@ void kgd2kfd_device_exit(struct kfd_dev *kfd)
 		kfd_doorbell_fini(kfd);
 		kfd_gtt_sa_fini(kfd);
 		amdgpu_amdkfd_free_gtt_mem(kfd->kgd, kfd->gtt_mem);
-		if (hws_gws_support)
+		if (kfd->gws)
 			amdgpu_amdkfd_free_gws(kfd->kgd, kfd->gws);
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
index 77ea0f0cb163..ae9547791813 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.c
@@ -505,8 +505,13 @@ static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
 		deallocate_vmid(dqm, qpd, q);
 	}
 	qpd->queue_count--;
-	if (q->properties.is_active)
+	if (q->properties.is_active) {
 		decrement_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
+	}
 
 	return retval;
 }
@@ -583,6 +588,20 @@ static int update_queue(struct device_queue_manager *dqm, struct queue *q)
 	else if (!q->properties.is_active && prev_active)
 		decrement_queue_count(dqm, q->properties.type);
 
+	if (q->gws && !q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count++;
+			pdd->qpd.mapped_gws_queue = true;
+		}
+		q->properties.is_gws = true;
+	} else if (!q->gws && q->properties.is_gws) {
+		if (q->properties.is_active) {
+			dqm->gws_queue_count--;
+			pdd->qpd.mapped_gws_queue = false;
+		}
+		q->properties.is_gws = false;
+	}
+
 	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
 		retval = map_queues_cpsch(dqm);
 	else if (q->properties.is_active &&
@@ -631,6 +650,10 @@ static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
 				q->properties.type)];
 		q->properties.is_active = false;
 		decrement_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
 
 		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
 			continue;
@@ -744,6 +767,10 @@ static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
 				q->properties.type)];
 		q->properties.is_active = true;
 		increment_queue_count(dqm, q->properties.type);
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count++;
+			qpd->mapped_gws_queue = true;
+		}
 
 		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
 			continue;
@@ -913,6 +940,7 @@ static int initialize_nocpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
 	dqm->active_cp_queue_count = 0;
+	dqm->gws_queue_count = 0;
 
 	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
 		int pipe_offset = pipe * get_queues_per_pipe(dqm);
@@ -1082,7 +1110,7 @@ static int initialize_cpsch(struct device_queue_manager *dqm)
 	INIT_LIST_HEAD(&dqm->queues);
 	dqm->active_queue_count = dqm->processes_count = 0;
 	dqm->active_cp_queue_count = 0;
-
+	dqm->gws_queue_count = 0;
 	dqm->active_runlist = false;
 	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
 	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));
@@ -1432,6 +1460,10 @@ static int destroy_queue_cpsch(struct device_queue_manager *dqm,
 				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
 		if (retval == -ETIME)
 			qpd->reset_wavefronts = true;
+		if (q->properties.is_gws) {
+			dqm->gws_queue_count--;
+			qpd->mapped_gws_queue = false;
+		}
 	}
 
 	/*
@@ -1650,8 +1682,13 @@ static int process_termination_cpsch(struct device_queue_manager *dqm,
 		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
 			deallocate_sdma_queue(dqm, q);
 
-		if (q->properties.is_active)
+		if (q->properties.is_active) {
 			decrement_queue_count(dqm, q->properties.type);
+			if (q->properties.is_gws) {
+				dqm->gws_queue_count--;
+				qpd->mapped_gws_queue = false;
+			}
+		}
 
 		dqm->total_queue_count--;
 	}
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
index 50d919f814e9..4afa015c69b1 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_device_queue_manager.h
@@ -182,6 +182,7 @@ struct device_queue_manager {
 	unsigned int		processes_count;
 	unsigned int		active_queue_count;
 	unsigned int		active_cp_queue_count;
+	unsigned int		gws_queue_count;
 	unsigned int		total_queue_count;
 	unsigned int		next_pipe_to_allocate;
 	unsigned int		*allocated_queues;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
index bae706462f96..a2b77d1df854 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_kernel_queue.c
@@ -126,6 +126,7 @@ static bool kq_initialize(struct kernel_queue *kq, struct kfd_dev *dev,
 
 	prop.queue_size = queue_size;
 	prop.is_interop = false;
+	prop.is_gws = false;
 	prop.priority = 1;
 	prop.queue_percent = 100;
 	prop.type = type;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
index efdb75e7677b..685ca82d42fe 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager.c
@@ -41,7 +41,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 				unsigned int *rlib_size,
 				bool *over_subscription)
 {
-	unsigned int process_count, queue_count, compute_queue_count;
+	unsigned int process_count, queue_count, compute_queue_count, gws_queue_count;
 	unsigned int map_queue_size;
 	unsigned int max_proc_per_quantum = 1;
 	struct kfd_dev *dev = pm->dqm->dev;
@@ -49,6 +49,7 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 	process_count = pm->dqm->processes_count;
 	queue_count = pm->dqm->active_queue_count;
 	compute_queue_count = pm->dqm->active_cp_queue_count;
+	gws_queue_count = pm->dqm->gws_queue_count;
 
 	/* check if there is over subscription
 	 * Note: the arbitration between the number of VMIDs and
@@ -61,7 +62,8 @@ static void pm_calc_rlib_size(struct packet_manager *pm,
 		max_proc_per_quantum = dev->max_proc_per_quantum;
 
 	if ((process_count > max_proc_per_quantum) ||
-	    compute_queue_count > get_cp_queues_num(pm->dqm)) {
+	    compute_queue_count > get_cp_queues_num(pm->dqm) ||
+	    gws_queue_count > 1) {
 		*over_subscription = true;
 		pr_debug("Over subscribed runlist\n");
 	}
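
Restated outside the driver, the runlist oversubscription test after this change is a three-way OR. The sketch below is illustrative only; the names mirror the fields used by pm_calc_rlib_size() above, and the values in main() are made up.

	#include <stdbool.h>
	#include <stdio.h>

	/* Standalone restatement of the oversubscription test in
	 * pm_calc_rlib_size(); the gws_queue_count clause is the new part.
	 */
	static bool runlist_oversubscribed(unsigned int process_count,
					   unsigned int max_proc_per_quantum,
					   unsigned int compute_queue_count,
					   unsigned int cp_queues_num,
					   unsigned int gws_queue_count)
	{
		return (process_count > max_proc_per_quantum) ||
		       (compute_queue_count > cp_queues_num) ||
		       (gws_queue_count > 1);
	}

	int main(void)
	{
		/* One process and few compute queues, but two GWS-using
		 * queues: the second GWS queue alone forces an oversubscribed
		 * (chained) runlist so the HWS can time-slice GWS access.
		 */
		printf("%d\n", runlist_oversubscribed(1, 1, 4, 24, 2)); /* 1 */
		return 0;
	}
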
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
index 2de01009f1b6..bdca9dc5f118 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_packet_manager_v9.c
@@ -43,7 +43,7 @@ static int pm_map_process_v9(struct packet_manager *pm,
 	packet->bitfields2.pasid = qpd->pqm->process->pasid;
 	packet->bitfields14.gds_size = qpd->gds_size & 0x3F;
 	packet->bitfields14.gds_size_hi = (qpd->gds_size >> 6) & 0xF;
-	packet->bitfields14.num_gws = qpd->num_gws;
+	packet->bitfields14.num_gws = (qpd->mapped_gws_queue) ? qpd->num_gws : 0;
 	packet->bitfields14.num_oac = qpd->num_oac;
 	packet->bitfields14.sdma_enable = 1;
 	packet->bitfields14.num_queues = (qpd->is_debug) ? 0 : qpd->queue_count;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
index 4a3049841086..d48b33449267 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_priv.h
@@ -282,6 +282,7 @@ struct kfd_dev {
 
 	/* Firmware versions */
 	uint16_t mec_fw_version;
+	uint16_t mec2_fw_version;
 	uint16_t sdma_fw_version;
 
 	/* Maximum process number mapped to HW scheduler */
@@ -410,6 +411,10 @@ enum KFD_QUEUE_PRIORITY {
  * @is_active: Defines if the queue is active or not. @is_active and
  * @is_evicted are protected by the DQM lock.
  *
+ * @is_gws: Defines if the queue has been updated to be GWS-capable or not.
+ * @is_gws should be protected by the DQM lock, since changing it can yield the
+ * possibility of updating DQM state on number of GWS queues.
+ *
  * @vmid: If the scheduling mode is no cp scheduling the field defines the vmid
  * of the queue.
  *
@@ -432,6 +437,7 @@ struct queue_properties {
 	bool is_interop;
 	bool is_evicted;
 	bool is_active;
+	bool is_gws;
 	/* Not relevant for user mode queues in cp scheduling */
 	unsigned int vmid;
 	/* Relevant only for sdma queues*/
@@ -563,6 +569,14 @@ struct qcm_process_device {
 	 */
 	bool reset_wavefronts;
 
+	/* This flag tells us if this process has a GWS-capable
+	 * queue that will be mapped into the runlist. It's
+	 * possible to request a GWS BO, but not have the queue
+	 * currently mapped, and this changes how the MAP_PROCESS
+	 * PM4 packet is configured.
+	 */
+	bool mapped_gws_queue;
+
 	/*
 	 * All the memory management data should be here too
 	 */
@@ -923,6 +937,8 @@ int pqm_set_gws(struct process_queue_manager *pqm, unsigned int qid,
 			void *gws);
 struct kernel_queue *pqm_get_kernel_queue(struct process_queue_manager *pqm,
 						unsigned int qid);
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+						unsigned int qid);
 int pqm_get_wave_state(struct process_queue_manager *pqm,
 			unsigned int qid,
 			void __user *ctl_stack,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
index fe0cd49d4ea7..82b4c5a9382a 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c
@@ -858,6 +858,7 @@ struct kfd_process_device *kfd_create_process_device_data(struct kfd_dev *dev,
 	pdd->qpd.dqm = dev->dqm;
 	pdd->qpd.pqm = &p->pqm;
 	pdd->qpd.evicted = 0;
+	pdd->qpd.mapped_gws_queue = false;
 	pdd->process = p;
 	pdd->bound = PDD_UNBOUND;
 	pdd->already_dequeued = false;
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
index 084c35f55d59..eb1635ac8988 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_process_queue_manager.c
@@ -476,6 +476,15 @@ struct kernel_queue *pqm_get_kernel_queue(
 	return NULL;
 }
 
+struct queue *pqm_get_user_queue(struct process_queue_manager *pqm,
+					unsigned int qid)
+{
+	struct process_queue_node *pqn;
+
+	pqn = get_queue_by_qid(pqm, qid);
+	return pqn ? pqn->q : NULL;
+}
+
 int pqm_get_wave_state(struct process_queue_manager *pqm,
 		       unsigned int qid,
 		       void __user *ctl_stack,
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
index 8e6409bc7c91..bc4a22df12d7 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.c
@@ -1301,6 +1301,10 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 
 	dev->node_props.vendor_id = gpu->pdev->vendor;
 	dev->node_props.device_id = gpu->pdev->device;
+	dev->node_props.capability |=
+		((amdgpu_amdkfd_get_asic_rev_id(dev->gpu->kgd) <<
+			HSA_CAP_ASIC_REVISION_SHIFT) &
+			HSA_CAP_ASIC_REVISION_MASK);
 	dev->node_props.location_id = pci_dev_id(gpu->pdev);
 	dev->node_props.max_engine_clk_fcompute =
 		amdgpu_amdkfd_get_max_engine_clock_in_mhz(dev->gpu->kgd);
@@ -1315,7 +1319,7 @@ int kfd_topology_add_device(struct kfd_dev *gpu)
 		gpu->device_info->num_xgmi_sdma_engines;
 	dev->node_props.num_sdma_queues_per_engine =
 		gpu->device_info->num_sdma_queues_per_engine;
-	dev->node_props.num_gws = (hws_gws_support &&
+	dev->node_props.num_gws = (dev->gpu->gws &&
 		dev->gpu->dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) ?
 		amdgpu_amdkfd_get_num_gws(dev->gpu->kgd) : 0;
 	dev->node_props.num_cp_queues = get_cp_queues_num(dev->gpu->dqm);
diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
index 46eeecaf1b68..0c51bd3dcd59 100644
--- a/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
+++ b/drivers/gpu/drm/amd/amdkfd/kfd_topology.h
@@ -41,7 +41,6 @@
 #define HSA_CAP_WATCH_POINTS_TOTALBITS_SHIFT	8
 #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_MASK	0x00003000
 #define HSA_CAP_DOORBELL_TYPE_TOTALBITS_SHIFT	12
-#define HSA_CAP_RESERVED			0xffffc000
 
 #define HSA_CAP_DOORBELL_TYPE_PRE_1_0		0x0
 #define HSA_CAP_DOORBELL_TYPE_1_0		0x1
@@ -51,6 +50,10 @@
 #define HSA_CAP_SRAM_EDCSUPPORTED		0x00080000
 #define HSA_CAP_MEM_EDCSUPPORTED		0x00100000
 #define HSA_CAP_RASEVENTNOTIFY			0x00200000
+#define HSA_CAP_ASIC_REVISION_MASK		0x03c00000
+#define HSA_CAP_ASIC_REVISION_SHIFT		22
+
+#define HSA_CAP_RESERVED			0xfc078000
 
 struct kfd_node_properties {
 	uint64_t hive_id;
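
On the userspace side, the new ASIC-revision bits land in the capability field of the sysfs topology, next to the num_gws count that is now gated on an actual GWS allocation. A minimal sketch, assuming the standard KFD topology path /sys/class/kfd/kfd/topology/nodes/<N>/properties and node 0 for illustration; the mask and shift mirror the definitions added to kfd_topology.h above.

	/* Minimal sketch: read a KFD topology node's properties file (one
	 * "name value" pair per line) and decode the new ASIC-revision bits.
	 */
	#include <stdio.h>
	#include <string.h>

	#define HSA_CAP_ASIC_REVISION_MASK	0x03c00000
	#define HSA_CAP_ASIC_REVISION_SHIFT	22

	int main(void)
	{
		FILE *f = fopen("/sys/class/kfd/kfd/topology/nodes/0/properties", "r");
		char name[64];
		unsigned long long value, capability = 0, num_gws = 0;

		if (!f) {
			perror("fopen");
			return 1;
		}
		while (fscanf(f, "%63s %llu", name, &value) == 2) {
			if (!strcmp(name, "capability"))
				capability = value;
			else if (!strcmp(name, "num_gws"))
				num_gws = value;
		}
		fclose(f);

		printf("ASIC revision: %llu\n",
		       (capability & HSA_CAP_ASIC_REVISION_MASK) >>
		       HSA_CAP_ASIC_REVISION_SHIFT);
		printf("GWS entries available: %llu\n", num_gws);
		return 0;
	}

With this patch, num_gws reads back as 0 unless the device actually allocated a GWS buffer under the HWS scheduling policy, so probing this property is a reasonable way for a runtime to decide whether to attempt AMDKFD_IOC_ALLOC_QUEUE_GWS at all.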