Diffstat (limited to 'drivers/gpu/drm/amd/amdgpu')
 drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c |  13
 drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c          |   9
 drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c          |   4
 drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c          |   2
 drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c          |  32
 drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c          |  20
 drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c           |  13
 drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c           |   6
 drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c           |   6
 drivers/gpu/drm/amd/amdgpu/imu_v11_0.c           |   2
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c           |   6
 drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h           |   1
 drivers/gpu/drm/amd/amdgpu/mes_v11_0.c           |  34
 drivers/gpu/drm/amd/amdgpu/nv.c                  |   1
 drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c           | 109
 drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c            |  17
 16 files changed, 153 insertions(+), 122 deletions(-)
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
index 67abf8dcd30a..6b6d46e29e6e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_amdkfd_gpuvm.c
@@ -1918,9 +1918,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
 		return -EINVAL;
 	}
 
-	/* delete kgd_mem from kfd_bo_list to avoid re-validating
-	 * this BO in BO's restoring after eviction.
-	 */
 	mutex_lock(&mem->process_info->lock);
 
 	ret = amdgpu_bo_reserve(bo, true);
@@ -1943,7 +1940,6 @@ int amdgpu_amdkfd_gpuvm_map_gtt_bo_to_kernel(struct amdgpu_device *adev,
 
 	amdgpu_amdkfd_remove_eviction_fence(
 		bo, mem->process_info->eviction_fence);
-	list_del_init(&mem->validate_list.head);
 
 	if (size)
 		*size = amdgpu_bo_size(bo);
@@ -2512,12 +2508,15 @@ int amdgpu_amdkfd_gpuvm_restore_process_bos(void *info, struct dma_fence **ef)
 	process_info->eviction_fence = new_fence;
 	*ef = dma_fence_get(&new_fence->base);
 
-	/* Attach new eviction fence to all BOs */
+	/* Attach new eviction fence to all BOs except pinned ones */
 	list_for_each_entry(mem, &process_info->kfd_bo_list,
-		validate_list.head)
+		validate_list.head) {
+		if (mem->bo->tbo.pin_count)
+			continue;
+
 		amdgpu_bo_fence(mem->bo,
 			&process_info->eviction_fence->base, true);
-
+	}
 	/* Attach eviction fence to PD / PT BOs */
 	list_for_each_entry(peer_vm, &process_info->vm_list_head,
 			    vm_list_node) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
index ede2fa56f6c9..16699158e00d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gfx.c
@@ -594,17 +594,20 @@ int amdgpu_get_gfx_off_status(struct amdgpu_device *adev, uint32_t *value)
 int amdgpu_gfx_ras_late_init(struct amdgpu_device *adev, struct ras_common_if *ras_block)
 {
 	int r;
-	r = amdgpu_ras_block_late_init(adev, ras_block);
-	if (r)
-		return r;
 
 	if (amdgpu_ras_is_supported(adev, ras_block->block)) {
 		if (!amdgpu_persistent_edc_harvesting_supported(adev))
 			amdgpu_ras_reset_error_status(adev, AMDGPU_RAS_BLOCK__GFX);
 
+		r = amdgpu_ras_block_late_init(adev, ras_block);
+		if (r)
+			return r;
+
 		r = amdgpu_irq_get(adev, &adev->gfx.cp_ecc_error_irq, 0);
 		if (r)
 			goto late_fini;
+	} else {
+		amdgpu_ras_feature_enable_on_boot(adev, ras_block, 0);
 	}
 
 	return 0;
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
index 798c56214a23..aebc384531ac 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_gmc.c
@@ -518,6 +518,8 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 	case IP_VERSION(9, 1, 0):
 	/* RENOIR looks like RAVEN */
 	case IP_VERSION(9, 3, 0):
+	/* GC 10.3.7 */
+	case IP_VERSION(10, 3, 7):
 		if (amdgpu_tmz == 0) {
 			adev->gmc.tmz_enabled = false;
 			dev_info(adev->dev,
@@ -540,8 +542,6 @@ void amdgpu_gmc_tmz_set(struct amdgpu_device *adev)
 	case IP_VERSION(10, 3, 1):
 	/* YELLOW_CARP*/
 	case IP_VERSION(10, 3, 3):
-	/* GC 10.3.7 */
-	case IP_VERSION(10, 3, 7):
 		/* Don't enable it by default yet.
 		 */
 		if (amdgpu_tmz < 1) {
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
index 801f6fa692e9..6de63ea6687e 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_kms.c
@@ -642,7 +642,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			    atomic64_read(&adev->visible_pin_size),
 			    vram_gtt.vram_size);
 		vram_gtt.gtt_size = ttm_manager_type(&adev->mman.bdev, TTM_PL_TT)->size;
-		vram_gtt.gtt_size *= PAGE_SIZE;
 		vram_gtt.gtt_size -= atomic64_read(&adev->gart_pin_size);
 		return copy_to_user(out, &vram_gtt,
 				    min((size_t)size, sizeof(vram_gtt))) ? -EFAULT : 0;
@@ -675,7 +674,6 @@ int amdgpu_info_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
 			mem.cpu_accessible_vram.usable_heap_size * 3 / 4;
 
 		mem.gtt.total_heap_size = gtt_man->size;
-		mem.gtt.total_heap_size *= PAGE_SIZE;
 		mem.gtt.usable_heap_size = mem.gtt.total_heap_size -
 			atomic64_read(&adev->gart_pin_size);
 		mem.gtt.heap_usage = ttm_resource_manager_usage(gtt_man);
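
The two deleted "*= PAGE_SIZE" lines in amdgpu_kms.c track TTM's move to byte-based resource-manager sizes: on this kernel, ttm_manager_type(...)->size for the GTT domain is already in bytes rather than pages, so scaling it again would inflate the reported GTT heap by a factor of PAGE_SIZE. A standalone C sketch of that unit bug, using made-up sizes rather than driver structures:

#include <stdint.h>
#include <stdio.h>

#define PAGE_SIZE 4096ULL

int main(void)
{
	/* The manager now reports bytes directly, e.g. an 8 GiB GTT heap. */
	uint64_t mgr_size_bytes = 8ULL << 30;

	uint64_t stale = mgr_size_bytes * PAGE_SIZE; /* 32 TiB: double-scaled */
	uint64_t fixed = mgr_size_bytes;             /* 8 GiB: correct */

	printf("stale=%llu GiB, fixed=%llu GiB\n",
	       (unsigned long long)(stale >> 30),
	       (unsigned long long)(fixed >> 30));
	return 0;
}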
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
index 2de9309a4193..dac202ae864d 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ras.c
@@ -197,6 +197,13 @@ static ssize_t amdgpu_ras_debugfs_read(struct file *f, char __user *buf,
 	if (amdgpu_ras_query_error_status(obj->adev, &info))
 		return -EINVAL;
 
+	/* Hardware counter will be reset automatically after the query on Vega20 and Arcturus */
+	if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+	    obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
+			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
+	}
+
 	s = snprintf(val, sizeof(val), "%s: %lu\n%s: %lu\n",
 			"ue", info.ue_count,
 			"ce", info.ce_count);
@@ -550,9 +557,10 @@ static ssize_t amdgpu_ras_sysfs_read(struct device *dev,
 	if (amdgpu_ras_query_error_status(obj->adev, &info))
 		return -EINVAL;
 
-	if (obj->adev->asic_type == CHIP_ALDEBARAN) {
+	if (obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+	    obj->adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
 		if (amdgpu_ras_reset_error_status(obj->adev, info.head.block))
-			DRM_WARN("Failed to reset error counter and error status");
+			dev_warn(obj->adev->dev, "Failed to reset error counter and error status");
 	}
 
 	return sysfs_emit(buf, "%s: %lu\n%s: %lu\n", "ue", info.ue_count,
@@ -1027,9 +1035,6 @@ int amdgpu_ras_query_error_status(struct amdgpu_device *adev,
 		}
 	}
 
-	if (!amdgpu_persistent_edc_harvesting_supported(adev))
-		amdgpu_ras_reset_error_status(adev, info->head.block);
-
 	return 0;
 }
 
@@ -1149,6 +1154,12 @@ int amdgpu_ras_query_error_count(struct amdgpu_device *adev,
 		if (res)
 			return res;
 
+		if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+		    adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+			if (amdgpu_ras_reset_error_status(adev, info.head.block))
+				dev_warn(adev->dev, "Failed to reset error counter and error status");
+		}
+
 		ce += info.ce_count;
 		ue += info.ue_count;
 	}
@@ -1792,6 +1803,12 @@ static void amdgpu_ras_log_on_err_counter(struct amdgpu_device *adev)
 			continue;
 
 		amdgpu_ras_query_error_status(adev, &info);
+
+		if (adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 2) &&
+		    adev->ip_versions[MP0_HWIP][0] != IP_VERSION(11, 0, 4)) {
+			if (amdgpu_ras_reset_error_status(adev, info.head.block))
+				dev_warn(adev->dev, "Failed to reset error counter and error status");
+		}
 	}
 }
 
@@ -2278,8 +2295,9 @@ static void amdgpu_ras_check_supported(struct amdgpu_device *adev)
 	    !amdgpu_ras_asic_supported(adev))
 		return;
 
-	if (!(amdgpu_sriov_vf(adev) &&
-		(adev->ip_versions[MP1_HWIP][0] == IP_VERSION(13, 0, 2))))
+	/* If the driver runs on the sriov guest side, only enable ras for aldebaran */
+	if (amdgpu_sriov_vf(adev) &&
+		adev->ip_versions[MP1_HWIP][0] != IP_VERSION(13, 0, 2))
 		return;
 
 	if (!adev->gmc.xgmi.connected_to_cpu) {
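
The query sites above now repeat the same guard: reset the hardware error status after every query, except on MP0 11.0.2/11.0.4 (Vega20/Arcturus), where the counters auto-clear as part of the query itself. A sketch of how the four copies could be folded into one place; ras_reset_after_query() is a hypothetical name, not an existing driver function:

/* Hypothetical helper mirroring the repeated pattern in this patch. */
static void ras_reset_after_query(struct amdgpu_device *adev,
				  enum amdgpu_ras_block block)
{
	/* Vega20/Arcturus auto-clear the counters on query. */
	if (adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 2) ||
	    adev->ip_versions[MP0_HWIP][0] == IP_VERSION(11, 0, 4))
		return;

	if (amdgpu_ras_reset_error_status(adev, block))
		dev_warn(adev->dev,
			 "Failed to reset error counter and error status");
}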
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
index be6f76a30ac6..3b4c19412625 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_ttm.c
@@ -1798,18 +1798,26 @@ int amdgpu_ttm_init(struct amdgpu_device *adev)
 	DRM_INFO("amdgpu: %uM of VRAM memory ready\n",
 		 (unsigned) (adev->gmc.real_vram_size / (1024 * 1024)));
 
-	/* Compute GTT size, either bsaed on 3/4th the size of RAM size
+	/* Compute GTT size, either based on 1/2 the size of RAM size
 	 * or whatever the user passed on module init */
 	if (amdgpu_gtt_size == -1) {
 		struct sysinfo si;
 
 		si_meminfo(&si);
-		gtt_size = min(max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
-			       adev->gmc.mc_vram_size),
-			       ((uint64_t)si.totalram * si.mem_unit * 3/4));
-	}
-	else
+		/* Certain GL unit tests for large textures can cause problems
+		 * with the OOM killer since there is no way to link this memory
+		 * to a process.  This was originally mitigated (but not necessarily
+		 * eliminated) by limiting the GTT size.  The problem is this limit
+		 * is often too low for many modern games so just make the limit 1/2
+		 * of system memory which aligns with TTM. The OOM accounting needs
+		 * to be addressed, but we shouldn't prevent common 3D applications
+		 * from being usable just to potentially mitigate that corner case.
+		 */
+		gtt_size = max((AMDGPU_DEFAULT_GTT_SIZE_MB << 20),
+			       (u64)si.totalram * si.mem_unit / 2);
+	} else {
 		gtt_size = (uint64_t)amdgpu_gtt_size << 20;
+	}
 
 	/* Initialize GTT memory pool */
 	r = amdgpu_gtt_mgr_init(adev, gtt_size);
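
With the default amdgpu_gtt_size of -1, the new policy reduces to "half of system memory, but never below the driver default"; the old mc_vram_size term and the 3/4 cap are gone. A runnable userspace model of the computation, with sysinfo() standing in for the kernel's si_meminfo() and 3072 MB assumed as the value of AMDGPU_DEFAULT_GTT_SIZE_MB:

#include <stdint.h>
#include <stdio.h>
#include <sys/sysinfo.h>

#define AMDGPU_DEFAULT_GTT_SIZE_MB 3072ULL	/* assumed driver default */

int main(void)
{
	struct sysinfo si;
	uint64_t half_ram, gtt_size;

	sysinfo(&si);	/* userspace stand-in for si_meminfo() */

	half_ram = (uint64_t)si.totalram * si.mem_unit / 2;

	/* New policy: half of system RAM, floored at the default. */
	gtt_size = half_ram > (AMDGPU_DEFAULT_GTT_SIZE_MB << 20) ?
		   half_ram : (AMDGPU_DEFAULT_GTT_SIZE_MB << 20);

	printf("GTT size: %llu MiB\n", (unsigned long long)(gtt_size >> 20));
	return 0;
}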
diff --git a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
index 2ceeaa4c793a..dc76d2b3ce52 100644
--- a/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
+++ b/drivers/gpu/drm/amd/amdgpu/amdgpu_vm.c
@@ -679,6 +679,7 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 {
 	struct amdgpu_vm_update_params params;
 	struct amdgpu_vm_bo_base *entry;
+	bool flush_tlb_needed = false;
 	int r, idx;
 
 	if (list_empty(&vm->relocated))
@@ -697,6 +698,9 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 		goto error;
 
 	list_for_each_entry(entry, &vm->relocated, vm_status) {
+		/* vm_flush_needed after updating moved PDEs */
+		flush_tlb_needed |= entry->moved;
+
 		r = amdgpu_vm_pde_update(&params, entry);
 		if (r)
 			goto error;
@@ -706,8 +710,8 @@ int amdgpu_vm_update_pdes(struct amdgpu_device *adev,
 	if (r)
 		goto error;
 
-	/* vm_flush_needed after updating PDEs */
-	atomic64_inc(&vm->tlb_seq);
+	if (flush_tlb_needed)
+		atomic64_inc(&vm->tlb_seq);
 
 	while (!list_empty(&vm->relocated)) {
 		entry = list_first_entry(&vm->relocated,
@@ -789,6 +793,11 @@ int amdgpu_vm_update_range(struct amdgpu_device *adev, struct amdgpu_vm *vm,
 	flush_tlb |= adev->gmc.xgmi.num_physical_nodes &&
 		     adev->ip_versions[GC_HWIP][0] == IP_VERSION(9, 4, 0);
 
+	/*
+	 * On GFX8 and older any 8 PTE block with a valid bit set enters the TLB
+	 */
+	flush_tlb |= adev->ip_versions[GC_HWIP][0] < IP_VERSION(9, 0, 0);
+
 	memset(&params, 0, sizeof(params));
 	params.adev = adev;
 	params.vm = vm;
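
tlb_seq is a monotonically increasing per-VM counter: code that needs to know whether a flush is pending snapshots the value and compares it later. By only bumping it when a moved page-directory entry was actually written, the patch stops PDE updates that cannot be stale in any TLB from forcing flushes. A toy userspace model of that sequence-counter pattern (my illustration, not driver code):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

/* Writers bump the sequence only when a change requires a TLB flush;
 * readers compare a snapshot against the current value. */
static atomic_ullong tlb_seq;

static void update_pdes(bool any_entry_moved)
{
	if (any_entry_moved)	/* the patch: bump only when needed */
		atomic_fetch_add(&tlb_seq, 1);
}

int main(void)
{
	unsigned long long snap = atomic_load(&tlb_seq);

	update_pdes(false);	/* nothing moved: no flush pending */
	printf("flush pending: %d\n", atomic_load(&tlb_seq) != snap);

	update_pdes(true);	/* a PDE moved: flush becomes pending */
	printf("flush pending: %d\n", atomic_load(&tlb_seq) != snap);
	return 0;
}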
diff --git a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
index 8c0a3fc7aaa6..a4a6751b1e44 100644
--- a/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gfx_v11_0.c
@@ -1096,6 +1096,7 @@ static void gfx_v11_0_read_wave_data(struct amdgpu_device *adev, uint32_t simd,
 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_STS2);
 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_IB_DBG1);
 	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_M0);
+	dst[(*no_fields)++] = wave_read_ind(adev, wave, ixSQ_WAVE_MODE);
 }
 
 static void gfx_v11_0_read_wave_sgprs(struct amdgpu_device *adev, uint32_t simd,
@@ -1316,7 +1317,7 @@ static void gfx_v11_0_rlc_backdoor_autoload_copy_ucode(struct amdgpu_device *ade
 		memset(ptr + toc_offset + fw_size, 0, toc_fw_size - fw_size);
 
 	if ((id != SOC21_FIRMWARE_ID_RS64_PFP) && (id != SOC21_FIRMWARE_ID_RS64_ME))
-		*(uint64_t *)fw_autoload_mask |= 1 << id;
+		*(uint64_t *)fw_autoload_mask |= 1ULL << id;
 }
 
 static void gfx_v11_0_rlc_backdoor_autoload_copy_toc_ucode(struct amdgpu_device *adev,
@@ -1983,7 +1984,7 @@ static int gfx_v11_0_init_csb(struct amdgpu_device *adev)
 	return 0;
 }
 
-void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
+static void gfx_v11_0_rlc_stop(struct amdgpu_device *adev)
 {
 	u32 tmp = RREG32_SOC15(GC, 0, regRLC_CNTL);
 
@@ -6028,6 +6029,7 @@ static void gfx_v11_0_handle_priv_fault(struct amdgpu_device *adev,
 		break;
 	default:
 		BUG();
+		break;
 	}
 }
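
The 1ULL change in gfx_v11_0_rlc_backdoor_autoload_copy_ucode() fixes a classic 32-bit shift bug: the constant 1 is an int, so 1 << id is undefined behaviour once a firmware ID reaches 32, even though the result is stored into a 64-bit mask. A runnable demonstration (on x86 the bad shift typically wraps modulo 32):

#include <stdint.h>
#include <stdio.h>

int main(void)
{
	int id = 40;	/* a firmware ID past bit 31 */

	uint64_t wrong = 1 << id;	/* UB: shift happens as 32-bit int */
	uint64_t right = 1ULL << id;	/* correct: 64-bit shift */

	printf("wrong=%#llx right=%#llx\n",
	       (unsigned long long)wrong, (unsigned long long)right);
	return 0;
}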
diff --git a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
index a0c0b7d9f444..7f4b480ae66e 100644
--- a/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/gmc_v11_0.c
@@ -638,6 +638,12 @@ static int gmc_v11_0_mc_init(struct amdgpu_device *adev)
 	adev->gmc.aper_base = pci_resource_start(adev->pdev, 0);
 	adev->gmc.aper_size = pci_resource_len(adev->pdev, 0);
 
+#ifdef CONFIG_X86_64
+	if ((adev->flags & AMD_IS_APU) && !amdgpu_passthrough(adev)) {
+		adev->gmc.aper_base = adev->mmhub.funcs->get_mc_fb_offset(adev);
+		adev->gmc.aper_size = adev->gmc.real_vram_size;
+	}
+#endif
 	/* In case the PCI BAR is larger than the actual amount of vram */
 	adev->gmc.visible_vram_size = adev->gmc.aper_size;
 	if (adev->gmc.visible_vram_size > adev->gmc.real_vram_size)
diff --git a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
index 5d2dfeff8fe5..d63d3f2b8a16 100644
--- a/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/imu_v11_0.c
@@ -299,7 +299,7 @@ static const struct imu_rlc_ram_golden imu_rlc_ram_golden_11_0_2[] =
 	IMU_RLC_RAM_GOLDEN_VALUE(GC, 0, regCPG_PSP_DEBUG, CPG_PSP_DEBUG__GPA_OVERRIDE_MASK, 0)
 };
 
-void program_imu_rlc_ram(struct amdgpu_device *adev,
+static void program_imu_rlc_ram(struct amdgpu_device *adev,
 				const struct imu_rlc_ram_golden *regs,
 				const u32 array_size)
 {
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
index d2722adabd1b..f3c1af5130ab 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.c
@@ -535,6 +535,10 @@ void jpeg_v2_0_dec_ring_emit_ib(struct amdgpu_ring *ring,
 {
 	unsigned vmid = AMDGPU_JOB_GET_VMID(job);
 
+	amdgpu_ring_write(ring, PACKETJ(mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET,
+		0, 0, PACKETJ_TYPE0));
+	amdgpu_ring_write(ring, (vmid << JPEG_IH_CTRL__IH_VMID__SHIFT));
+
 	amdgpu_ring_write(ring, PACKETJ(mmUVD_LMI_JRBC_IB_VMID_INTERNAL_OFFSET,
 		0, 0, PACKETJ_TYPE0));
 	amdgpu_ring_write(ring, (vmid | (vmid << 4)));
@@ -768,7 +772,7 @@ static const struct amdgpu_ring_funcs jpeg_v2_0_dec_ring_vm_funcs = {
 		8 + /* jpeg_v2_0_dec_ring_emit_vm_flush */
 		18 + 18 + /* jpeg_v2_0_dec_ring_emit_fence x2 vm fence */
 		8 + 16,
-	.emit_ib_size = 22, /* jpeg_v2_0_dec_ring_emit_ib */
+	.emit_ib_size = 24, /* jpeg_v2_0_dec_ring_emit_ib */
 	.emit_ib = jpeg_v2_0_dec_ring_emit_ib,
 	.emit_fence = jpeg_v2_0_dec_ring_emit_fence,
 	.emit_vm_flush = jpeg_v2_0_dec_ring_emit_vm_flush,
diff --git a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
index 1a03baa59755..654e43e83e2c 100644
--- a/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
+++ b/drivers/gpu/drm/amd/amdgpu/jpeg_v2_0.h
@@ -41,6 +41,7 @@
 #define mmUVD_JRBC_RB_REF_DATA_INTERNAL_OFFSET				0x4084
 #define mmUVD_JRBC_STATUS_INTERNAL_OFFSET				0x4089
 #define mmUVD_JPEG_PITCH_INTERNAL_OFFSET				0x401f
+#define mmUVD_JPEG_IH_CTRL_INTERNAL_OFFSET				0x4149
 
 #define JRBC_DEC_EXTERNAL_REG_WRITE_ADDR				0x18000
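
The emit_ib_size bump exists because the ring reserves space based on it: it must cover every dword jpeg_v2_0_dec_ring_emit_ib() can write. Each register programmed through PACKETJ costs two ring writes, a packet header and a payload, so one extra register write means two extra dwords. Assuming every write in emit_ib is such a pair:

/* header + payload per PACKETJ register write */
enum { PACKETJ_WRITE_DWORDS = 2 };

/* 11 register writes before the patch, 12 with UVD_JPEG_IH_CTRL added: */
static const unsigned int old_emit_ib_size = 11 * PACKETJ_WRITE_DWORDS; /* 22 */
static const unsigned int new_emit_ib_size = 12 * PACKETJ_WRITE_DWORDS; /* 24 */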
diff --git a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
index fcf51947bb18..7eee004cf3ce 100644
--- a/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/mes_v11_0.c
@@ -541,7 +541,7 @@ static void mes_v11_0_enable(struct amdgpu_device *adev, bool enable)
 
 /* This function is for backdoor MES firmware */
 static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
-				    enum admgpu_mes_pipe pipe)
+				    enum admgpu_mes_pipe pipe, bool prime_icache)
 {
 	int r;
 	uint32_t data;
@@ -593,16 +593,18 @@ static int mes_v11_0_load_microcode(struct amdgpu_device *adev,
 	/* Set 0x3FFFF (256K-1) to CP_MES_MDBOUND_LO */
 	WREG32_SOC15(GC, 0, regCP_MES_MDBOUND_LO, 0x3FFFF);
 
-	/* invalidate ICACHE */
-	data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
-	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
-	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
-	WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+	if (prime_icache) {
+		/* invalidate ICACHE */
+		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 0);
+		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, INVALIDATE_CACHE, 1);
+		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
 
-	/* prime the ICACHE. */
-	data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
-	data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
-	WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+		/* prime the ICACHE. */
+		data = RREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL);
+		data = REG_SET_FIELD(data, CP_MES_IC_OP_CNTL, PRIME_ICACHE, 1);
+		WREG32_SOC15(GC, 0, regCP_MES_IC_OP_CNTL, data);
+	}
 
 	soc21_grbm_select(adev, 0, 0, 0, 0);
 	mutex_unlock(&adev->srbm_mutex);
@@ -1044,17 +1046,19 @@ static int mes_v11_0_kiq_hw_init(struct amdgpu_device *adev)
 	int r = 0;
 
 	if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
-		r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE);
+
+		r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE, false);
 		if (r) {
-			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
+			DRM_ERROR("failed to load MES fw, r=%d\n", r);
 			return r;
 		}
 
-		r = mes_v11_0_load_microcode(adev, AMDGPU_MES_SCHED_PIPE);
+		r = mes_v11_0_load_microcode(adev, AMDGPU_MES_KIQ_PIPE, true);
 		if (r) {
-			DRM_ERROR("failed to load MES fw, r=%d\n", r);
+			DRM_ERROR("failed to load MES kiq fw, r=%d\n", r);
 			return r;
 		}
+
 	}
 
 	mes_v11_0_enable(adev, true);
@@ -1086,7 +1090,7 @@ static int mes_v11_0_hw_init(void *handle)
 	if (!adev->enable_mes_kiq) {
 		if (adev->firmware.load_type == AMDGPU_FW_LOAD_DIRECT) {
 			r = mes_v11_0_load_microcode(adev,
-					     AMDGPU_MES_SCHED_PIPE);
+					     AMDGPU_MES_SCHED_PIPE, true);
 			if (r) {
 				DRM_ERROR("failed to MES fw, r=%d\n", r);
 				return r;
diff --git a/drivers/gpu/drm/amd/amdgpu/nv.c b/drivers/gpu/drm/amd/amdgpu/nv.c
index d016e3c3e221..b3fba8dea63c 100644
--- a/drivers/gpu/drm/amd/amdgpu/nv.c
+++ b/drivers/gpu/drm/amd/amdgpu/nv.c
@@ -170,6 +170,7 @@ static const struct amdgpu_video_codec_info yc_video_codecs_decode_array[] = {
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_HEVC, 8192, 4352, 186)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_VP9, 8192, 4352, 0)},
 	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_JPEG, 4096, 4096, 0)},
+	{codec_info_build(AMDGPU_INFO_VIDEO_CAPS_CODEC_IDX_AV1, 8192, 4352, 0)},
 };
 
 static const struct amdgpu_video_codecs yc_video_codecs_decode = {
diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
index 06b2635b142a..83c6ccaaa9e4 100644
--- a/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
+++ b/drivers/gpu/drm/amd/amdgpu/sdma_v5_2.c
@@ -469,6 +469,7 @@ static void sdma_v5_2_ring_emit_fence(struct amdgpu_ring *ring, u64 addr, u64 se
 	}
 }
 
+
 /**
  * sdma_v5_2_gfx_stop - stop the gfx async dma engines
  *
@@ -514,21 +515,17 @@ static void sdma_v5_2_rlc_stop(struct amdgpu_device *adev)
 }
 
 /**
- * sdma_v5_2_ctx_switch_enable_for_instance - start the async dma engines
- * context switch for an instance
+ * sdma_v5_2_ctx_switch_enable - enable/disable the async dma engines context switch
  *
  * @adev: amdgpu_device pointer
- * @instance_idx: the index of the SDMA instance
+ * @enable: enable/disable the DMA MEs context switch.
 *
- * Unhalt the async dma engines context switch.
+ * Halt or unhalt the async dma engines context switch.
 */
-static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev, int instance_idx)
+static void sdma_v5_2_ctx_switch_enable(struct amdgpu_device *adev, bool enable)
 {
 	u32 f32_cntl, phase_quantum = 0;
-
-	if (WARN_ON(instance_idx >= adev->sdma.num_instances)) {
-		return;
-	}
+	int i;
 
 	if (amdgpu_sdma_phase_quantum) {
 		unsigned value = amdgpu_sdma_phase_quantum;
@@ -552,68 +549,50 @@ static void sdma_v5_2_ctx_switch_enable_for_instance(struct amdgpu_device *adev,
 		phase_quantum =
 			value << SDMA0_PHASE0_QUANTUM__VALUE__SHIFT |
 			unit  << SDMA0_PHASE0_QUANTUM__UNIT__SHIFT;
-
-		WREG32_SOC15_IP(GC,
-			sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE0_QUANTUM),
-			phase_quantum);
-		WREG32_SOC15_IP(GC,
-			sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE1_QUANTUM),
-		    phase_quantum);
-		WREG32_SOC15_IP(GC,
-			sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_PHASE2_QUANTUM),
-		    phase_quantum);
 	}
 
-	if (!amdgpu_sriov_vf(adev)) {
-		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL));
-		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
-				AUTO_CTXSW_ENABLE, 1);
-		WREG32(sdma_v5_2_get_reg_offset(adev, instance_idx, mmSDMA0_CNTL), f32_cntl);
+	for (i = 0; i < adev->sdma.num_instances; i++) {
+		if (enable && amdgpu_sdma_phase_quantum) {
+			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE0_QUANTUM),
+			       phase_quantum);
+			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE1_QUANTUM),
+			       phase_quantum);
+			WREG32_SOC15_IP(GC, sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_PHASE2_QUANTUM),
+			       phase_quantum);
+		}
+
+		if (!amdgpu_sriov_vf(adev)) {
+			f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
+			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
+					AUTO_CTXSW_ENABLE, enable ? 1 : 0);
+			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+		}
 	}
+
 }
 
 /**
- * sdma_v5_2_ctx_switch_disable_all - stop the async dma engines context switch
+ * sdma_v5_2_enable - enable/disable the async dma engines
 *
 * @adev: amdgpu_device pointer
+ * @enable: enable/disable the DMA MEs.
 *
- * Halt the async dma engines context switch.
+ * Halt or unhalt the async dma engines.
 */
-static void sdma_v5_2_ctx_switch_disable_all(struct amdgpu_device *adev)
+static void sdma_v5_2_enable(struct amdgpu_device *adev, bool enable)
 {
 	u32 f32_cntl;
 	int i;
 
-	if (amdgpu_sriov_vf(adev))
-		return;
-
-	for (i = 0; i < adev->sdma.num_instances; i++) {
-		f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL));
-		f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_CNTL,
-				AUTO_CTXSW_ENABLE, 0);
-		WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_CNTL), f32_cntl);
+	if (!enable) {
+		sdma_v5_2_gfx_stop(adev);
+		sdma_v5_2_rlc_stop(adev);
 	}
-}
-
-/**
- * sdma_v5_2_halt - stop the async dma engines
- *
- * @adev: amdgpu_device pointer
- *
- * Halt the async dma engines.
- */
-static void sdma_v5_2_halt(struct amdgpu_device *adev)
-{
-	int i;
-	u32 f32_cntl;
-
-	sdma_v5_2_gfx_stop(adev);
-	sdma_v5_2_rlc_stop(adev);
 
 	if (!amdgpu_sriov_vf(adev)) {
 		for (i = 0; i < adev->sdma.num_instances; i++) {
 			f32_cntl = RREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL));
-			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, 1);
+			f32_cntl = REG_SET_FIELD(f32_cntl, SDMA0_F32_CNTL, HALT, enable ? 0 : 1);
 			WREG32(sdma_v5_2_get_reg_offset(adev, i, mmSDMA0_F32_CNTL), f32_cntl);
 		}
 	}
@@ -625,9 +604,6 @@ static void sdma_v5_2_halt(struct amdgpu_device *adev)
 * @adev: amdgpu_device pointer
 *
 * Set up the gfx DMA ring buffers and enable them.
- * It assumes that the dma engine is stopped for each instance.
- * The function enables the engine and preemptions sequentially for each instance.
- *
 * Returns 0 for success, error for failure.
 */
 static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
@@ -769,7 +745,10 @@ static int sdma_v5_2_gfx_resume(struct amdgpu_device *adev)
 
 		ring->sched.ready = true;
 
-		sdma_v5_2_ctx_switch_enable_for_instance(adev, i);
+		if (amdgpu_sriov_vf(adev)) { /* bare-metal sequence doesn't need the two lines below */
+			sdma_v5_2_ctx_switch_enable(adev, true);
+			sdma_v5_2_enable(adev, true);
+		}
 
 		r = amdgpu_ring_test_ring(ring);
 		if (r) {
@@ -813,7 +792,7 @@ static int sdma_v5_2_load_microcode(struct amdgpu_device *adev)
 	int i, j;
 
 	/* halt the MEs */
-	sdma_v5_2_halt(adev);
+	sdma_v5_2_enable(adev, false);
 
 	for (i = 0; i < adev->sdma.num_instances; i++) {
 		if (!adev->sdma.instance[i].fw)
@@ -885,8 +864,8 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
 	int r = 0;
 
 	if (amdgpu_sriov_vf(adev)) {
-		sdma_v5_2_ctx_switch_disable_all(adev);
-		sdma_v5_2_halt(adev);
+		sdma_v5_2_ctx_switch_enable(adev, false);
+		sdma_v5_2_enable(adev, false);
 
 		/* set RB registers */
 		r = sdma_v5_2_gfx_resume(adev);
@@ -910,10 +889,12 @@ static int sdma_v5_2_start(struct amdgpu_device *adev)
 		amdgpu_gfx_off_ctrl(adev, false);
 
 	sdma_v5_2_soft_reset(adev);
+	/* unhalt the MEs */
+	sdma_v5_2_enable(adev, true);
+	/* enable sdma ring preemption */
+	sdma_v5_2_ctx_switch_enable(adev, true);
 
-	/* Soft reset supposes to disable the dma engine and preemption.
-	 * Now start the gfx rings and rlc compute queues.
-	 */
+	/* start the gfx rings and rlc compute queues */
 	r = sdma_v5_2_gfx_resume(adev);
 	if (adev->in_s0ix)
 		amdgpu_gfx_off_ctrl(adev, true);
@@ -1447,8 +1428,8 @@ static int sdma_v5_2_hw_fini(void *handle)
 	if (amdgpu_sriov_vf(adev))
 		return 0;
 
-	sdma_v5_2_ctx_switch_disable_all(adev);
-	sdma_v5_2_halt(adev);
+	sdma_v5_2_ctx_switch_enable(adev, false);
+	sdma_v5_2_enable(adev, false);
 
 	return 0;
 }
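
The sdma_v5_2.c rework replaces the per-instance start helper and the two asymmetric stop helpers (sdma_v5_2_ctx_switch_disable_all(), sdma_v5_2_halt()) with two bool-taking functions that always iterate every instance. The resulting sequences are symmetric, as the call pairs in the diff show:

/* teardown (hw_fini, and the SR-IOV branch of sdma_v5_2_start()) */
sdma_v5_2_ctx_switch_enable(adev, false);	/* AUTO_CTXSW_ENABLE = 0 */
sdma_v5_2_enable(adev, false);			/* stop rings, HALT = 1 */

/* bring-up (bare-metal sdma_v5_2_start() after soft reset) */
sdma_v5_2_enable(adev, true);			/* HALT = 0 */
sdma_v5_2_ctx_switch_enable(adev, true);	/* AUTO_CTXSW_ENABLE = 1 */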
diff --git a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
index 3cabceee5f57..39405f0db824 100644
--- a/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
+++ b/drivers/gpu/drm/amd/amdgpu/vcn_v3_0.c
@@ -1761,23 +1761,21 @@ static const struct amdgpu_ring_funcs vcn_v3_0_dec_sw_ring_vm_funcs = {
 	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
 };
 
-static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p,
-				struct amdgpu_job *job)
+static int vcn_v3_0_limit_sched(struct amdgpu_cs_parser *p)
 {
 	struct drm_gpu_scheduler **scheds;
 
 	/* The create msg must be in the first IB submitted */
-	if (atomic_read(&job->base.entity->fence_seq))
+	if (atomic_read(&p->entity->fence_seq))
 		return -EINVAL;
 
 	scheds = p->adev->gpu_sched[AMDGPU_HW_IP_VCN_DEC]
 		[AMDGPU_RING_PRIO_DEFAULT].sched;
-	drm_sched_entity_modify_sched(job->base.entity, scheds, 1);
+	drm_sched_entity_modify_sched(p->entity, scheds, 1);
 	return 0;
 }
 
-static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
-			    uint64_t addr)
+static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, uint64_t addr)
 {
 	struct ttm_operation_ctx ctx = { false, false };
 	struct amdgpu_bo_va_mapping *map;
@@ -1848,7 +1846,7 @@ static int vcn_v3_0_dec_msg(struct amdgpu_cs_parser *p, struct amdgpu_job *job,
 		if (create[0] == 0x7 || create[0] == 0x10 || create[0] == 0x11)
 			continue;
 
-		r = vcn_v3_0_limit_sched(p, job);
+		r = vcn_v3_0_limit_sched(p);
 		if (r)
 			goto out;
 	}
@@ -1862,7 +1860,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 					   struct amdgpu_job *job,
 					   struct amdgpu_ib *ib)
 {
-	struct amdgpu_ring *ring = to_amdgpu_ring(job->base.sched);
+	struct amdgpu_ring *ring = to_amdgpu_ring(p->entity->rq->sched);
 	uint32_t msg_lo = 0, msg_hi = 0;
 	unsigned i;
 	int r;
@@ -1881,8 +1879,7 @@ static int vcn_v3_0_ring_patch_cs_in_place(struct amdgpu_cs_parser *p,
 			msg_hi = val;
 		} else if (reg == PACKET0(p->adev->vcn.internal.cmd, 0) &&
 			   val == 0) {
-			r = vcn_v3_0_dec_msg(p, job,
-					     ((u64)msg_hi) << 32 | msg_lo);
+			r = vcn_v3_0_dec_msg(p, ((u64)msg_hi) << 32 | msg_lo);
 			if (r)
 				return r;
 		}
