| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2017-01-24 09:57:18 -0800 | 
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2017-01-24 09:57:18 -0800 | 
| commit | 62ed8ceda1699acae01b666497f004bfd3d67a6f (patch) | |
| tree | fe38c83c49dfd568b540666948ef78cb9d082c38 /drivers/gpu/drm/vc4/vc4_validate_shaders.c | |
| parent | 1c3415a06b1016a596bfe59e0cfee56c773aa958 (diff) | |
| parent | 7a308bb3016f57e5be11a677d15b821536419d36 (diff) | |
Merge tag 'v4.10-rc5' into for-linus
Sync up with mainline to apply a fixup to a commit that came through the
power supply tree.
Diffstat (limited to 'drivers/gpu/drm/vc4/vc4_validate_shaders.c')
| -rw-r--r-- | drivers/gpu/drm/vc4/vc4_validate_shaders.c | 82 |
|---|---|---|

1 file changed, 71 insertions, 11 deletions
```diff
diff --git a/drivers/gpu/drm/vc4/vc4_validate_shaders.c b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
index 2543cf5b8b51..5dba13dd1e9b 100644
--- a/drivers/gpu/drm/vc4/vc4_validate_shaders.c
+++ b/drivers/gpu/drm/vc4/vc4_validate_shaders.c
@@ -83,6 +83,13 @@ struct vc4_shader_validation_state {
 	 * basic blocks.
 	 */
 	bool needs_uniform_address_for_loop;
+
+	/* Set when we find an instruction writing the top half of the
+	 * register files.  If we allowed writing the unusable regs in
+	 * a threaded shader, then the other shader running on our
+	 * QPU's clamp validation would be invalid.
+	 */
+	bool all_registers_used;
 };
 
 static uint32_t
@@ -119,6 +126,13 @@ raddr_add_a_to_live_reg_index(uint64_t inst)
 }
 
 static bool
+live_reg_is_upper_half(uint32_t lri)
+{
+	return	(lri >= 16 && lri < 32) ||
+		(lri >= 32 + 16 && lri < 32 + 32);
+}
+
+static bool
 is_tmu_submit(uint32_t waddr)
 {
 	return (waddr == QPU_W_TMU0_S ||
@@ -390,6 +404,9 @@ check_reg_write(struct vc4_validated_shader_info *validated_shader,
 		} else {
 			validation_state->live_immediates[lri] = ~0;
 		}
+
+		if (live_reg_is_upper_half(lri))
+			validation_state->all_registers_used = true;
 	}
 
 	switch (waddr) {
@@ -598,6 +615,11 @@ check_instruction_reads(struct vc4_validated_shader_info *validated_shader,
 		}
 	}
 
+	if ((raddr_a >= 16 && raddr_a < 32) ||
+	    (raddr_b >= 16 && raddr_b < 32 && sig != QPU_SIG_SMALL_IMM)) {
+		validation_state->all_registers_used = true;
+	}
+
 	return true;
 }
 
@@ -608,9 +630,7 @@ static bool
 vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 {
 	uint32_t max_branch_target = 0;
-	bool found_shader_end = false;
 	int ip;
-	int shader_end_ip = 0;
 	int last_branch = -2;
 
 	for (ip = 0; ip < validation_state->max_ip; ip++) {
@@ -621,8 +641,13 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 		uint32_t branch_target_ip;
 
 		if (sig == QPU_SIG_PROG_END) {
-			shader_end_ip = ip;
-			found_shader_end = true;
+			/* There are two delay slots after program end is
+			 * signaled that are still executed, then we're
+			 * finished.  validation_state->max_ip is the
+			 * instruction after the last valid instruction in the
+			 * program.
+			 */
+			validation_state->max_ip = ip + 3;
 			continue;
 		}
 
@@ -676,15 +701,9 @@ vc4_validate_branches(struct vc4_shader_validation_state *validation_state)
 		}
 		set_bit(after_delay_ip, validation_state->branch_targets);
 		max_branch_target = max(max_branch_target, after_delay_ip);
-
-		/* There are two delay slots after program end is signaled
-		 * that are still executed, then we're finished.
-		 */
-		if (found_shader_end && ip == shader_end_ip + 2)
-			break;
 	}
 
-	if (max_branch_target > shader_end_ip) {
+	if (max_branch_target > validation_state->max_ip - 3) {
 		DRM_ERROR("Branch landed after QPU_SIG_PROG_END");
 		return false;
 	}
@@ -756,6 +775,7 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 {
 	bool found_shader_end = false;
 	int shader_end_ip = 0;
+	uint32_t last_thread_switch_ip = -3;
 	uint32_t ip;
 	struct vc4_validated_shader_info *validated_shader = NULL;
 	struct vc4_shader_validation_state validation_state;
@@ -788,6 +808,17 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		if (!vc4_handle_branch_target(&validation_state))
 			goto fail;
 
+		if (ip == last_thread_switch_ip + 3) {
+			/* Reset r0-r3 live clamp data */
+			int i;
+
+			for (i = 64; i < LIVE_REG_COUNT; i++) {
+				validation_state.live_min_clamp_offsets[i] = ~0;
+				validation_state.live_max_clamp_regs[i] = false;
+				validation_state.live_immediates[i] = ~0;
+			}
+		}
+
 		switch (sig) {
 		case QPU_SIG_NONE:
 		case QPU_SIG_WAIT_FOR_SCOREBOARD:
@@ -797,6 +828,8 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		case QPU_SIG_LOAD_TMU1:
 		case QPU_SIG_PROG_END:
 		case QPU_SIG_SMALL_IMM:
+		case QPU_SIG_THREAD_SWITCH:
+		case QPU_SIG_LAST_THREAD_SWITCH:
 			if (!check_instruction_writes(validated_shader,
 						      &validation_state)) {
 				DRM_ERROR("Bad write at ip %d\n", ip);
@@ -812,6 +845,18 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 				shader_end_ip = ip;
 			}
 
+			if (sig == QPU_SIG_THREAD_SWITCH ||
+			    sig == QPU_SIG_LAST_THREAD_SWITCH) {
+				validated_shader->is_threaded = true;
+
+				if (ip < last_thread_switch_ip + 3) {
+					DRM_ERROR("Thread switch too soon after "
+						  "last switch at ip %d\n", ip);
+					goto fail;
+				}
+				last_thread_switch_ip = ip;
+			}
+
 			break;
 
 		case QPU_SIG_LOAD_IMM:
@@ -826,6 +871,13 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 			if (!check_branch(inst, validated_shader,
 					  &validation_state, ip))
 				goto fail;
+
+			if (ip < last_thread_switch_ip + 3) {
+				DRM_ERROR("Branch in thread switch at ip %d",
+					  ip);
+				goto fail;
+			}
+
 			break;
 		default:
 			DRM_ERROR("Unsupported QPU signal %d at "
@@ -847,6 +899,14 @@ vc4_validate_shader(struct drm_gem_cma_object *shader_obj)
 		goto fail;
 	}
 
+	/* Might corrupt other thread */
+	if (validated_shader->is_threaded &&
+	    validation_state.all_registers_used) {
+		DRM_ERROR("Shader uses threading, but uses the upper "
+			  "half of the registers, too\n");
+		goto fail;
+	}
+
 	/* If we did a backwards branch and we haven't emitted a uniforms
 	 * reset since then, we still need the uniforms stream to have the
 	 * uniforms address available so that the backwards branch can do its
```
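The core of the new restriction is `live_reg_is_upper_half()`. The live register index (lri) space appears to pack regfile A into indices 0-31 and regfile B into 32-63, with the accumulators starting at 64; that mapping is inferred from the `i = 64` reset loop and the helper's arithmetic, not stated explicitly in the patch. A minimal standalone sketch of the test, with illustrative asserts:

```c
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

/* Standalone copy of the helper added by this diff.  Assuming lri
 * 0-31 is regfile A and 32-63 is regfile B, the upper half of each
 * file is 16-31 and 48-63 respectively.
 */
static bool live_reg_is_upper_half(uint32_t lri)
{
	return (lri >= 16 && lri < 32) ||
	       (lri >= 32 + 16 && lri < 32 + 32);
}

int main(void)
{
	assert(!live_reg_is_upper_half(0));       /* ra0: lower half of file A */
	assert(live_reg_is_upper_half(16));       /* ra16: upper half of file A */
	assert(!live_reg_is_upper_half(32 + 0));  /* rb0: lower half of file B */
	assert(live_reg_is_upper_half(32 + 16));  /* rb16: upper half of file B */
	assert(!live_reg_is_upper_half(64));      /* accumulator, always usable */
	return 0;
}
```

In a threaded shader each of the two co-resident threads gets only the lower half of each register file (the diff's comment calls the upper regs "unusable"), so any read or write of the upper half sets `all_registers_used`, and validation later fails if the shader is also marked `is_threaded`.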

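The recurring `+ 3` arithmetic encodes the QPU's two delay slots: an instruction that signals a program end or thread switch at `ip` is followed by two more instructions that still execute, so `validation_state->max_ip = ip + 3` points one past the last executed instruction, and a new switch or branch is only legal once `ip >= last_thread_switch_ip + 3`. A small sketch of that spacing rule; the helper name here is hypothetical, not from the kernel:

```c
#include <stdbool.h>
#include <stdio.h>

/* Hypothetical helper mirroring the patch's checks: a thread switch
 * signaled at last_switch_ip has delay slots at +1 and +2 that still
 * execute, so the next switch or branch is legal only from +3 onward.
 * The patch initializes last_thread_switch_ip to -3 so that ip 0
 * passes the same comparison.
 */
static bool outside_delay_slots(int ip, int last_switch_ip)
{
	return ip >= last_switch_ip + 3;
}

int main(void)
{
	printf("%d\n", outside_delay_slots(0, -3));  /* 1: no switch seen yet */
	printf("%d\n", outside_delay_slots(11, 10)); /* 0: first delay slot */
	printf("%d\n", outside_delay_slots(12, 10)); /* 0: second delay slot */
	printf("%d\n", outside_delay_slots(13, 10)); /* 1: past both slots */
	return 0;
}
```

The same rule explains the branch-target check: since a PROG_END at `ip` sets `max_ip = ip + 3`, comparing `max_branch_target > validation_state->max_ip - 3` is equivalent to the old comparison against `shader_end_ip` without needing the separate tracking variables the patch removes.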