/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
#include "mmsch_v1_0.h"

#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"

#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET			0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET			0x0f
#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET			0x10
#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET			0x11
#define mmUVD_NO_OP_INTERNAL_OFFSET				0x29
#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET			0x66
#define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d

#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET		0x3b5
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c

#define VCN25_MAX_HW_INSTANCES_ARCTURUS			2

static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v2_5_set_powergating_state(void *handle,
				enum amd_powergating_state state);
static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
				int inst_idx, struct dpg_pause_state *new_state);
static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);

static int amdgpu_ih_clientid_vcns[] = {
	SOC15_IH_CLIENTID_VCN,
	SOC15_IH_CLIENTID_VCN1
};

/**
 * vcn_v2_5_early_init - set function pointers
 *
 * @handle: amdgpu_device pointer
 *
 * Set ring and irq function pointers
 */
static int vcn_v2_5_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		adev->vcn.num_vcn_inst = 2;
		adev->vcn.harvest_config = 0;
		adev->vcn.num_enc_rings = 1;
	} else {
		u32 harvest;
		int i;
		adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
			if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
				adev->vcn.harvest_config |= 1 << i;
		}
		if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
					AMDGPU_VCN_HARVEST_VCN1))
			/* both instances are harvested, disable the block */
			return -ENOENT;

		adev->vcn.num_enc_rings = 2;
	}

	vcn_v2_5_set_dec_ring_funcs(adev);
	vcn_v2_5_set_enc_ring_funcs(adev);
	vcn_v2_5_set_irq_funcs(adev);

	return 0;
}

/**
 * vcn_v2_5_sw_init - sw init for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Load firmware and sw initialization
 */
static int vcn_v2_5_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int i, j, r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;
		/* VCN DEC TRAP */
		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
				VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
		if (r)
			return r;

		/* VCN ENC TRAP */
		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
				i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
			if (r)
				return r;
		}
	}

	r = amdgpu_vcn_sw_init(adev);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);

		if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) {
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1;
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		}
		dev_info(adev->dev, "Will use PSP to load VCN firmware\n");
	}

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
		volatile struct amdgpu_fw_shared *fw_shared;

		if (adev->vcn.harvest_config & (1 << j))
			continue;
		adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
		adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
		adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
		adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
		adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
		adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;

		adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
		adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
		adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
		adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);

		ring = &adev->vcn.inst[j].ring_dec;
		ring->use_doorbell = true;

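		/* Doorbell layout: SR-IOV packs two doorbells per VCN instance
		 * (dec + one enc ring), bare metal reserves eight per instance.
		 */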
		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
				(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
		sprintf(ring->name, "vcn_dec_%d", j);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
				     0, AMDGPU_RING_PRIO_DEFAULT, NULL);
		if (r)
			return r;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			enum amdgpu_ring_priority_level hw_prio = amdgpu_vcn_get_enc_ring_prio(i);

			ring = &adev->vcn.inst[j].ring_enc[i];
			ring->use_doorbell = true;

			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
					(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));

			sprintf(ring->name, "vcn_enc_%d.%d", j, i);
			r = amdgpu_ring_init(adev, ring, 512,
					     &adev->vcn.inst[j].irq, 0,
					     hw_prio, NULL);
			if (r)
				return r;
		}

		fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr;
		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
	}

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;

	return 0;
}

/**
 * vcn_v2_5_sw_fini - sw fini for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * VCN suspend and free up sw allocation
 */
static int vcn_v2_5_sw_fini(void *handle)
{
	int i, r, idx;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	volatile struct amdgpu_fw_shared *fw_shared;

	if (drm_dev_enter(&adev->ddev, &idx)) {
		for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
			if (adev->vcn.harvest_config & (1 << i))
				continue;
			fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
			fw_shared->present_flag_0 = 0;
		}
		drm_dev_exit(idx);
	}


	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vcn_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vcn_sw_fini(adev);

	return r;
}

/**
 * vcn_v2_5_hw_init - start and test VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int vcn_v2_5_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r = 0;

	if (amdgpu_sriov_vf(adev))
		r = vcn_v2_5_sriov_start(adev);

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (amdgpu_sriov_vf(adev)) {
			adev->vcn.inst[j].ring_enc[0].sched.ready = true;
			adev->vcn.inst[j].ring_enc[1].sched.ready = false;
			adev->vcn.inst[j].ring_enc[2].sched.ready = false;
			adev->vcn.inst[j].ring_dec.sched.ready = true;
		} else {

			ring = &adev->vcn.inst[j].ring_dec;

			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
						     ring->doorbell_index, j);

			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
				ring = &adev->vcn.inst[j].ring_enc[i];
				r = amdgpu_ring_test_helper(ring);
				if (r)
					goto done;
			}
		}
	}

done:
	if (!r)
		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");

	return r;
}

/**
 * vcn_v2_5_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCN block, mark ring as not ready any more
 */
static int vcn_v2_5_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

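		/* Power gate the instance if DPG is enabled or the VCPU is still running. */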
		if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
		    (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
		     RREG32_SOC15(VCN, i, mmUVD_STATUS)))
			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
	}

	return 0;
}

/**
 * vcn_v2_5_suspend - suspend VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * HW fini and suspend VCN block
 */
static int vcn_v2_5_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vcn_v2_5_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vcn_suspend(adev);

	return r;
}

/**
 * vcn_v2_5_resume - resume VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume firmware and hw init VCN block
 */
static int vcn_v2_5_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	r = vcn_v2_5_hw_init(adev);

	return r;
}

/**
 * vcn_v2_5_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Let the VCN memory controller know its offsets
 */
static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
{
	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
	uint32_t offset;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* cache window 0: fw */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
			offset = 0;
		} else {
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = size;
			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);

		/* cache window 1: stack */
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);

		/* cache window 2: context */
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);

		/* non-cache window */
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
			lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
			upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
	}
}

static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
	uint32_t offset;

	/* cache window 0: fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		if (!indirect) {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		} else {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		}
		offset = 0;
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		offset = size;
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
	}

	if (!indirect)
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
	else
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);

	/* cache window 1: stack */
	if (!indirect) {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	}
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);

	/* cache window 2: context */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);

	/* non-cache window */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);

	/* VCN global tiling registers */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}

/**
 * vcn_v2_5_disable_clock_gating - disable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 *
 * Disable clock gating for VCN block
 */
static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
{
	uint32_t data;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* UVD disable CGC */
		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		else
			data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
		data &= ~(UVD_CGC_GATE__SYS_MASK
			| UVD_CGC_GATE__UDEC_MASK
			| UVD_CGC_GATE__MPEG2_MASK
			| UVD_CGC_GATE__REGS_MASK
			| UVD_CGC_GATE__RBC_MASK
			| UVD_CGC_GATE__LMI_MC_MASK
			| UVD_CGC_GATE__LMI_UMC_MASK
			| UVD_CGC_GATE__IDCT_MASK
			| UVD_CGC_GATE__MPRD_MASK
			| UVD_CGC_GATE__MPC_MASK
			| UVD_CGC_GATE__LBSI_MASK
			| UVD_CGC_GATE__LRBBM_MASK
			| UVD_CGC_GATE__UDEC_RE_MASK
			| UVD_CGC_GATE__UDEC_CM_MASK
			| UVD_CGC_GATE__UDEC_IT_MASK
			| UVD_CGC_GATE__UDEC_DB_MASK
			| UVD_CGC_GATE__UDEC_MP_MASK
			| UVD_CGC_GATE__WCB_MASK
			| UVD_CGC_GATE__VCPU_MASK
			| UVD_CGC_GATE__MMSCH_MASK);

		WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);

		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0,  0xFFFFFFFF);

		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
			| UVD_CGC_CTRL__SYS_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MODE_MASK
			| UVD_CGC_CTRL__MPEG2_MODE_MASK
			| UVD_CGC_CTRL__REGS_MODE_MASK
			| UVD_CGC_CTRL__RBC_MODE_MASK
			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
			| UVD_CGC_CTRL__IDCT_MODE_MASK
			| UVD_CGC_CTRL__MPRD_MODE_MASK
			| UVD_CGC_CTRL__MPC_MODE_MASK
			| UVD_CGC_CTRL__LBSI_MODE_MASK
			| UVD_CGC_CTRL__LRBBM_MODE_MASK
			| UVD_CGC_CTRL__WCB_MODE_MASK
			| UVD_CGC_CTRL__VCPU_MODE_MASK
			| UVD_CGC_CTRL__MMSCH_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		/* turn on */
		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
		data |= (UVD_SUVD_CGC_GATE__SRE_MASK
			| UVD_SUVD_CGC_GATE__SIT_MASK
			| UVD_SUVD_CGC_GATE__SMP_MASK
			| UVD_SUVD_CGC_GATE__SCM_MASK
			| UVD_SUVD_CGC_GATE__SDB_MASK
			| UVD_SUVD_CGC_GATE__SRE_H264_MASK
			| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SIT_H264_MASK
			| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SCM_H264_MASK
			| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SDB_H264_MASK
			| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SCLR_MASK
			| UVD_SUVD_CGC_GATE__UVD_SC_MASK
			| UVD_SUVD_CGC_GATE__ENT_MASK
			| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
			| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
			| UVD_SUVD_CGC_GATE__SITE_MASK
			| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
			| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
			| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
			| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
			| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);

		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
		data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
	}
}

static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
		uint8_t sram_sel, int inst_idx, uint8_t indirect)
{
	uint32_t reg_data = 0;

	/* enable sw clock gating control */
	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	else
		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
		 UVD_CGC_CTRL__SYS_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_MODE_MASK |
		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
		 UVD_CGC_CTRL__REGS_MODE_MASK |
		 UVD_CGC_CTRL__RBC_MODE_MASK |
		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
		 UVD_CGC_CTRL__IDCT_MODE_MASK |
		 UVD_CGC_CTRL__MPRD_MODE_MASK |
		 UVD_CGC_CTRL__MPC_MODE_MASK |
		 UVD_CGC_CTRL__LBSI_MODE_MASK |
		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
		 UVD_CGC_CTRL__WCB_MODE_MASK |
		 UVD_CGC_CTRL__VCPU_MODE_MASK |
		 UVD_CGC_CTRL__MMSCH_MODE_MASK);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);

	/* turn off clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);

	/* turn on SUVD clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);

	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}

/**
 * vcn_v2_5_enable_clock_gating - enable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 *
 * Enable clock gating for VCN block
 */
static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* enable UVD CGC */
		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		else
			data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
			| UVD_CGC_CTRL__SYS_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MODE_MASK
			| UVD_CGC_CTRL__MPEG2_MODE_MASK
			| UVD_CGC_CTRL__REGS_MODE_MASK
			| UVD_CGC_CTRL__RBC_MODE_MASK
			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
			| UVD_CGC_CTRL__IDCT_MODE_MASK
			| UVD_CGC_CTRL__MPRD_MODE_MASK
			| UVD_CGC_CTRL__MPC_MODE_MASK
			| UVD_CGC_CTRL__LBSI_MODE_MASK
			| UVD_CGC_CTRL__LRBBM_MODE_MASK
			| UVD_CGC_CTRL__WCB_MODE_MASK
			| UVD_CGC_CTRL__VCPU_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
		data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
	}
}

static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
	struct amdgpu_ring *ring;
	uint32_t rb_bufsz, tmp;

	/* disable register anti-hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
	/* enable dynamic power gating mode */
	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);

	if (indirect)
		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t *)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;

	/* enable clock gating */
	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);

	/* enable VCPU clock */
	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);

	/* disable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);

	/* setup mmUVD_LMI_CTRL */
	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
		UVD_LMI_CTRL__REQ_MODE_MASK |
		UVD_LMI_CTRL__CRC_RESET_MASK |
		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
		0x00100000L);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_CNTL),
		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_SET_MUXA0),
		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_SET_MUXB0),
		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_SET_MUX),
		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);

	vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);

	/* enable LMI MC and UMC channels */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);

	/* unblock VCPU register access */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);

	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);

	/* enable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MASTINT_EN),
		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);

	if (indirect)
		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
				    (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
					       (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));

	ring = &adev->vcn.inst[inst_idx].ring_dec;
	/* force RBC into idle state */
	rb_bufsz = order_base_2(ring->ring_size);
	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);

	/* Stall DPG before WPTR/RPTR reset */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
	fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;

	/* set the write pointer delay */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);

	/* set the wb address */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
		(upper_32_bits(ring->gpu_addr) >> 2));

	/* program the RB_BASE for ring buffer */
	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
		lower_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
		upper_32_bits(ring->gpu_addr));

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);

	WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);

	ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
		lower_32_bits(ring->wptr));

	fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
	/* Unstall DPG */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

	return 0;
}

static int vcn_v2_5_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t rb_bufsz, tmp;
	int i, j, k, r;

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, true);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
			continue;
		}

		/* disable register anti-hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

		/* set uvd status busy */
		tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
		WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		return 0;

	/*SW clock gating */
	vcn_v2_5_disable_clock_gating(adev);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* enable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);

		/* disable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
			~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* setup mmUVD_LMI_CTRL */
		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
		tmp &= ~0xff;
		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK	|
			UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);

		/* setup mmUVD_MPC_CNTL */
		tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
		WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);

		/* setup UVD_MPC_SET_MUXA0 */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
			(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
			(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));

		/* setup UVD_MPC_SET_MUXB0 */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
			(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
			(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));

		/* setup mmUVD_MPC_SET_MUX */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
			(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
	}

	vcn_v2_5_mc_resume(adev);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* VCN global tiling registers */
		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);
		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);

		/* enable LMI MC and UMC channels */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

		/* unblock VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
			~UVD_VCPU_CNTL__BLK_RST_MASK);

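		/* Poll UVD_STATUS until the VCPU reports ready, soft-resetting it up to 10 times. */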
		for (k = 0; k < 10; ++k) {
			uint32_t status;

			for (j = 0; j < 100; ++j) {
				status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
				if (status & 2)
					break;
				if (amdgpu_emu_mode == 1)
					msleep(500);
				else
					mdelay(10);
			}
			r = 0;
			if (status & 2)
				break;

			DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
				UVD_VCPU_CNTL__BLK_RST_MASK,
				~UVD_VCPU_CNTL__BLK_RST_MASK);
			mdelay(10);
			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
				~UVD_VCPU_CNTL__BLK_RST_MASK);

			mdelay(10);
			r = -1;
		}

		if (r) {
			DRM_ERROR("VCN decode not responding, giving up!!!\n");
			return r;
		}

		/* enable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
			UVD_MASTINT_EN__VCPU_EN_MASK,
			~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* clear the busy bit of VCN_STATUS */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));

		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);

		ring = &adev->vcn.inst[i].ring_dec;
		/* force RBC into idle state */
		rb_bufsz = order_base_2(ring->ring_size);
		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);

		fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
		/* program the RB_BASE for ring buffer */
		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
			lower_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
			upper_32_bits(ring->gpu_addr));

		/* Initialize the ring buffer's read and write pointers */
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);

		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
				lower_32_bits(ring->wptr));
		fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;

		fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
		ring = &adev->vcn.inst[i].ring_enc[0];
		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
		fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;

		fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
		ring = &adev->vcn.inst[i].ring_enc[1];
		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
		fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
	}

	return 0;
}

static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop = 0, size = 0;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_1_init_header *header = NULL;

	header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
	size = header->total_size;

	/*
	 * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
	 *  memory descriptor location
	 */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
	data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	/* use domain0 for MM scheduler */
	data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);

	/* 4, set resp to zero */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);

	/*
	 * 5, kick off the initialization and wait until
	 * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
	 */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);

	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
	loop = 10;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(100);
		data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev,
			"failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
			data);
		return -EBUSY;
	}

	return 0;
}

static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size, tmp, i, rb_bufsz;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	header->version = MMSCH_VERSION;
	header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
	init_table += header->total_size;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		header->eng[i].table_offset = header->total_size;
		header->eng[i].init_status = 0;
		header->eng[i].table_size = 0;

		table_size = 0;

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

		size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
		/* mc resume*/
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
			offset = 0;
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = size;
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0),
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}

		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0),
			size);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1),
			0);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1),
			AMDGPU_VCN_STACK_SIZE);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
				AMDGPU_VCN_STACK_SIZE));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
				AMDGPU_VCN_STACK_SIZE));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2),
			0);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2),
			AMDGPU_VCN_CONTEXT_SIZE);

		ring = &adev->vcn.inst[i].ring_enc[0];
		ring->wptr = 0;

		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO),
			lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI),
			upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE),
			ring->ring_size / 4);

		ring = &adev->vcn.inst[i].ring_dec;
		ring->wptr = 0;
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
			lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
			upper_32_bits(ring->gpu_addr));

		/* force RBC into idle state */
		rb_bufsz = order_base_2(ring->ring_size);
		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;

		/* refine header */
		header->eng[i].table_size = table_size;
		header->total_size += table_size;
	}

	return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
}

static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
	uint32_t tmp;

	/* Wait for power status to be 1 */
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

	/* wait for read ptr to be equal to write ptr */
	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF);

	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF);

	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

	/* disable dynamic power gating mode */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
			~UVD_POWER_STATUS__UVD_PG_MODE_MASK);

	return 0;
}

static int vcn_v2_5_stop(struct amdgpu_device *adev)
{
	uint32_t tmp;
	int i, r = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v2_5_stop_dpg_mode(adev, i);
			continue;
		}

		/* wait for vcn idle */
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7);
		if (r)
			return r;

		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__READ_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
		if (r)
			return r;

		/* block LMI UMC channel */
		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);

		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
		r = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp);
		if (r)
			return r;

		/* block VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
			UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* reset VCPU */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__BLK_RST_MASK,
			~UVD_VCPU_CNTL__BLK_RST_MASK);

		/* disable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
			~(UVD_VCPU_CNTL__CLK_EN_MASK));

		/* clear status */
		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);

		vcn_v2_5_enable_clock_gating(adev);

		/* enable register anti-hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
	}

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, false);

	return 0;
}

static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
				int inst_idx, struct dpg_pause_state *new_state)
{
	struct amdgpu_ring *ring;
	uint32_t reg_data = 0;
	int ret_code = 0;

	/* pause/unpause if state is changed */
	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
		DRM_DEBUG("dpg pause state changed %d -> %d",
			adev->vcn.inst[inst_idx].pause_state.fw_based,	new_state->fw_based);
		reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
			ret_code = SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

			if (!ret_code) {
				volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;

				/* pause DPG */
				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
				WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);

				/* wait for ACK */
				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

				/* Stall DPG before WPTR/RPTR reset */
				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
					   UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
					   ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

				/* Restore */
				fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
				ring->wptr = 0;
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
				fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;

				fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
				ring->wptr = 0;
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
				fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;

				/* Unstall DPG */
				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
					   0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
					   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
			}
		} else {
			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
			SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
		}
1471
		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
1472 1473 1474 1475 1476
	}

	return 0;
}

/**
 * vcn_v2_5_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
}

/**
 * vcn_v2_5_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];
	else
		return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
}

/**
 * vcn_v2_5_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
	}
}

static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0xf,
	.vmhub = AMDGPU_MMHUB_1,
	.get_rptr = vcn_v2_5_dec_ring_get_rptr,
	.get_wptr = vcn_v2_5_dec_ring_get_wptr,
	.set_wptr = vcn_v2_5_dec_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
		6,
	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
	.test_ring = vcn_v2_0_dec_ring_test_ring,
	.test_ib = amdgpu_vcn_dec_ring_test_ib,
	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
	.insert_start = vcn_v2_0_dec_ring_insert_start,
	.insert_end = vcn_v2_0_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static const struct amdgpu_ring_funcs vcn_v2_6_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0xf,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v2_5_dec_ring_get_rptr,
	.get_wptr = vcn_v2_5_dec_ring_get_wptr,
	.set_wptr = vcn_v2_5_dec_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
		6,
	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
	.test_ring = vcn_v2_0_dec_ring_test_ring,
	.test_ib = amdgpu_vcn_dec_ring_test_ib,
	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
	.insert_start = vcn_v2_0_dec_ring_insert_start,
	.insert_end = vcn_v2_0_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

/**
 * vcn_v2_5_enc_ring_get_rptr - get enc read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc read pointer
 */
static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
	else
		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
}

/**
 * vcn_v2_5_enc_ring_get_wptr - get enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc write pointer
 */
static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell)
			return adev->wb.wb[ring->wptr_offs];
		else
			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
	} else {
		if (ring->use_doorbell)
			return adev->wb.wb[ring->wptr_offs];
		else
			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
	}
}

/**
 * vcn_v2_5_enc_ring_set_wptr - set enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the enc write pointer to the hardware
 */
static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell) {
			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
		}
	} else {
		if (ring->use_doorbell) {
			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
		}
	}
}

static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_1,
	.get_rptr = vcn_v2_5_enc_ring_get_rptr,
	.get_wptr = vcn_v2_5_enc_ring_get_wptr,
	.set_wptr = vcn_v2_5_enc_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
		1, /* vcn_v2_0_enc_ring_insert_end */
	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_enc_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v2_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

static const struct amdgpu_ring_funcs vcn_v2_6_enc_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_0,
	.get_rptr = vcn_v2_5_enc_ring_get_rptr,
	.get_wptr = vcn_v2_5_enc_ring_get_wptr,
	.set_wptr = vcn_v2_5_enc_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
		1, /* vcn_v2_0_enc_ring_insert_end */
	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_enc_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v2_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

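/**
 * vcn_v2_5_set_dec_ring_funcs - set decode ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Hooks up the decode ring function table for each non-harvested VCN
 * instance; Arcturus uses the v2.5 table (MMHUB_1), Aldebaran the
 * v2.6 table (MMHUB_0).
 */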
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->asic_type == CHIP_ARCTURUS)
			adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
		else /* CHIP_ALDEBARAN */
			adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_6_dec_ring_vm_funcs;
		adev->vcn.inst[i].ring_dec.me = i;
		DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
	}
}

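/**
 * vcn_v2_5_set_enc_ring_funcs - set encode ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Hooks up the encode ring function table for every encode ring of each
 * non-harvested VCN instance; Arcturus uses the v2.5 table (MMHUB_1),
 * Aldebaran the v2.6 table (MMHUB_0).
 */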
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
{
	int i, j;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;
		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			if (adev->asic_type == CHIP_ARCTURUS)
				adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
			else /* CHIP_ALDEBARAN */
				adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_6_enc_ring_vm_funcs;
			adev->vcn.inst[j].ring_enc[i].me = j;
		}
		DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
	}
}

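/**
 * vcn_v2_5_is_idle - check VCN block idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true only when every non-harvested VCN instance reports
 * UVD_STATUS__IDLE.
 */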
static bool vcn_v2_5_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 1;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
	}

	return ret;
}

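/**
 * vcn_v2_5_wait_for_idle - wait for VCN block idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Polls UVD_STATUS on each non-harvested instance until it reads
 * UVD_STATUS__IDLE, returning the first wait error encountered.
 */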
static int vcn_v2_5_wait_for_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		ret = SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
			UVD_STATUS__IDLE);
		if (ret)
			return ret;
	}

	return ret;
}

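/**
 * vcn_v2_5_set_clockgating_state - enable/disable VCN clock gating
 *
 * @handle: amdgpu_device pointer
 * @state: clockgating state to set
 *
 * Clock gating is only enabled while the block is idle; under SR-IOV
 * this is a no-op.
 */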
static int vcn_v2_5_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (enable) {
		if (!vcn_v2_5_is_idle(handle))
			return -EBUSY;
		vcn_v2_5_enable_clock_gating(adev);
	} else {
		vcn_v2_5_disable_clock_gating(adev);
	}

	return 0;
}

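/**
 * vcn_v2_5_set_powergating_state - gate/ungate VCN power
 *
 * @handle: amdgpu_device pointer
 * @state: powergating state to set
 *
 * Stops the block on AMD_PG_STATE_GATE and starts it otherwise, caching
 * the new state on success.  Skipped under SR-IOV.
 */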
static int vcn_v2_5_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (state == adev->vcn.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = vcn_v2_5_stop(adev);
	else
		ret = vcn_v2_5_start(adev);

	if (!ret)
		adev->vcn.cur_state = state;

	return ret;
}

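/**
 * vcn_v2_5_set_interrupt_state - set VCN interrupt state
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @type: interrupt type
 * @state: requested interrupt state
 *
 * Stub callback for the interrupt source interface; nothing is
 * programmed here for VCN 2.5.
 */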
static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	return 0;
}

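/**
 * vcn_v2_5_process_interrupt - process VCN block interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Maps the IH client id to a VCN instance and signals fence completion
 * on the decode or encode ring that raised the interrupt.
 */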
static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	uint32_t ip_instance;

	switch (entry->client_id) {
	case SOC15_IH_CLIENTID_VCN:
		ip_instance = 0;
		break;
	case SOC15_IH_CLIENTID_VCN1:
		ip_instance = 1;
		break;
	default:
		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
		return 0;
	}

	DRM_DEBUG("IH: VCN TRAP\n");

	switch (entry->src_id) {
	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
		break;
	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
		break;
	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
	.set = vcn_v2_5_set_interrupt_state,
	.process = vcn_v2_5_process_interrupt,
};

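/**
 * vcn_v2_5_set_irq_funcs - set VCN interrupt functions
 *
 * @adev: amdgpu_device pointer
 *
 * Registers the interrupt handler for each non-harvested instance, with
 * one interrupt type per encode ring plus one for decode.
 */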
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
	}
}

static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
	.name = "vcn_v2_5",
	.early_init = vcn_v2_5_early_init,
	.late_init = NULL,
	.sw_init = vcn_v2_5_sw_init,
	.sw_fini = vcn_v2_5_sw_fini,
	.hw_init = vcn_v2_5_hw_init,
	.hw_fini = vcn_v2_5_hw_fini,
	.suspend = vcn_v2_5_suspend,
	.resume = vcn_v2_5_resume,
	.is_idle = vcn_v2_5_is_idle,
	.wait_for_idle = vcn_v2_5_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = vcn_v2_5_set_clockgating_state,
	.set_powergating_state = vcn_v2_5_set_powergating_state,
};

static const struct amd_ip_funcs vcn_v2_6_ip_funcs = {
	.name = "vcn_v2_6",
	.early_init = vcn_v2_5_early_init,
	.late_init = NULL,
	.sw_init = vcn_v2_5_sw_init,
	.sw_fini = vcn_v2_5_sw_fini,
	.hw_init = vcn_v2_5_hw_init,
	.hw_fini = vcn_v2_5_hw_fini,
	.suspend = vcn_v2_5_suspend,
	.resume = vcn_v2_5_resume,
	.is_idle = vcn_v2_5_is_idle,
	.wait_for_idle = vcn_v2_5_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = vcn_v2_5_set_clockgating_state,
	.set_powergating_state = vcn_v2_5_set_powergating_state,
};

const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
{
		.type = AMD_IP_BLOCK_TYPE_VCN,
		.major = 2,
		.minor = 5,
		.rev = 0,
		.funcs = &vcn_v2_5_ip_funcs,
};

const struct amdgpu_ip_block_version vcn_v2_6_ip_block =
{
		.type = AMD_IP_BLOCK_TYPE_VCN,
		.major = 2,
		.minor = 6,
		.rev = 0,
		.funcs = &vcn_v2_6_ip_funcs,
};