/*
 * Copyright 2019 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/firmware.h>

#include "amdgpu.h"
#include "amdgpu_vcn.h"
#include "amdgpu_pm.h"
#include "soc15.h"
#include "soc15d.h"
#include "vcn_v2_0.h"
#include "mmsch_v1_0.h"

#include "vcn/vcn_2_5_offset.h"
#include "vcn/vcn_2_5_sh_mask.h"
#include "ivsrcid/vcn/irqsrcs_vcn_2_0.h"

#define mmUVD_CONTEXT_ID_INTERNAL_OFFSET			0x27
#define mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET			0x0f
#define mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET			0x10
#define mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET			0x11
#define mmUVD_NO_OP_INTERNAL_OFFSET				0x29
#define mmUVD_GP_SCRATCH8_INTERNAL_OFFSET			0x66
#define mmUVD_SCRATCH9_INTERNAL_OFFSET				0xc01d

#define mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET			0x431
#define mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET		0x3b4
#define mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET 	0x3b5
#define mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET			0x25c

#define VCN25_MAX_HW_INSTANCES_ARCTURUS			2

static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev);
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev);
static int vcn_v2_5_set_powergating_state(void *handle,
				enum amd_powergating_state state);
static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
				int inst_idx, struct dpg_pause_state *new_state);
static int vcn_v2_5_sriov_start(struct amdgpu_device *adev);

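/* IH client ID used by each VCN instance; instance 0 and instance 1 interrupt through separate clients */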
static int amdgpu_ih_clientid_vcns[] = {
	SOC15_IH_CLIENTID_VCN,
	SOC15_IH_CLIENTID_VCN1
};

/**
 * vcn_v2_5_early_init - set function pointers
 *
 * @handle: amdgpu_device pointer
 *
 * Set ring and irq function pointers
 */
static int vcn_v2_5_early_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	if (amdgpu_sriov_vf(adev)) {
		adev->vcn.num_vcn_inst = 2;
		adev->vcn.harvest_config = 0;
		adev->vcn.num_enc_rings = 1;
	} else {
		if (adev->asic_type == CHIP_ARCTURUS) {
			u32 harvest;
			int i;

			adev->vcn.num_vcn_inst = VCN25_MAX_HW_INSTANCES_ARCTURUS;
			for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
				harvest = RREG32_SOC15(VCN, i, mmCC_UVD_HARVESTING);
				if (harvest & CC_UVD_HARVESTING__UVD_DISABLE_MASK)
					adev->vcn.harvest_config |= 1 << i;
			}

			if (adev->vcn.harvest_config == (AMDGPU_VCN_HARVEST_VCN0 |
						AMDGPU_VCN_HARVEST_VCN1))
				/* both instances are harvested, disable the block */
				return -ENOENT;
		} else
			adev->vcn.num_vcn_inst = 1;

		adev->vcn.num_enc_rings = 2;
	}

	vcn_v2_5_set_dec_ring_funcs(adev);
	vcn_v2_5_set_enc_ring_funcs(adev);
	vcn_v2_5_set_irq_funcs(adev);

	return 0;
}

/**
 * vcn_v2_5_sw_init - sw init for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Load firmware and sw initialization
 */
static int vcn_v2_5_sw_init(void *handle)
{
	struct amdgpu_ring *ring;
	int i, j, r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;
		/* VCN DEC TRAP */
		r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
				VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT, &adev->vcn.inst[j].irq);
		if (r)
			return r;

		/* VCN ENC TRAP */
		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			r = amdgpu_irq_add_id(adev, amdgpu_ih_clientid_vcns[j],
				i + VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE, &adev->vcn.inst[j].irq);
			if (r)
				return r;
		}
	}

	r = amdgpu_vcn_sw_init(adev);
	if (r)
		return r;

	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		const struct common_firmware_header *hdr;
		hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].ucode_id = AMDGPU_UCODE_ID_VCN;
		adev->firmware.ucode[AMDGPU_UCODE_ID_VCN].fw = adev->vcn.fw;
		adev->firmware.fw_size +=
			ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);

		if (adev->vcn.num_vcn_inst == VCN25_MAX_HW_INSTANCES_ARCTURUS) {
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].ucode_id = AMDGPU_UCODE_ID_VCN1;
			adev->firmware.ucode[AMDGPU_UCODE_ID_VCN1].fw = adev->vcn.fw;
			adev->firmware.fw_size +=
				ALIGN(le32_to_cpu(hdr->ucode_size_bytes), PAGE_SIZE);
		}
		DRM_INFO("PSP loading VCN firmware\n");
	}

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	for (j = 0; j < adev->vcn.num_vcn_inst; j++) {
		volatile struct amdgpu_fw_shared *fw_shared;

		if (adev->vcn.harvest_config & (1 << j))
			continue;
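		/* firmware-internal register offsets used in ring packets, plus the matching per-instance MMIO offsets */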
		adev->vcn.internal.context_id = mmUVD_CONTEXT_ID_INTERNAL_OFFSET;
		adev->vcn.internal.ib_vmid = mmUVD_LMI_RBC_IB_VMID_INTERNAL_OFFSET;
		adev->vcn.internal.ib_bar_low = mmUVD_LMI_RBC_IB_64BIT_BAR_LOW_INTERNAL_OFFSET;
		adev->vcn.internal.ib_bar_high = mmUVD_LMI_RBC_IB_64BIT_BAR_HIGH_INTERNAL_OFFSET;
		adev->vcn.internal.ib_size = mmUVD_RBC_IB_SIZE_INTERNAL_OFFSET;
		adev->vcn.internal.gp_scratch8 = mmUVD_GP_SCRATCH8_INTERNAL_OFFSET;

		adev->vcn.internal.scratch9 = mmUVD_SCRATCH9_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.scratch9 = SOC15_REG_OFFSET(VCN, j, mmUVD_SCRATCH9);
		adev->vcn.internal.data0 = mmUVD_GPCOM_VCPU_DATA0_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.data0 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA0);
		adev->vcn.internal.data1 = mmUVD_GPCOM_VCPU_DATA1_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.data1 = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_DATA1);
		adev->vcn.internal.cmd = mmUVD_GPCOM_VCPU_CMD_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.cmd = SOC15_REG_OFFSET(VCN, j, mmUVD_GPCOM_VCPU_CMD);
		adev->vcn.internal.nop = mmUVD_NO_OP_INTERNAL_OFFSET;
		adev->vcn.inst[j].external.nop = SOC15_REG_OFFSET(VCN, j, mmUVD_NO_OP);

		ring = &adev->vcn.inst[j].ring_dec;
		ring->use_doorbell = true;

		/* each instance owns a doorbell block: 2 slots under SR-IOV (dec + one enc), 8 otherwise */
		ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
				(amdgpu_sriov_vf(adev) ? 2*j : 8*j);
		sprintf(ring->name, "vcn_dec_%d", j);
		r = amdgpu_ring_init(adev, ring, 512, &adev->vcn.inst[j].irq,
				     0, AMDGPU_RING_PRIO_DEFAULT);
		if (r)
			return r;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			ring = &adev->vcn.inst[j].ring_enc[i];
			ring->use_doorbell = true;

			ring->doorbell_index = (adev->doorbell_index.vcn.vcn_ring0_1 << 1) +
					(amdgpu_sriov_vf(adev) ? (1 + i + 2*j) : (2 + i + 8*j));

			sprintf(ring->name, "vcn_enc_%d.%d", j, i);
			r = amdgpu_ring_init(adev, ring, 512,
					     &adev->vcn.inst[j].irq, 0,
					     AMDGPU_RING_PRIO_DEFAULT);
			if (r)
				return r;
		}

		fw_shared = adev->vcn.inst[j].fw_shared_cpu_addr;
		fw_shared->present_flag_0 = cpu_to_le32(AMDGPU_VCN_MULTI_QUEUE_FLAG);
	}

	if (amdgpu_sriov_vf(adev)) {
		r = amdgpu_virt_alloc_mm_table(adev);
		if (r)
			return r;
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		adev->vcn.pause_dpg_mode = vcn_v2_5_pause_dpg_mode;

	return 0;
}

/**
 * vcn_v2_5_sw_fini - sw fini for VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * VCN suspend and free up sw allocation
 */
static int vcn_v2_5_sw_fini(void *handle)
{
	int i, r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	volatile struct amdgpu_fw_shared *fw_shared;

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
		fw_shared->present_flag_0 = 0;
	}

	if (amdgpu_sriov_vf(adev))
		amdgpu_virt_free_mm_table(adev);

	r = amdgpu_vcn_suspend(adev);
	if (r)
		return r;

	r = amdgpu_vcn_sw_fini(adev);

	return r;
}

/**
 * vcn_v2_5_hw_init - start and test VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Initialize the hardware, boot up the VCPU and do some testing
 */
static int vcn_v2_5_hw_init(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	struct amdgpu_ring *ring;
	int i, j, r = 0;

	if (amdgpu_sriov_vf(adev))
		r = vcn_v2_5_sriov_start(adev);

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (amdgpu_sriov_vf(adev)) {
			adev->vcn.inst[j].ring_enc[0].sched.ready = true;
			adev->vcn.inst[j].ring_enc[1].sched.ready = false;
			adev->vcn.inst[j].ring_enc[2].sched.ready = false;
			adev->vcn.inst[j].ring_dec.sched.ready = true;
		} else {

			ring = &adev->vcn.inst[j].ring_dec;

			adev->nbio.funcs->vcn_doorbell_range(adev, ring->use_doorbell,
						     ring->doorbell_index, j);

			r = amdgpu_ring_test_helper(ring);
			if (r)
				goto done;

			for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
				ring = &adev->vcn.inst[j].ring_enc[i];
				r = amdgpu_ring_test_helper(ring);
				if (r)
					goto done;
			}
		}
	}

done:
	if (!r)
		DRM_INFO("VCN decode and encode initialized successfully(under %s).\n",
			(adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)?"DPG Mode":"SPG Mode");

	return r;
}

/**
 * vcn_v2_5_hw_fini - stop the hardware block
 *
 * @handle: amdgpu_device pointer
 *
 * Stop the VCN block, mark ring as not ready any more
 */
static int vcn_v2_5_hw_fini(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		if ((adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) ||
		    (adev->vcn.cur_state != AMD_PG_STATE_GATE &&
		     RREG32_SOC15(VCN, i, mmUVD_STATUS)))
			vcn_v2_5_set_powergating_state(adev, AMD_PG_STATE_GATE);
	}

	return 0;
}

/**
 * vcn_v2_5_suspend - suspend VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * HW fini and suspend VCN block
 */
static int vcn_v2_5_suspend(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = vcn_v2_5_hw_fini(adev);
	if (r)
		return r;

	r = amdgpu_vcn_suspend(adev);

	return r;
}

/**
 * vcn_v2_5_resume - resume VCN block
 *
 * @handle: amdgpu_device pointer
 *
 * Resume firmware and hw init VCN block
 */
static int vcn_v2_5_resume(void *handle)
{
	int r;
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;

	r = amdgpu_vcn_resume(adev);
	if (r)
		return r;

	r = vcn_v2_5_hw_init(adev);

	return r;
}

/**
 * vcn_v2_5_mc_resume - memory controller programming
 *
 * @adev: amdgpu_device pointer
 *
 * Let the VCN memory controller know its offsets
 */
static void vcn_v2_5_mc_resume(struct amdgpu_device *adev)
{
	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
	uint32_t offset;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* cache window 0: fw */
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo));
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi));
			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0, 0);
			offset = 0;
		} else {
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW,
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH,
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = size;
			WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET0,
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE0, size);

		/* cache window 1: stack */
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW,
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH,
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET1, 0);
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE1, AMDGPU_VCN_STACK_SIZE);

		/* cache window 2: context */
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW,
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH,
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE));
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_OFFSET2, 0);
		WREG32_SOC15(VCN, i, mmUVD_VCPU_CACHE_SIZE2, AMDGPU_VCN_CONTEXT_SIZE);

		/* non-cache window */
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW,
			lower_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH,
			upper_32_bits(adev->vcn.inst[i].fw_shared_gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_OFFSET0, 0);
		WREG32_SOC15(VCN, i, mmUVD_VCPU_NONCACHE_SIZE0,
			AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)));
	}
}

static void vcn_v2_5_mc_resume_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	uint32_t size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
	uint32_t offset;

	/* cache window 0: fw */
	if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
		if (!indirect) {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_lo), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				(adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + inst_idx].tmr_mc_addr_hi), 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		} else {
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH), 0, 0, indirect);
			WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
				VCN, 0, mmUVD_VCPU_CACHE_OFFSET0), 0, 0, indirect);
		}
		offset = 0;
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr), 0, indirect);
		offset = size;
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_OFFSET0),
			AMDGPU_UVD_FIRMWARE_OFFSET >> 3, 0, indirect);
	}

	if (!indirect)
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), size, 0, indirect);
	else
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_SIZE0), 0, 0, indirect);

	/* cache window 1: stack */
	if (!indirect) {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset), 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	} else {
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH), 0, 0, indirect);
		WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
			VCN, 0, mmUVD_VCPU_CACHE_OFFSET1), 0, 0, indirect);
	}
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CACHE_SIZE1), AMDGPU_VCN_STACK_SIZE, 0, indirect);

	/* cache window 2: context */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].gpu_addr + offset + AMDGPU_VCN_STACK_SIZE), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CACHE_OFFSET2), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CACHE_SIZE2), AMDGPU_VCN_CONTEXT_SIZE, 0, indirect);

	/* non-cache window */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_LOW),
		lower_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_VCPU_NC0_64BIT_BAR_HIGH),
		upper_32_bits(adev->vcn.inst[inst_idx].fw_shared_gpu_addr), 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_NONCACHE_OFFSET0), 0, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_NONCACHE_SIZE0),
		AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared)), 0, indirect);

	/* VCN global tiling registers */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_GFX8_ADDR_CONFIG), adev->gfx.config.gb_addr_config, 0, indirect);
}

/**
 * vcn_v2_5_disable_clock_gating - disable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 *
 * Disable clock gating for VCN block
 */
static void vcn_v2_5_disable_clock_gating(struct amdgpu_device *adev)
{
	uint32_t data;
	int ret = 0;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* UVD disable CGC */
		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		else
			data &= ~UVD_CGC_CTRL__DYN_CLOCK_MODE_MASK;
		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		data = RREG32_SOC15(VCN, i, mmUVD_CGC_GATE);
		data &= ~(UVD_CGC_GATE__SYS_MASK
			| UVD_CGC_GATE__UDEC_MASK
			| UVD_CGC_GATE__MPEG2_MASK
			| UVD_CGC_GATE__REGS_MASK
			| UVD_CGC_GATE__RBC_MASK
			| UVD_CGC_GATE__LMI_MC_MASK
			| UVD_CGC_GATE__LMI_UMC_MASK
			| UVD_CGC_GATE__IDCT_MASK
			| UVD_CGC_GATE__MPRD_MASK
			| UVD_CGC_GATE__MPC_MASK
			| UVD_CGC_GATE__LBSI_MASK
			| UVD_CGC_GATE__LRBBM_MASK
			| UVD_CGC_GATE__UDEC_RE_MASK
			| UVD_CGC_GATE__UDEC_CM_MASK
			| UVD_CGC_GATE__UDEC_IT_MASK
			| UVD_CGC_GATE__UDEC_DB_MASK
			| UVD_CGC_GATE__UDEC_MP_MASK
			| UVD_CGC_GATE__WCB_MASK
			| UVD_CGC_GATE__VCPU_MASK
			| UVD_CGC_GATE__MMSCH_MASK);

		WREG32_SOC15(VCN, i, mmUVD_CGC_GATE, data);

		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_CGC_GATE, 0,  0xFFFFFFFF, ret);

		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK
			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
			| UVD_CGC_CTRL__SYS_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MODE_MASK
			| UVD_CGC_CTRL__MPEG2_MODE_MASK
			| UVD_CGC_CTRL__REGS_MODE_MASK
			| UVD_CGC_CTRL__RBC_MODE_MASK
			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
			| UVD_CGC_CTRL__IDCT_MODE_MASK
			| UVD_CGC_CTRL__MPRD_MODE_MASK
			| UVD_CGC_CTRL__MPC_MODE_MASK
			| UVD_CGC_CTRL__LBSI_MODE_MASK
			| UVD_CGC_CTRL__LRBBM_MODE_MASK
			| UVD_CGC_CTRL__WCB_MODE_MASK
			| UVD_CGC_CTRL__VCPU_MODE_MASK
			| UVD_CGC_CTRL__MMSCH_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		/* turn on */
		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE);
		data |= (UVD_SUVD_CGC_GATE__SRE_MASK
			| UVD_SUVD_CGC_GATE__SIT_MASK
			| UVD_SUVD_CGC_GATE__SMP_MASK
			| UVD_SUVD_CGC_GATE__SCM_MASK
			| UVD_SUVD_CGC_GATE__SDB_MASK
			| UVD_SUVD_CGC_GATE__SRE_H264_MASK
			| UVD_SUVD_CGC_GATE__SRE_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SIT_H264_MASK
			| UVD_SUVD_CGC_GATE__SIT_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SCM_H264_MASK
			| UVD_SUVD_CGC_GATE__SCM_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SDB_H264_MASK
			| UVD_SUVD_CGC_GATE__SDB_HEVC_MASK
			| UVD_SUVD_CGC_GATE__SCLR_MASK
			| UVD_SUVD_CGC_GATE__UVD_SC_MASK
			| UVD_SUVD_CGC_GATE__ENT_MASK
			| UVD_SUVD_CGC_GATE__SIT_HEVC_DEC_MASK
			| UVD_SUVD_CGC_GATE__SIT_HEVC_ENC_MASK
			| UVD_SUVD_CGC_GATE__SITE_MASK
			| UVD_SUVD_CGC_GATE__SRE_VP9_MASK
			| UVD_SUVD_CGC_GATE__SCM_VP9_MASK
			| UVD_SUVD_CGC_GATE__SIT_VP9_DEC_MASK
			| UVD_SUVD_CGC_GATE__SDB_VP9_MASK
			| UVD_SUVD_CGC_GATE__IME_HEVC_MASK);
		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_GATE, data);

		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
		data &= ~(UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
	}
}

static void vcn_v2_5_clock_gating_dpg_mode(struct amdgpu_device *adev,
		uint8_t sram_sel, int inst_idx, uint8_t indirect)
{
	uint32_t reg_data = 0;

	/* enable sw clock gating control */
	if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
		reg_data = 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	else
		reg_data = 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
	reg_data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
	reg_data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
	reg_data &= ~(UVD_CGC_CTRL__UDEC_RE_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_CM_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_IT_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_DB_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_MP_MODE_MASK |
		 UVD_CGC_CTRL__SYS_MODE_MASK |
		 UVD_CGC_CTRL__UDEC_MODE_MASK |
		 UVD_CGC_CTRL__MPEG2_MODE_MASK |
		 UVD_CGC_CTRL__REGS_MODE_MASK |
		 UVD_CGC_CTRL__RBC_MODE_MASK |
		 UVD_CGC_CTRL__LMI_MC_MODE_MASK |
		 UVD_CGC_CTRL__LMI_UMC_MODE_MASK |
		 UVD_CGC_CTRL__IDCT_MODE_MASK |
		 UVD_CGC_CTRL__MPRD_MODE_MASK |
		 UVD_CGC_CTRL__MPC_MODE_MASK |
		 UVD_CGC_CTRL__LBSI_MODE_MASK |
		 UVD_CGC_CTRL__LRBBM_MODE_MASK |
		 UVD_CGC_CTRL__WCB_MODE_MASK |
		 UVD_CGC_CTRL__VCPU_MODE_MASK |
		 UVD_CGC_CTRL__MMSCH_MODE_MASK);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_CGC_CTRL), reg_data, sram_sel, indirect);

	/* turn off clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_CGC_GATE), 0, sram_sel, indirect);

	/* turn on SUVD clock gating */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_SUVD_CGC_GATE), 1, sram_sel, indirect);

	/* turn on sw mode in UVD_SUVD_CGC_CTRL */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_SUVD_CGC_CTRL), 0, sram_sel, indirect);
}

/**
 * vcn_v2_5_enable_clock_gating - enable VCN clock gating
 *
 * @adev: amdgpu_device pointer
 *
 * Enable clock gating for VCN block
 */
static void vcn_v2_5_enable_clock_gating(struct amdgpu_device *adev)
{
	uint32_t data = 0;
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* enable UVD CGC */
		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		if (adev->cg_flags & AMD_CG_SUPPORT_VCN_MGCG)
			data |= 1 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		else
			data |= 0 << UVD_CGC_CTRL__DYN_CLOCK_MODE__SHIFT;
		data |= 1 << UVD_CGC_CTRL__CLK_GATE_DLY_TIMER__SHIFT;
		data |= 4 << UVD_CGC_CTRL__CLK_OFF_DELAY__SHIFT;
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		data = RREG32_SOC15(VCN, i, mmUVD_CGC_CTRL);
		data |= (UVD_CGC_CTRL__UDEC_RE_MODE_MASK
			| UVD_CGC_CTRL__UDEC_CM_MODE_MASK
			| UVD_CGC_CTRL__UDEC_IT_MODE_MASK
			| UVD_CGC_CTRL__UDEC_DB_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MP_MODE_MASK
			| UVD_CGC_CTRL__SYS_MODE_MASK
			| UVD_CGC_CTRL__UDEC_MODE_MASK
			| UVD_CGC_CTRL__MPEG2_MODE_MASK
			| UVD_CGC_CTRL__REGS_MODE_MASK
			| UVD_CGC_CTRL__RBC_MODE_MASK
			| UVD_CGC_CTRL__LMI_MC_MODE_MASK
			| UVD_CGC_CTRL__LMI_UMC_MODE_MASK
			| UVD_CGC_CTRL__IDCT_MODE_MASK
			| UVD_CGC_CTRL__MPRD_MODE_MASK
			| UVD_CGC_CTRL__MPC_MODE_MASK
			| UVD_CGC_CTRL__LBSI_MODE_MASK
			| UVD_CGC_CTRL__LRBBM_MODE_MASK
			| UVD_CGC_CTRL__WCB_MODE_MASK
			| UVD_CGC_CTRL__VCPU_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_CGC_CTRL, data);

		data = RREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL);
		data |= (UVD_SUVD_CGC_CTRL__SRE_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SIT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SMP_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCM_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SDB_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SCLR_MODE_MASK
			| UVD_SUVD_CGC_CTRL__UVD_SC_MODE_MASK
			| UVD_SUVD_CGC_CTRL__ENT_MODE_MASK
			| UVD_SUVD_CGC_CTRL__IME_MODE_MASK
			| UVD_SUVD_CGC_CTRL__SITE_MODE_MASK);
		WREG32_SOC15(VCN, i, mmUVD_SUVD_CGC_CTRL, data);
	}
}

static int vcn_v2_5_start_dpg_mode(struct amdgpu_device *adev, int inst_idx, bool indirect)
{
	volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;
	struct amdgpu_ring *ring;
	uint32_t rb_bufsz, tmp;

	/* disable register anti-hang mechanism */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 1,
		~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
	/* enable dynamic power gating mode */
	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS);
	tmp |= UVD_POWER_STATUS__UVD_PG_MODE_MASK;
	tmp |= UVD_POWER_STATUS__UVD_PG_EN_MASK;
	WREG32_SOC15(VCN, inst_idx, mmUVD_POWER_STATUS, tmp);

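	/* with indirect programming, the register writes below are staged in the DPG scratch SRAM buffer */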
	if (indirect)
		adev->vcn.inst[inst_idx].dpg_sram_curr_addr = (uint32_t*)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr;

	/* enable clock gating */
	vcn_v2_5_clock_gating_dpg_mode(adev, 0, inst_idx, indirect);

	/* enable VCPU clock */
	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	tmp |= UVD_VCPU_CNTL__BLK_RST_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);

	/* disable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MASTINT_EN), 0, 0, indirect);

	/* setup mmUVD_LMI_CTRL */
	tmp = (0x8 | UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK |
		UVD_LMI_CTRL__REQ_MODE_MASK |
		UVD_LMI_CTRL__CRC_RESET_MASK |
		UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
		UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
		UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK |
		(8 << UVD_LMI_CTRL__WRITE_CLEAN_TIMER__SHIFT) |
		0x00100000L);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_CTRL), tmp, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_CNTL),
		0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT, 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_SET_MUXA0),
		((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_SET_MUXB0),
		((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
		 (0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
		 (0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)), 0, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MPC_SET_MUX),
		((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
		 (0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
		 (0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)), 0, indirect);

	vcn_v2_5_mc_resume_dpg_mode(adev, inst_idx, indirect);

	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_REG_XX_MASK), 0x10, 0, indirect);
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_RBC_XX_IB_REG_CHECK), 0x3, 0, indirect);

	/* enable LMI MC and UMC channels */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_LMI_CTRL2), 0, 0, indirect);

	/* unblock VCPU register access */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_RB_ARB_CTRL), 0, 0, indirect);

	tmp = (0xFF << UVD_VCPU_CNTL__PRB_TIMEOUT_VAL__SHIFT);
	tmp |= UVD_VCPU_CNTL__CLK_EN_MASK;
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_VCPU_CNTL), tmp, 0, indirect);

	/* enable master interrupt */
	WREG32_SOC15_DPG_MODE(inst_idx, SOC15_DPG_MODE_OFFSET(
		VCN, 0, mmUVD_MASTINT_EN),
		UVD_MASTINT_EN__VCPU_EN_MASK, 0, indirect);

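	/* hand the staged register list to the PSP, which applies it through the DPG SRAM */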
	if (indirect)
		psp_update_vcn_sram(adev, inst_idx, adev->vcn.inst[inst_idx].dpg_sram_gpu_addr,
				    (uint32_t)((uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_curr_addr -
					       (uintptr_t)adev->vcn.inst[inst_idx].dpg_sram_cpu_addr));

	ring = &adev->vcn.inst[inst_idx].ring_dec;
	/* force RBC into idle state */
	rb_bufsz = order_base_2(ring->ring_size);
	tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
	tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_CNTL, tmp);

	/* Stall DPG before WPTR/RPTR reset */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
		UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
		~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);
	fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;

	/* set the write pointer delay */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR_CNTL, 0);

	/* set the wb address */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR_ADDR,
		(upper_32_bits(ring->gpu_addr) >> 2));

	/* program the RB_BASE for ring buffer */
	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
		lower_32_bits(ring->gpu_addr));
	WREG32_SOC15(VCN, inst_idx, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
		upper_32_bits(ring->gpu_addr));

	/* Initialize the ring buffer's read and write pointers */
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR, 0);

	WREG32_SOC15(VCN, inst_idx, mmUVD_SCRATCH2, 0);

	ring->wptr = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_RPTR);
	WREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR,
		lower_32_bits(ring->wptr));

	fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;
	/* Unstall DPG */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
		0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

	return 0;
}

static int vcn_v2_5_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t rb_bufsz, tmp;
	int i, j, k, r;

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, true);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v2_5_start_dpg_mode(adev, i, adev->vcn.indirect_sram);
			continue;
		}

		/* disable register anti-hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS), 0,
			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);

		/* set uvd status busy */
		tmp = RREG32_SOC15(VCN, i, mmUVD_STATUS) | UVD_STATUS__UVD_BUSY;
		WREG32_SOC15(VCN, i, mmUVD_STATUS, tmp);
	}

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG)
		return 0;

	/* SW clock gating */
	vcn_v2_5_disable_clock_gating(adev);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* enable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__CLK_EN_MASK, ~UVD_VCPU_CNTL__CLK_EN_MASK);

		/* disable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN), 0,
			~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* setup mmUVD_LMI_CTRL */
		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL);
		tmp &= ~0xff;
		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL, tmp | 0x8|
			UVD_LMI_CTRL__WRITE_CLEAN_TIMER_EN_MASK	|
			UVD_LMI_CTRL__MASK_MC_URGENT_MASK |
			UVD_LMI_CTRL__DATA_COHERENCY_EN_MASK |
			UVD_LMI_CTRL__VCPU_DATA_COHERENCY_EN_MASK);

		/* setup mmUVD_MPC_CNTL */
		tmp = RREG32_SOC15(VCN, i, mmUVD_MPC_CNTL);
		tmp &= ~UVD_MPC_CNTL__REPLACEMENT_MODE_MASK;
		tmp |= 0x2 << UVD_MPC_CNTL__REPLACEMENT_MODE__SHIFT;
		WREG32_SOC15(VCN, i, mmUVD_MPC_CNTL, tmp);

		/* setup UVD_MPC_SET_MUXA0 */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXA0,
			((0x1 << UVD_MPC_SET_MUXA0__VARA_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUXA0__VARA_2__SHIFT) |
			(0x3 << UVD_MPC_SET_MUXA0__VARA_3__SHIFT) |
			(0x4 << UVD_MPC_SET_MUXA0__VARA_4__SHIFT)));

		/* setup UVD_MPC_SET_MUXB0 */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUXB0,
			((0x1 << UVD_MPC_SET_MUXB0__VARB_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUXB0__VARB_2__SHIFT) |
			(0x3 << UVD_MPC_SET_MUXB0__VARB_3__SHIFT) |
			(0x4 << UVD_MPC_SET_MUXB0__VARB_4__SHIFT)));

		/* setup mmUVD_MPC_SET_MUX */
		WREG32_SOC15(VCN, i, mmUVD_MPC_SET_MUX,
			((0x0 << UVD_MPC_SET_MUX__SET_0__SHIFT) |
			(0x1 << UVD_MPC_SET_MUX__SET_1__SHIFT) |
			(0x2 << UVD_MPC_SET_MUX__SET_2__SHIFT)));
	}

	vcn_v2_5_mc_resume(adev);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[i].fw_shared_cpu_addr;
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		/* VCN global tiling registers */
		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);
		WREG32_SOC15(VCN, i, mmUVD_GFX8_ADDR_CONFIG,
			adev->gfx.config.gb_addr_config);

		/* enable LMI MC and UMC channels */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_LMI_CTRL2), 0,
			~UVD_LMI_CTRL2__STALL_ARB_UMC_MASK);

		/* unblock VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL), 0,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
			~UVD_VCPU_CNTL__BLK_RST_MASK);

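		/* poll UVD_STATUS for the VCPU-ready bit, resetting the VCPU between attempts */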
		for (k = 0; k < 10; ++k) {
			uint32_t status;

			for (j = 0; j < 100; ++j) {
				status = RREG32_SOC15(VCN, i, mmUVD_STATUS);
				if (status & 2)
					break;
				if (amdgpu_emu_mode == 1)
					msleep(500);
				else
					mdelay(10);
			}
			r = 0;
			if (status & 2)
				break;

			DRM_ERROR("VCN decode not responding, trying to reset the VCPU!!!\n");
			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
				UVD_VCPU_CNTL__BLK_RST_MASK,
				~UVD_VCPU_CNTL__BLK_RST_MASK);
			mdelay(10);
			WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
				~UVD_VCPU_CNTL__BLK_RST_MASK);

			mdelay(10);
			r = -1;
		}

		if (r) {
			DRM_ERROR("VCN decode not responding, giving up!!!\n");
			return r;
		}

		/* enable master interrupt */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_MASTINT_EN),
			UVD_MASTINT_EN__VCPU_EN_MASK,
			~UVD_MASTINT_EN__VCPU_EN_MASK);

		/* clear the busy bit of VCN_STATUS */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS), 0,
			~(2 << UVD_STATUS__VCPU_REPORT__SHIFT));

		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_VMID, 0);

		ring = &adev->vcn.inst[i].ring_dec;
		/* force RBC into idle state */
		rb_bufsz = order_base_2(ring->ring_size);
		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_CNTL, tmp);

		fw_shared->multi_queue.decode_queue_mode |= FW_QUEUE_RING_RESET;
		/* program the RB_BASE for ring buffer */
		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_LOW,
			lower_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH,
			upper_32_bits(ring->gpu_addr));

		/* Initialize the ring buffer's read and write pointers */
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR, 0);

		ring->wptr = RREG32_SOC15(VCN, i, mmUVD_RBC_RB_RPTR);
		WREG32_SOC15(VCN, i, mmUVD_RBC_RB_WPTR,
				lower_32_bits(ring->wptr));
		fw_shared->multi_queue.decode_queue_mode &= ~FW_QUEUE_RING_RESET;

		fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
		ring = &adev->vcn.inst[i].ring_enc[0];
		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO, ring->gpu_addr);
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE, ring->ring_size / 4);
		fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;

		fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
		ring = &adev->vcn.inst[i].ring_enc[1];
		WREG32_SOC15(VCN, i, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_LO2, ring->gpu_addr);
		WREG32_SOC15(VCN, i, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
		WREG32_SOC15(VCN, i, mmUVD_RB_SIZE2, ring->ring_size / 4);
		fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;
	}

	return 0;
}

static int vcn_v2_5_mmsch_start(struct amdgpu_device *adev,
				struct amdgpu_mm_table *table)
{
	uint32_t data = 0, loop = 0, size = 0;
	uint64_t addr = table->gpu_addr;
	struct mmsch_v1_1_init_header *header = NULL;

	header = (struct mmsch_v1_1_init_header *)table->cpu_addr;
	size = header->total_size;

	/*
	 * 1, write to vce_mmsch_vf_ctx_addr_lo/hi register with GPU mc addr of
	 *  memory descriptor location
	 */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_LO, lower_32_bits(addr));
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_ADDR_HI, upper_32_bits(addr));

	/* 2, update vmid of descriptor */
	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID);
	data &= ~MMSCH_VF_VMID__VF_CTX_VMID_MASK;
	/* use domain0 for MM scheduler */
	data |= (0 << MMSCH_VF_VMID__VF_CTX_VMID__SHIFT);
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_VMID, data);

	/* 3, notify mmsch about the size of this descriptor */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_CTX_SIZE, size);

	/* 4, set resp to zero */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP, 0);

	/*
	 * 5, kick off the initialization and wait until
	 * VCE_MMSCH_VF_MAILBOX_RESP becomes non-zero
	 */
	WREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_HOST, 0x10000001);

	data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
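	/* poll the response mailbox until the MMSCH reports completion (0x10000002) or retries run out */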
	loop = 10;
	while ((data & 0x10000002) != 0x10000002) {
		udelay(100);
		data = RREG32_SOC15(VCN, 0, mmMMSCH_VF_MAILBOX_RESP);
		loop--;
		if (!loop)
			break;
	}

	if (!loop) {
		dev_err(adev->dev,
			"failed to init MMSCH, mmMMSCH_VF_MAILBOX_RESP = %x\n",
			data);
		return -EBUSY;
	}

	return 0;
}

static int vcn_v2_5_sriov_start(struct amdgpu_device *adev)
{
	struct amdgpu_ring *ring;
	uint32_t offset, size, tmp, i, rb_bufsz;
	uint32_t table_size = 0;
	struct mmsch_v1_0_cmd_direct_write direct_wt = { { 0 } };
	struct mmsch_v1_0_cmd_direct_read_modify_write direct_rd_mod_wt = { { 0 } };
	struct mmsch_v1_0_cmd_end end = { { 0 } };
	uint32_t *init_table = adev->virt.mm_table.cpu_addr;
	struct mmsch_v1_1_init_header *header = (struct mmsch_v1_1_init_header *)init_table;

	direct_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_WRITE;
	direct_rd_mod_wt.cmd_header.command_type = MMSCH_COMMAND__DIRECT_REG_READ_MODIFY_WRITE;
	end.cmd_header.command_type = MMSCH_COMMAND__END;

	header->version = MMSCH_VERSION;
	header->total_size = sizeof(struct mmsch_v1_1_init_header) >> 2;
	init_table += header->total_size;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		header->eng[i].table_offset = header->total_size;
		header->eng[i].init_status = 0;
		header->eng[i].table_size = 0;

		table_size = 0;

		MMSCH_V1_0_INSERT_DIRECT_RD_MOD_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_STATUS),
			~UVD_STATUS__UVD_BUSY, UVD_STATUS__UVD_BUSY);

		size = AMDGPU_GPU_PAGE_ALIGN(adev->vcn.fw->size + 4);
		/* mc resume*/
		if (adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) {
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_lo);
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				adev->firmware.ucode[AMDGPU_UCODE_ID_VCN + i].tmr_mc_addr_hi);
			offset = 0;
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0), 0);
		} else {
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_LOW),
				lower_32_bits(adev->vcn.inst[i].gpu_addr));
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i,
					mmUVD_LMI_VCPU_CACHE_64BIT_BAR_HIGH),
				upper_32_bits(adev->vcn.inst[i].gpu_addr));
			offset = size;
			MMSCH_V1_0_INSERT_DIRECT_WT(
				SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET0),
				AMDGPU_UVD_FIRMWARE_OFFSET >> 3);
		}

		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE0),
			size);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE1_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET1),
			0);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE1),
			AMDGPU_VCN_STACK_SIZE);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_LOW),
			lower_32_bits(adev->vcn.inst[i].gpu_addr + offset +
				AMDGPU_VCN_STACK_SIZE));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_VCPU_CACHE2_64BIT_BAR_HIGH),
			upper_32_bits(adev->vcn.inst[i].gpu_addr + offset +
				AMDGPU_VCN_STACK_SIZE));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_OFFSET2),
			0);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CACHE_SIZE2),
			AMDGPU_VCN_CONTEXT_SIZE);

		ring = &adev->vcn.inst[i].ring_enc[0];
		ring->wptr = 0;

		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_LO),
			lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_BASE_HI),
			upper_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RB_SIZE),
			ring->ring_size / 4);

		ring = &adev->vcn.inst[i].ring_dec;
		ring->wptr = 0;
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_RBC_RB_64BIT_BAR_LOW),
			lower_32_bits(ring->gpu_addr));
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i,
				mmUVD_LMI_RBC_RB_64BIT_BAR_HIGH),
			upper_32_bits(ring->gpu_addr));

		/* force RBC into idle state */
		rb_bufsz = order_base_2(ring->ring_size);
		tmp = REG_SET_FIELD(0, UVD_RBC_RB_CNTL, RB_BUFSZ, rb_bufsz);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_BLKSZ, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_FETCH, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_NO_UPDATE, 1);
		tmp = REG_SET_FIELD(tmp, UVD_RBC_RB_CNTL, RB_RPTR_WR_EN, 1);
		MMSCH_V1_0_INSERT_DIRECT_WT(
			SOC15_REG_OFFSET(VCN, i, mmUVD_RBC_RB_CNTL), tmp);

		/* add end packet */
		memcpy((void *)init_table, &end, sizeof(struct mmsch_v1_0_cmd_end));
		table_size += sizeof(struct mmsch_v1_0_cmd_end) / 4;
		init_table += sizeof(struct mmsch_v1_0_cmd_end) / 4;

		/* refine header */
		header->eng[i].table_size = table_size;
		header->total_size += table_size;
	}

	return vcn_v2_5_mmsch_start(adev, &adev->virt.mm_table);
}

static int vcn_v2_5_stop_dpg_mode(struct amdgpu_device *adev, int inst_idx)
{
	int ret_code = 0;
	uint32_t tmp;

	/* Wait for power status to be 1 */
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);

	/* wait for read ptr to be equal to write ptr */
	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);

	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2);
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RB_RPTR2, tmp, 0xFFFFFFFF, ret_code);

	tmp = RREG32_SOC15(VCN, inst_idx, mmUVD_RBC_RB_WPTR) & 0x7FFFFFFF;
	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_RBC_RB_RPTR, tmp, 0xFFFFFFFF, ret_code);

	SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 1,
		UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);

	/* disable dynamic power gating mode */
	WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS), 0,
			~UVD_POWER_STATUS__UVD_PG_MODE_MASK);

	return 0;
}

static int vcn_v2_5_stop(struct amdgpu_device *adev)
{
	uint32_t tmp;
	int i, r = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			r = vcn_v2_5_stop_dpg_mode(adev, i);
			continue;
		}

		/* wait for vcn idle */
		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE, 0x7, r);
		if (r)
			return r;

		tmp = UVD_LMI_STATUS__VCPU_LMI_WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__READ_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_MASK |
			UVD_LMI_STATUS__WRITE_CLEAN_RAW_MASK;
		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
		if (r)
			return r;

		/* block LMI UMC channel */
		tmp = RREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2);
		tmp |= UVD_LMI_CTRL2__STALL_ARB_UMC_MASK;
		WREG32_SOC15(VCN, i, mmUVD_LMI_CTRL2, tmp);

		tmp = UVD_LMI_STATUS__UMC_READ_CLEAN_RAW_MASK|
			UVD_LMI_STATUS__UMC_WRITE_CLEAN_RAW_MASK;
		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_LMI_STATUS, tmp, tmp, r);
		if (r)
			return r;

		/* block VCPU register access */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_RB_ARB_CTRL),
			UVD_RB_ARB_CTRL__VCPU_DIS_MASK,
			~UVD_RB_ARB_CTRL__VCPU_DIS_MASK);

		/* reset VCPU */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL),
			UVD_VCPU_CNTL__BLK_RST_MASK,
			~UVD_VCPU_CNTL__BLK_RST_MASK);

		/* disable VCPU clock */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_VCPU_CNTL), 0,
			~(UVD_VCPU_CNTL__CLK_EN_MASK));

		/* clear status */
		WREG32_SOC15(VCN, i, mmUVD_STATUS, 0);

		vcn_v2_5_enable_clock_gating(adev);

		/* enable register anti-hang mechanism */
		WREG32_P(SOC15_REG_OFFSET(VCN, i, mmUVD_POWER_STATUS),
			UVD_POWER_STATUS__UVD_POWER_STATUS_MASK,
			~UVD_POWER_STATUS__UVD_POWER_STATUS_MASK);
	}

	if (adev->pm.dpm_enabled)
		amdgpu_dpm_enable_uvd(adev, false);

	return 0;
}

static int vcn_v2_5_pause_dpg_mode(struct amdgpu_device *adev,
				int inst_idx, struct dpg_pause_state *new_state)
{
	struct amdgpu_ring *ring;
	uint32_t reg_data = 0;
	int ret_code = 0;

	/* pause/unpause if state is changed */
	if (adev->vcn.inst[inst_idx].pause_state.fw_based != new_state->fw_based) {
		DRM_DEBUG("dpg pause state changed %d -> %d",
			adev->vcn.inst[inst_idx].pause_state.fw_based,	new_state->fw_based);
		reg_data = RREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE) &
			(~UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK);

		if (new_state->fw_based == VCN_DPG_STATE__PAUSE) {
			SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);

			if (!ret_code) {
				volatile struct amdgpu_fw_shared *fw_shared = adev->vcn.inst[inst_idx].fw_shared_cpu_addr;

				/* pause DPG */
				reg_data |= UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
				WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);

				/* wait for ACK */
				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_DPG_PAUSE,
					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK,
					   UVD_DPG_PAUSE__NJ_PAUSE_DPG_ACK_MASK, ret_code);

				/* Stall DPG before WPTR/RPTR reset */
				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
					   UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK,
					   ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

				/* Restore */
				fw_shared->multi_queue.encode_generalpurpose_queue_mode |= FW_QUEUE_RING_RESET;
				ring = &adev->vcn.inst[inst_idx].ring_enc[0];
				ring->wptr = 0;
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO, ring->gpu_addr);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI, upper_32_bits(ring->gpu_addr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE, ring->ring_size / 4);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR, lower_32_bits(ring->wptr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
				fw_shared->multi_queue.encode_generalpurpose_queue_mode &= ~FW_QUEUE_RING_RESET;

				fw_shared->multi_queue.encode_lowlatency_queue_mode |= FW_QUEUE_RING_RESET;
				ring = &adev->vcn.inst[inst_idx].ring_enc[1];
				ring->wptr = 0;
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_LO2, ring->gpu_addr);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_BASE_HI2, upper_32_bits(ring->gpu_addr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_SIZE2, ring->ring_size / 4);
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_RPTR2, lower_32_bits(ring->wptr));
				WREG32_SOC15(VCN, inst_idx, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
				fw_shared->multi_queue.encode_lowlatency_queue_mode &= ~FW_QUEUE_RING_RESET;

				/* Unstall DPG */
				WREG32_P(SOC15_REG_OFFSET(VCN, inst_idx, mmUVD_POWER_STATUS),
					   0, ~UVD_POWER_STATUS__STALL_DPG_POWER_UP_MASK);

				SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS,
					   UVD_PGFSM_CONFIG__UVDM_UVDU_PWR_ON, UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
			}
		} else {
			reg_data &= ~UVD_DPG_PAUSE__NJ_PAUSE_DPG_REQ_MASK;
			WREG32_SOC15(VCN, inst_idx, mmUVD_DPG_PAUSE, reg_data);
			SOC15_WAIT_ON_RREG(VCN, inst_idx, mmUVD_POWER_STATUS, 0x1,
				UVD_POWER_STATUS__UVD_POWER_STATUS_MASK, ret_code);
		}
		adev->vcn.inst[inst_idx].pause_state.fw_based = new_state->fw_based;
	}

	return 0;
}

/**
 * vcn_v2_5_dec_ring_get_rptr - get read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware read pointer
 */
static uint64_t vcn_v2_5_dec_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_RPTR);
}

/**
 * vcn_v2_5_dec_ring_get_wptr - get write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware write pointer
 */
static uint64_t vcn_v2_5_dec_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell)
		return adev->wb.wb[ring->wptr_offs];
	else
		return RREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR);
}

/**
 * vcn_v2_5_dec_ring_set_wptr - set write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the write pointer to the hardware
 */
static void vcn_v2_5_dec_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring->use_doorbell) {
		adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
		WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
	} else {
		WREG32_SOC15(VCN, ring->me, mmUVD_RBC_RB_WPTR, lower_32_bits(ring->wptr));
	}
}

static const struct amdgpu_ring_funcs vcn_v2_5_dec_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_DEC,
	.align_mask = 0xf,
	.vmhub = AMDGPU_MMHUB_1,
	.get_rptr = vcn_v2_5_dec_ring_get_rptr,
	.get_wptr = vcn_v2_5_dec_ring_get_wptr,
	.set_wptr = vcn_v2_5_dec_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 6 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 8 +
		8 + /* vcn_v2_0_dec_ring_emit_vm_flush */
		14 + 14 + /* vcn_v2_0_dec_ring_emit_fence x2 vm fence */
		6,
	.emit_ib_size = 8, /* vcn_v2_0_dec_ring_emit_ib */
	.emit_ib = vcn_v2_0_dec_ring_emit_ib,
	.emit_fence = vcn_v2_0_dec_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_dec_ring_emit_vm_flush,
	.test_ring = vcn_v2_0_dec_ring_test_ring,
	.test_ib = amdgpu_vcn_dec_ring_test_ib,
	.insert_nop = vcn_v2_0_dec_ring_insert_nop,
	.insert_start = vcn_v2_0_dec_ring_insert_start,
	.insert_end = vcn_v2_0_dec_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_dec_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_dec_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

/**
 * vcn_v2_5_enc_ring_get_rptr - get enc read pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc read pointer
 */
static uint64_t vcn_v2_5_enc_ring_get_rptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0])
		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR);
	else
		return RREG32_SOC15(VCN, ring->me, mmUVD_RB_RPTR2);
}

/**
 * vcn_v2_5_enc_ring_get_wptr - get enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Returns the current hardware enc write pointer
 */
static uint64_t vcn_v2_5_enc_ring_get_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell)
			return adev->wb.wb[ring->wptr_offs];
		else
			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR);
	} else {
		if (ring->use_doorbell)
			return adev->wb.wb[ring->wptr_offs];
		else
			return RREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2);
	}
}

/**
 * vcn_v2_5_enc_ring_set_wptr - set enc write pointer
 *
 * @ring: amdgpu_ring pointer
 *
 * Commits the enc write pointer to the hardware
 */
static void vcn_v2_5_enc_ring_set_wptr(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;

	if (ring == &adev->vcn.inst[ring->me].ring_enc[0]) {
		if (ring->use_doorbell) {
			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR, lower_32_bits(ring->wptr));
		}
	} else {
		if (ring->use_doorbell) {
			adev->wb.wb[ring->wptr_offs] = lower_32_bits(ring->wptr);
			WDOORBELL32(ring->doorbell_index, lower_32_bits(ring->wptr));
		} else {
			WREG32_SOC15(VCN, ring->me, mmUVD_RB_WPTR2, lower_32_bits(ring->wptr));
		}
	}
}

static const struct amdgpu_ring_funcs vcn_v2_5_enc_ring_vm_funcs = {
	.type = AMDGPU_RING_TYPE_VCN_ENC,
	.align_mask = 0x3f,
	.nop = VCN_ENC_CMD_NO_OP,
	.vmhub = AMDGPU_MMHUB_1,
	.get_rptr = vcn_v2_5_enc_ring_get_rptr,
	.get_wptr = vcn_v2_5_enc_ring_get_wptr,
	.set_wptr = vcn_v2_5_enc_ring_set_wptr,
	.emit_frame_size =
		SOC15_FLUSH_GPU_TLB_NUM_WREG * 3 +
		SOC15_FLUSH_GPU_TLB_NUM_REG_WAIT * 4 +
		4 + /* vcn_v2_0_enc_ring_emit_vm_flush */
		5 + 5 + /* vcn_v2_0_enc_ring_emit_fence x2 vm fence */
		1, /* vcn_v2_0_enc_ring_insert_end */
	.emit_ib_size = 5, /* vcn_v2_0_enc_ring_emit_ib */
	.emit_ib = vcn_v2_0_enc_ring_emit_ib,
	.emit_fence = vcn_v2_0_enc_ring_emit_fence,
	.emit_vm_flush = vcn_v2_0_enc_ring_emit_vm_flush,
	.test_ring = amdgpu_vcn_enc_ring_test_ring,
	.test_ib = amdgpu_vcn_enc_ring_test_ib,
	.insert_nop = amdgpu_ring_insert_nop,
	.insert_end = vcn_v2_0_enc_ring_insert_end,
	.pad_ib = amdgpu_ring_generic_pad_ib,
	.begin_use = amdgpu_vcn_ring_begin_use,
	.end_use = amdgpu_vcn_ring_end_use,
	.emit_wreg = vcn_v2_0_enc_ring_emit_wreg,
	.emit_reg_wait = vcn_v2_0_enc_ring_emit_reg_wait,
	.emit_reg_write_reg_wait = amdgpu_ring_emit_reg_write_reg_wait_helper,
};

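/**
 * vcn_v2_5_set_dec_ring_funcs - set dec ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Hook up the VCN 2.5 decode ring function table for every
 * non-harvested VCN instance.
 */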
static void vcn_v2_5_set_dec_ring_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		adev->vcn.inst[i].ring_dec.funcs = &vcn_v2_5_dec_ring_vm_funcs;
		adev->vcn.inst[i].ring_dec.me = i;
		DRM_INFO("VCN(%d) decode is enabled in VM mode\n", i);
	}
}

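/**
 * vcn_v2_5_set_enc_ring_funcs - set enc ring functions
 *
 * @adev: amdgpu_device pointer
 *
 * Hook up the VCN 2.5 encode ring function table for every encode
 * ring of every non-harvested VCN instance.
 */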
static void vcn_v2_5_set_enc_ring_funcs(struct amdgpu_device *adev)
{
	int i, j;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;
		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			adev->vcn.inst[j].ring_enc[i].funcs = &vcn_v2_5_enc_ring_vm_funcs;
			adev->vcn.inst[j].ring_enc[i].me = j;
		}
		DRM_INFO("VCN(%d) encode is enabled in VM mode\n", j);
	}
}

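/**
 * vcn_v2_5_is_idle - check VCN block idle status
 *
 * @handle: amdgpu_device pointer
 *
 * Returns true only when every non-harvested VCN instance reports
 * UVD_STATUS__IDLE.
 */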
static bool vcn_v2_5_is_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 1;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		ret &= (RREG32_SOC15(VCN, i, mmUVD_STATUS) == UVD_STATUS__IDLE);
	}

	return ret;
}

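/**
 * vcn_v2_5_wait_for_idle - wait for VCN block to become idle
 *
 * @handle: amdgpu_device pointer
 *
 * Polls UVD_STATUS on each non-harvested instance; returns 0 once all
 * instances report idle, or a negative error code on timeout.
 */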
static int vcn_v2_5_wait_for_idle(void *handle)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int i, ret = 0;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		SOC15_WAIT_ON_RREG(VCN, i, mmUVD_STATUS, UVD_STATUS__IDLE,
			UVD_STATUS__IDLE, ret);
		if (ret)
			return ret;
	}

	return ret;
}

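/**
 * vcn_v2_5_set_clockgating_state - enable/disable VCN clock gating
 *
 * @handle: amdgpu_device pointer
 * @state: requested clockgating state
 *
 * Clock gating is only enabled while the block is idle; SR-IOV guests
 * do not touch clock gating at all.
 */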
static int vcn_v2_5_set_clockgating_state(void *handle,
					  enum amd_clockgating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	bool enable = (state == AMD_CG_STATE_GATE);

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (enable) {
		if (!vcn_v2_5_is_idle(handle))
			return -EBUSY;
		vcn_v2_5_enable_clock_gating(adev);
	} else {
		vcn_v2_5_disable_clock_gating(adev);
	}

	return 0;
}

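/**
 * vcn_v2_5_set_powergating_state - gate/ungate VCN power
 *
 * @handle: amdgpu_device pointer
 * @state: requested powergating state
 *
 * Stops the block when gating and starts it when ungating, caching the
 * new state on success.  SR-IOV guests do not touch power gating.
 */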
static int vcn_v2_5_set_powergating_state(void *handle,
					  enum amd_powergating_state state)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)handle;
	int ret;

	if (amdgpu_sriov_vf(adev))
		return 0;

	if (state == adev->vcn.cur_state)
		return 0;

	if (state == AMD_PG_STATE_GATE)
		ret = vcn_v2_5_stop(adev);
	else
		ret = vcn_v2_5_start(adev);

	if (!ret)
		adev->vcn.cur_state = state;

	return ret;
}

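/*
 * No per-source interrupt state programming is done for VCN 2.5, so
 * this callback is a no-op.
 */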
static int vcn_v2_5_set_interrupt_state(struct amdgpu_device *adev,
					struct amdgpu_irq_src *source,
					unsigned type,
					enum amdgpu_interrupt_state state)
{
	return 0;
}

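/**
 * vcn_v2_5_process_interrupt - process VCN block interrupt
 *
 * @adev: amdgpu_device pointer
 * @source: interrupt source
 * @entry: interrupt vector entry
 *
 * Maps the IH client id to a VCN instance and signals the fence of the
 * decode or encode ring that raised the interrupt.
 */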
static int vcn_v2_5_process_interrupt(struct amdgpu_device *adev,
				      struct amdgpu_irq_src *source,
				      struct amdgpu_iv_entry *entry)
{
	uint32_t ip_instance;

	switch (entry->client_id) {
	case SOC15_IH_CLIENTID_VCN:
		ip_instance = 0;
		break;
	case SOC15_IH_CLIENTID_VCN1:
		ip_instance = 1;
		break;
	default:
		DRM_ERROR("Unhandled client id: %d\n", entry->client_id);
		return 0;
	}

	DRM_DEBUG("IH: VCN TRAP\n");

	switch (entry->src_id) {
	case VCN_2_0__SRCID__UVD_SYSTEM_MESSAGE_INTERRUPT:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_dec);
		break;
	case VCN_2_0__SRCID__UVD_ENC_GENERAL_PURPOSE:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[0]);
		break;
	case VCN_2_0__SRCID__UVD_ENC_LOW_LATENCY:
		amdgpu_fence_process(&adev->vcn.inst[ip_instance].ring_enc[1]);
		break;
	default:
		DRM_ERROR("Unhandled interrupt: %d %d\n",
			  entry->src_id, entry->src_data[0]);
		break;
	}

	return 0;
}

static const struct amdgpu_irq_src_funcs vcn_v2_5_irq_funcs = {
	.set = vcn_v2_5_set_interrupt_state,
	.process = vcn_v2_5_process_interrupt,
};

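/**
 * vcn_v2_5_set_irq_funcs - set VCN interrupt functions
 *
 * @adev: amdgpu_device pointer
 *
 * Each non-harvested instance registers one interrupt type per encode
 * ring plus one for the decode ring.
 */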
static void vcn_v2_5_set_irq_funcs(struct amdgpu_device *adev)
{
	int i;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		adev->vcn.inst[i].irq.num_types = adev->vcn.num_enc_rings + 1;
		adev->vcn.inst[i].irq.funcs = &vcn_v2_5_irq_funcs;
	}
}

static const struct amd_ip_funcs vcn_v2_5_ip_funcs = {
	.name = "vcn_v2_5",
	.early_init = vcn_v2_5_early_init,
	.late_init = NULL,
	.sw_init = vcn_v2_5_sw_init,
	.sw_fini = vcn_v2_5_sw_fini,
	.hw_init = vcn_v2_5_hw_init,
	.hw_fini = vcn_v2_5_hw_fini,
	.suspend = vcn_v2_5_suspend,
	.resume = vcn_v2_5_resume,
	.is_idle = vcn_v2_5_is_idle,
	.wait_for_idle = vcn_v2_5_wait_for_idle,
	.check_soft_reset = NULL,
	.pre_soft_reset = NULL,
	.soft_reset = NULL,
	.post_soft_reset = NULL,
	.set_clockgating_state = vcn_v2_5_set_clockgating_state,
	.set_powergating_state = vcn_v2_5_set_powergating_state,
};

const struct amdgpu_ip_block_version vcn_v2_5_ip_block =
{
		.type = AMD_IP_BLOCK_TYPE_VCN,
		.major = 2,
		.minor = 5,
		.rev = 0,
		.funcs = &vcn_v2_5_ip_funcs,
};