/*
 * Copyright 2016 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */

#include <linux/firmware.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <drm/drm_drv.h>

#include "amdgpu.h"
#include "amdgpu_pm.h"
#include "amdgpu_vcn.h"
#include "soc15d.h"

/* Firmware Names */
#define FIRMWARE_RAVEN		"amdgpu/raven_vcn.bin"
#define FIRMWARE_PICASSO	"amdgpu/picasso_vcn.bin"
#define FIRMWARE_RAVEN2		"amdgpu/raven2_vcn.bin"
#define FIRMWARE_ARCTURUS	"amdgpu/arcturus_vcn.bin"
#define FIRMWARE_RENOIR		"amdgpu/renoir_vcn.bin"
#define FIRMWARE_GREEN_SARDINE	"amdgpu/green_sardine_vcn.bin"
#define FIRMWARE_NAVI10		"amdgpu/navi10_vcn.bin"
#define FIRMWARE_NAVI14		"amdgpu/navi14_vcn.bin"
#define FIRMWARE_NAVI12		"amdgpu/navi12_vcn.bin"
#define FIRMWARE_SIENNA_CICHLID	"amdgpu/sienna_cichlid_vcn.bin"
#define FIRMWARE_NAVY_FLOUNDER	"amdgpu/navy_flounder_vcn.bin"
#define FIRMWARE_VANGOGH	"amdgpu/vangogh_vcn.bin"
#define FIRMWARE_DIMGREY_CAVEFISH	"amdgpu/dimgrey_cavefish_vcn.bin"
#define FIRMWARE_ALDEBARAN	"amdgpu/aldebaran_vcn.bin"
#define FIRMWARE_BEIGE_GOBY	"amdgpu/beige_goby_vcn.bin"
#define FIRMWARE_YELLOW_CARP	"amdgpu/yellow_carp_vcn.bin"

MODULE_FIRMWARE(FIRMWARE_RAVEN);
MODULE_FIRMWARE(FIRMWARE_PICASSO);
MODULE_FIRMWARE(FIRMWARE_RAVEN2);
MODULE_FIRMWARE(FIRMWARE_ARCTURUS);
MODULE_FIRMWARE(FIRMWARE_RENOIR);
MODULE_FIRMWARE(FIRMWARE_GREEN_SARDINE);
MODULE_FIRMWARE(FIRMWARE_ALDEBARAN);
MODULE_FIRMWARE(FIRMWARE_NAVI10);
MODULE_FIRMWARE(FIRMWARE_NAVI14);
MODULE_FIRMWARE(FIRMWARE_NAVI12);
MODULE_FIRMWARE(FIRMWARE_SIENNA_CICHLID);
MODULE_FIRMWARE(FIRMWARE_NAVY_FLOUNDER);
MODULE_FIRMWARE(FIRMWARE_VANGOGH);
MODULE_FIRMWARE(FIRMWARE_DIMGREY_CAVEFISH);
MODULE_FIRMWARE(FIRMWARE_BEIGE_GOBY);
MODULE_FIRMWARE(FIRMWARE_YELLOW_CARP);

static void amdgpu_vcn_idle_work_handler(struct work_struct *work);

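/**
 * amdgpu_vcn_sw_init - initialize the VCN software state
 *
 * @adev: amdgpu_device pointer
 *
 * Pick and request the firmware for the ASIC, parse its version header,
 * and allocate the per-instance VCPU buffer (plus the DPG scratch buffer
 * when indirect SRAM is used).
 */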
int amdgpu_vcn_sw_init(struct amdgpu_device *adev)
{
	unsigned long bo_size;
	const char *fw_name;
	const struct common_firmware_header *hdr;
	unsigned char fw_check;
	int i, r;

	INIT_DELAYED_WORK(&adev->vcn.idle_work, amdgpu_vcn_idle_work_handler);
	mutex_init(&adev->vcn.vcn_pg_lock);
	mutex_init(&adev->vcn.vcn1_jpeg1_workaround);
	atomic_set(&adev->vcn.total_submission_cnt, 0);
	for (i = 0; i < adev->vcn.num_vcn_inst; i++)
		atomic_set(&adev->vcn.inst[i].dpg_enc_submission_cnt, 0);

	switch (adev->asic_type) {
	case CHIP_RAVEN:
		if (adev->apu_flags & AMD_APU_IS_RAVEN2)
			fw_name = FIRMWARE_RAVEN2;
		else if (adev->apu_flags & AMD_APU_IS_PICASSO)
			fw_name = FIRMWARE_PICASSO;
		else
			fw_name = FIRMWARE_RAVEN;
		break;
	case CHIP_ARCTURUS:
		fw_name = FIRMWARE_ARCTURUS;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_RENOIR:
		if (adev->apu_flags & AMD_APU_IS_RENOIR)
			fw_name = FIRMWARE_RENOIR;
		else
			fw_name = FIRMWARE_GREEN_SARDINE;

		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_ALDEBARAN:
		fw_name = FIRMWARE_ALDEBARAN;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI10:
		fw_name = FIRMWARE_NAVI10;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI14:
		fw_name = FIRMWARE_NAVI14;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVI12:
		fw_name = FIRMWARE_NAVI12;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_SIENNA_CICHLID:
		fw_name = FIRMWARE_SIENNA_CICHLID;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_NAVY_FLOUNDER:
		fw_name = FIRMWARE_NAVY_FLOUNDER;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_VANGOGH:
		fw_name = FIRMWARE_VANGOGH;
		break;
	case CHIP_DIMGREY_CAVEFISH:
		fw_name = FIRMWARE_DIMGREY_CAVEFISH;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_BEIGE_GOBY:
		fw_name = FIRMWARE_BEIGE_GOBY;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	case CHIP_YELLOW_CARP:
		fw_name = FIRMWARE_YELLOW_CARP;
		if ((adev->firmware.load_type == AMDGPU_FW_LOAD_PSP) &&
		    (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG))
			adev->vcn.indirect_sram = true;
		break;
	default:
		return -EINVAL;
	}

	r = request_firmware(&adev->vcn.fw, fw_name, adev->dev);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't load firmware \"%s\"\n",
			fw_name);
		return r;
	}

	r = amdgpu_ucode_validate(adev->vcn.fw);
	if (r) {
		dev_err(adev->dev, "amdgpu_vcn: Can't validate firmware \"%s\"\n",
			fw_name);
		release_firmware(adev->vcn.fw);
		adev->vcn.fw = NULL;
		return r;
	}

	hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
	adev->vcn.fw_version = le32_to_cpu(hdr->ucode_version);

	/* Bits 20-23 hold the encode major version and are non-zero in the new
	 * naming convention. In the old naming convention this field is part of
	 * the version minor and DRM_DISABLED_FLAG. Since the latest version
	 * minor is 0x5B and DRM_DISABLED_FLAG is zero in the old convention,
	 * this field has always been zero so far. These four bits are used to
	 * tell which naming convention is present.
	 */
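	/* Worked example with a hypothetical header value: for ucode_version
	 * 0x1210A00C, fw_check = (0x1210A00C >> 20) & 0xf = 1, i.e. the new
	 * convention, which decodes to ENC 1.10, DEC 2, VEP 1, Revision 12.
	 */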
	fw_check = (le32_to_cpu(hdr->ucode_version) >> 20) & 0xf;
	if (fw_check) {
		unsigned int dec_ver, enc_major, enc_minor, vep, fw_rev;

		fw_rev = le32_to_cpu(hdr->ucode_version) & 0xfff;
		enc_minor = (le32_to_cpu(hdr->ucode_version) >> 12) & 0xff;
		enc_major = fw_check;
		dec_ver = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xf;
		vep = (le32_to_cpu(hdr->ucode_version) >> 28) & 0xf;
		DRM_INFO("Found VCN firmware Version ENC: %u.%u DEC: %u VEP: %u Revision: %u\n",
			enc_major, enc_minor, dec_ver, vep, fw_rev);
	} else {
		unsigned int version_major, version_minor, family_id;

		family_id = le32_to_cpu(hdr->ucode_version) & 0xff;
		version_major = (le32_to_cpu(hdr->ucode_version) >> 24) & 0xff;
		version_minor = (le32_to_cpu(hdr->ucode_version) >> 8) & 0xff;
		DRM_INFO("Found VCN firmware Version: %u.%u Family ID: %u\n",
			version_major, version_minor, family_id);
	}

	bo_size = AMDGPU_VCN_STACK_SIZE + AMDGPU_VCN_CONTEXT_SIZE;
	if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP)
		bo_size += AMDGPU_GPU_PAGE_ALIGN(le32_to_cpu(hdr->ucode_size_bytes) + 8);
	bo_size += AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

	for (i = 0; i < adev->vcn.num_vcn_inst; i++) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;

		r = amdgpu_bo_create_kernel(adev, bo_size, PAGE_SIZE,
						AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].vcpu_bo,
						&adev->vcn.inst[i].gpu_addr, &adev->vcn.inst[i].cpu_addr);
		if (r) {
			dev_err(adev->dev, "(%d) failed to allocate vcn bo\n", r);
			return r;
		}
		adev->vcn.inst[i].fw_shared_cpu_addr = adev->vcn.inst[i].cpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));
		adev->vcn.inst[i].fw_shared_gpu_addr = adev->vcn.inst[i].gpu_addr +
				bo_size - AMDGPU_GPU_PAGE_ALIGN(sizeof(struct amdgpu_fw_shared));

		if (adev->vcn.indirect_sram) {
			r = amdgpu_bo_create_kernel(adev, 64 * 2 * 4, PAGE_SIZE,
					AMDGPU_GEM_DOMAIN_VRAM, &adev->vcn.inst[i].dpg_sram_bo,
					&adev->vcn.inst[i].dpg_sram_gpu_addr, &adev->vcn.inst[i].dpg_sram_cpu_addr);
			if (r) {
				dev_err(adev->dev, "VCN %d (%d) failed to allocate DPG bo\n", i, r);
				return r;
			}
		}
	}

	return 0;
}

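/**
 * amdgpu_vcn_sw_fini - tear down the VCN software state
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work, free the buffers allocated in amdgpu_vcn_sw_init(),
 * finalize the rings and release the firmware.
 */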
int amdgpu_vcn_sw_fini(struct amdgpu_device *adev)
{
	int i, j;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		if (adev->vcn.indirect_sram) {
			amdgpu_bo_free_kernel(&adev->vcn.inst[j].dpg_sram_bo,
						  &adev->vcn.inst[j].dpg_sram_gpu_addr,
						  (void **)&adev->vcn.inst[j].dpg_sram_cpu_addr);
		}
		kvfree(adev->vcn.inst[j].saved_bo);

		amdgpu_bo_free_kernel(&adev->vcn.inst[j].vcpu_bo,
					  &adev->vcn.inst[j].gpu_addr,
					  (void **)&adev->vcn.inst[j].cpu_addr);

		amdgpu_ring_fini(&adev->vcn.inst[j].ring_dec);

		for (i = 0; i < adev->vcn.num_enc_rings; ++i)
			amdgpu_ring_fini(&adev->vcn.inst[j].ring_enc[i]);
	}

	release_firmware(adev->vcn.fw);
	mutex_destroy(&adev->vcn.vcn1_jpeg1_workaround);
	mutex_destroy(&adev->vcn.vcn_pg_lock);

	return 0;
}

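/**
 * amdgpu_vcn_suspend - save the VCPU buffer contents
 *
 * @adev: amdgpu_device pointer
 *
 * Cancel the idle work and copy each instance's VCPU buffer to system
 * memory so it can be restored by amdgpu_vcn_resume().
 */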
int amdgpu_vcn_suspend(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	cancel_delayed_work_sync(&adev->vcn.idle_work);

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return 0;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		adev->vcn.inst[i].saved_bo = kvmalloc(size, GFP_KERNEL);
		if (!adev->vcn.inst[i].saved_bo)
			return -ENOMEM;
		if (drm_dev_enter(&adev->ddev, &idx)) {
			memcpy_fromio(adev->vcn.inst[i].saved_bo, ptr, size);
			drm_dev_exit(idx);
		}
	}
	return 0;
}

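/**
 * amdgpu_vcn_resume - restore the VCPU buffer contents
 *
 * @adev: amdgpu_device pointer
 *
 * Copy the saved VCPU buffer back into place, or reload the firmware
 * image and clear the rest of the buffer when no saved copy exists.
 */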
int amdgpu_vcn_resume(struct amdgpu_device *adev)
{
	unsigned size;
	void *ptr;
	int i, idx;

	for (i = 0; i < adev->vcn.num_vcn_inst; ++i) {
		if (adev->vcn.harvest_config & (1 << i))
			continue;
		if (adev->vcn.inst[i].vcpu_bo == NULL)
			return -EINVAL;

		size = amdgpu_bo_size(adev->vcn.inst[i].vcpu_bo);
		ptr = adev->vcn.inst[i].cpu_addr;

		if (adev->vcn.inst[i].saved_bo != NULL) {
			if (drm_dev_enter(&adev->ddev, &idx)) {
				memcpy_toio(ptr, adev->vcn.inst[i].saved_bo, size);
				drm_dev_exit(idx);
			}
			kvfree(adev->vcn.inst[i].saved_bo);
			adev->vcn.inst[i].saved_bo = NULL;
		} else {
			const struct common_firmware_header *hdr;
			unsigned offset;

			hdr = (const struct common_firmware_header *)adev->vcn.fw->data;
			if (adev->firmware.load_type != AMDGPU_FW_LOAD_PSP) {
				offset = le32_to_cpu(hdr->ucode_array_offset_bytes);
				if (drm_dev_enter(&adev->ddev, &idx)) {
					memcpy_toio(adev->vcn.inst[i].cpu_addr, adev->vcn.fw->data + offset,
						    le32_to_cpu(hdr->ucode_size_bytes));
					drm_dev_exit(idx);
				}
				size -= le32_to_cpu(hdr->ucode_size_bytes);
				ptr += le32_to_cpu(hdr->ucode_size_bytes);
			}
			memset_io(ptr, 0, size);
		}
	}
	return 0;
}

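/*
 * Idle work handler: count the fences still emitted on all VCN rings; when
 * everything is idle and nothing is being submitted, gate the VCN power and
 * drop the video power profile, otherwise re-arm the delayed work.
 */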
static void amdgpu_vcn_idle_work_handler(struct work_struct *work)
{
	struct amdgpu_device *adev =
		container_of(work, struct amdgpu_device, vcn.idle_work.work);
	unsigned int fences = 0, fence[AMDGPU_MAX_VCN_INSTANCES] = {0};
	unsigned int i, j;
	int r = 0;

	for (j = 0; j < adev->vcn.num_vcn_inst; ++j) {
		if (adev->vcn.harvest_config & (1 << j))
			continue;

		for (i = 0; i < adev->vcn.num_enc_rings; ++i) {
			fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_enc[i]);
		}
		if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
			struct dpg_pause_state new_state;
			if (fence[j] ||
				unlikely(atomic_read(&adev->vcn.inst[j].dpg_enc_submission_cnt)))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
			adev->vcn.pause_dpg_mode(adev, j, &new_state);
		}

		fence[j] += amdgpu_fence_count_emitted(&adev->vcn.inst[j].ring_dec);
		fences += fence[j];
	}
	if (!fences && !atomic_read(&adev->vcn.total_submission_cnt)) {
		amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
		       AMD_PG_STATE_GATE);
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
				false);
		if (r)
			dev_warn(adev->dev, "(%d) failed to disable video power profile mode\n", r);
	} else {
		schedule_delayed_work(&adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
	}
}

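/**
 * amdgpu_vcn_ring_begin_use - power up VCN before using a ring
 *
 * @ring: ring about to be used
 *
 * Switch to the video power profile, ungate VCN power and, on DPG capable
 * parts, select the pause state matching the pending workload.
 */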
void amdgpu_vcn_ring_begin_use(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	int r = 0;

	atomic_inc(&adev->vcn.total_submission_cnt);

	if (!cancel_delayed_work_sync(&adev->vcn.idle_work)) {
		r = amdgpu_dpm_switch_power_profile(adev, PP_SMC_POWER_PROFILE_VIDEO,
				true);
		if (r)
			dev_warn(adev->dev, "(%d) failed to switch to video power profile mode\n", r);
	}

	mutex_lock(&adev->vcn.vcn_pg_lock);
	amdgpu_device_ip_set_powergating_state(adev, AMD_IP_BLOCK_TYPE_VCN,
	       AMD_PG_STATE_UNGATE);

	if (adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG) {
		struct dpg_pause_state new_state;

		if (ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC) {
			atomic_inc(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt);
			new_state.fw_based = VCN_DPG_STATE__PAUSE;
		} else {
			unsigned int fences = 0;
			unsigned int i;
			for (i = 0; i < adev->vcn.num_enc_rings; ++i)
				fences += amdgpu_fence_count_emitted(&adev->vcn.inst[ring->me].ring_enc[i]);

			if (fences || atomic_read(&adev->vcn.inst[ring->me].dpg_enc_submission_cnt))
				new_state.fw_based = VCN_DPG_STATE__PAUSE;
			else
				new_state.fw_based = VCN_DPG_STATE__UNPAUSE;
		}

		adev->vcn.pause_dpg_mode(adev, ring->me, &new_state);
	}
	mutex_unlock(&adev->vcn.vcn_pg_lock);
}

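/**
 * amdgpu_vcn_ring_end_use - mark the end of a ring submission
 *
 * @ring: ring that was used
 *
 * Drop the submission counts and re-arm the idle work so the block can be
 * power gated once it goes idle.
 */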
void amdgpu_vcn_ring_end_use(struct amdgpu_ring *ring)
{
	if (ring->adev->pg_flags & AMD_PG_SUPPORT_VCN_DPG &&
		ring->funcs->type == AMDGPU_RING_TYPE_VCN_ENC)
		atomic_dec(&ring->adev->vcn.inst[ring->me].dpg_enc_submission_cnt);

	atomic_dec(&ring->adev->vcn.total_submission_cnt);

	schedule_delayed_work(&ring->adev->vcn.idle_work, VCN_IDLE_TIMEOUT);
}

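/**
 * amdgpu_vcn_dec_ring_test_ring - register write/read test on a decode ring
 *
 * @ring: ring to test
 *
 * Write 0xDEADBEEF to the scratch register through the ring and poll until
 * it reads back; returns -ETIMEDOUT when it never does.
 */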
int amdgpu_vcn_dec_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t tmp = 0;
	unsigned i;
	int r;

	/* VCN in SRIOV does not support direct register read/write */
	if (amdgpu_sriov_vf(adev))
		return 0;

	WREG32(adev->vcn.inst[ring->me].external.scratch9, 0xCAFEDEAD);
	r = amdgpu_ring_alloc(ring, 3);
	if (r)
		return r;
	amdgpu_ring_write(ring, PACKET0(adev->vcn.internal.scratch9, 0));
	amdgpu_ring_write(ring, 0xDEADBEEF);
	amdgpu_ring_commit(ring);
	for (i = 0; i < adev->usec_timeout; i++) {
		tmp = RREG32(adev->vcn.inst[ring->me].external.scratch9);
		if (tmp == 0xDEADBEEF)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

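/**
 * amdgpu_vcn_dec_sw_ring_test_ring - test a software decode ring
 *
 * @ring: ring to test
 *
 * Submit an END command and wait for the read pointer to advance.
 */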
int amdgpu_vcn_dec_sw_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned int i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_DEC_SW_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

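/*
 * Submit the message in @bo to the decoder: a directly submitted IB writes
 * the buffer's GPU address into the decoder's data registers, then the BO
 * is fenced and freed.
 */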
static int amdgpu_vcn_dec_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	void *msg = NULL;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, 64,
					AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	msg = amdgpu_bo_kptr(bo);
	ib->ptr[0] = PACKET0(adev->vcn.internal.data0, 0);
	ib->ptr[1] = addr;
	ib->ptr[2] = PACKET0(adev->vcn.internal.data1, 0);
	ib->ptr[3] = addr >> 32;
	ib->ptr[4] = PACKET0(adev->vcn.internal.cmd, 0);
	ib->ptr[5] = 0;
	for (i = 6; i < 16; i += 2) {
		ib->ptr[i] = PACKET0(adev->vcn.internal.nop, 0);
		ib->ptr[i+1] = 0;
	}
	ib->length_dw = 16;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, (void **)&msg);
	return r;
}

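/* Compose a decoder create-session message (1920x1088) in a newly allocated
 * and reserved BO, returned through @bo.
 */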
static int amdgpu_vcn_dec_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000038);
	msg[2] = cpu_to_le32(0x00000001);
	msg[3] = cpu_to_le32(0x00000000);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	msg[6] = cpu_to_le32(0x00000001);
	msg[7] = cpu_to_le32(0x00000028);
	msg[8] = cpu_to_le32(0x00000010);
	msg[9] = cpu_to_le32(0x00000000);
	msg[10] = cpu_to_le32(0x00000007);
	msg[11] = cpu_to_le32(0x00000000);
	msg[12] = cpu_to_le32(0x00000780);
	msg[13] = cpu_to_le32(0x00000440);
	for (i = 14; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

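/* Compose a decoder destroy-session message in a newly allocated and
 * reserved BO, returned through @bo.
 */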
static int amdgpu_vcn_dec_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo **bo)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t *msg;
	int r, i;

	*bo = NULL;
	r = amdgpu_bo_create_reserved(adev, 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      bo, NULL, (void **)&msg);
	if (r)
		return r;

	msg[0] = cpu_to_le32(0x00000028);
	msg[1] = cpu_to_le32(0x00000018);
	msg[2] = cpu_to_le32(0x00000000);
	msg[3] = cpu_to_le32(0x00000002);
	msg[4] = cpu_to_le32(handle);
	msg[5] = cpu_to_le32(0x00000000);
	for (i = 6; i < 1024; ++i)
		msg[i] = cpu_to_le32(0x0);

	return 0;
}

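/**
 * amdgpu_vcn_dec_ring_test_ib - IB test on a decode ring
 *
 * @ring: ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Send a create- and a destroy-session message and wait for the fence of
 * the destroy message to signal.
 */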
int amdgpu_vcn_dec_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

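/*
 * Software-ring variant of amdgpu_vcn_dec_send_msg(): the message address is
 * passed in an amdgpu_vcn_decode_buffer structure embedded in the IB instead
 * of being written through the decoder registers.
 */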
static int amdgpu_vcn_dec_sw_send_msg(struct amdgpu_ring *ring,
				   struct amdgpu_bo *bo,
				   struct dma_fence **fence)
{
	struct amdgpu_vcn_decode_buffer *decode_buffer = NULL;
	const unsigned int ib_size_dw = 64;
	struct amdgpu_device *adev = ring->adev;
	struct dma_fence *f = NULL;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(adev, ib_size_dw * 4,
				AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		goto err;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);
	ib->length_dw = 0;

	ib->ptr[ib->length_dw++] = sizeof(struct amdgpu_vcn_decode_buffer) + 8;
	ib->ptr[ib->length_dw++] = cpu_to_le32(AMDGPU_VCN_IB_FLAG_DECODE_BUFFER);
	decode_buffer = (struct amdgpu_vcn_decode_buffer *)&(ib->ptr[ib->length_dw]);
	ib->length_dw += sizeof(struct amdgpu_vcn_decode_buffer) / 4;
	memset(decode_buffer, 0, sizeof(struct amdgpu_vcn_decode_buffer));

	decode_buffer->valid_buf_flag |= cpu_to_le32(AMDGPU_VCN_CMD_FLAG_MSG_BUFFER);
	decode_buffer->msg_buffer_address_hi = cpu_to_le32(addr >> 32);
	decode_buffer->msg_buffer_address_lo = cpu_to_le32(addr);

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err_free;

	amdgpu_bo_fence(bo, f, false);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err_free:
	amdgpu_job_free(job);

err:
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_unref(&bo);
	return r;
}

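/**
 * amdgpu_vcn_dec_sw_ring_test_ib - IB test on a software decode ring
 *
 * @ring: ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Same flow as amdgpu_vcn_dec_ring_test_ib(), using the software-ring
 * message submission path.
 */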
int amdgpu_vcn_dec_sw_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo;
	long r;

	r = amdgpu_vcn_dec_get_create_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, NULL);
	if (r)
		goto error;
	r = amdgpu_vcn_dec_get_destroy_msg(ring, 1, &bo);
	if (r)
		goto error;

	r = amdgpu_vcn_dec_sw_send_msg(ring, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

	dma_fence_put(fence);
error:
	return r;
}

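/**
 * amdgpu_vcn_enc_ring_test_ring - test an encode ring
 *
 * @ring: ring to test
 *
 * Submit an END command and wait for the read pointer to advance.
 */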
int amdgpu_vcn_enc_ring_test_ring(struct amdgpu_ring *ring)
{
	struct amdgpu_device *adev = ring->adev;
	uint32_t rptr;
	unsigned i;
	int r;

	if (amdgpu_sriov_vf(adev))
		return 0;

	r = amdgpu_ring_alloc(ring, 16);
	if (r)
		return r;

	rptr = amdgpu_ring_get_rptr(ring);

	amdgpu_ring_write(ring, VCN_ENC_CMD_END);
	amdgpu_ring_commit(ring);

	for (i = 0; i < adev->usec_timeout; i++) {
		if (amdgpu_ring_get_rptr(ring) != rptr)
			break;
		udelay(1);
	}

	if (i >= adev->usec_timeout)
		r = -ETIMEDOUT;

	return r;
}

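/* Build and directly submit an encoder create-session IB that references
 * @bo; optionally return the resulting fence through @fence.
 */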
static int amdgpu_vcn_enc_get_create_msg(struct amdgpu_ring *ring, uint32_t handle,
					 struct amdgpu_bo *bo,
					 struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
					AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001; /* session info */
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002; /* task info */
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000001; /* op initialize */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

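/* Build and directly submit an encoder close-session IB that references
 * @bo; optionally return the resulting fence through @fence.
 */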
static int amdgpu_vcn_enc_get_destroy_msg(struct amdgpu_ring *ring, uint32_t handle,
					  struct amdgpu_bo *bo,
					  struct dma_fence **fence)
{
	const unsigned ib_size_dw = 16;
	struct amdgpu_job *job;
	struct amdgpu_ib *ib;
	struct dma_fence *f = NULL;
	uint64_t addr;
	int i, r;

	r = amdgpu_job_alloc_with_ib(ring->adev, ib_size_dw * 4,
					AMDGPU_IB_POOL_DIRECT, &job);
	if (r)
		return r;

	ib = &job->ibs[0];
	addr = amdgpu_bo_gpu_offset(bo);

	ib->length_dw = 0;
	ib->ptr[ib->length_dw++] = 0x00000018;
	ib->ptr[ib->length_dw++] = 0x00000001;
	ib->ptr[ib->length_dw++] = handle;
	ib->ptr[ib->length_dw++] = upper_32_bits(addr);
	ib->ptr[ib->length_dw++] = addr;
	ib->ptr[ib->length_dw++] = 0x0000000b;

	ib->ptr[ib->length_dw++] = 0x00000014;
	ib->ptr[ib->length_dw++] = 0x00000002;
	ib->ptr[ib->length_dw++] = 0x0000001c;
	ib->ptr[ib->length_dw++] = 0x00000000;
	ib->ptr[ib->length_dw++] = 0x00000000;

	ib->ptr[ib->length_dw++] = 0x00000008;
	ib->ptr[ib->length_dw++] = 0x08000002; /* op close session */

	for (i = ib->length_dw; i < ib_size_dw; ++i)
		ib->ptr[i] = 0x0;

	r = amdgpu_job_submit_direct(job, ring, &f);
	if (r)
		goto err;

	if (fence)
		*fence = dma_fence_get(f);
	dma_fence_put(f);

	return 0;

err:
	amdgpu_job_free(job);
	return r;
}

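/**
 * amdgpu_vcn_enc_ring_test_ib - IB test on an encode ring
 *
 * @ring: ring to test
 * @timeout: how long to wait for the fence, in jiffies
 *
 * Send a create- and a close-session message against a scratch BO and wait
 * for the fence of the close message to signal.
 */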
int amdgpu_vcn_enc_ring_test_ib(struct amdgpu_ring *ring, long timeout)
{
	struct dma_fence *fence = NULL;
	struct amdgpu_bo *bo = NULL;
	long r;

	r = amdgpu_bo_create_reserved(ring->adev, 128 * 1024, PAGE_SIZE,
				      AMDGPU_GEM_DOMAIN_VRAM,
				      &bo, NULL, NULL);
	if (r)
		return r;

	r = amdgpu_vcn_enc_get_create_msg(ring, 1, bo, NULL);
	if (r)
		goto error;

	r = amdgpu_vcn_enc_get_destroy_msg(ring, 1, bo, &fence);
	if (r)
		goto error;

	r = dma_fence_wait_timeout(fence, false, timeout);
	if (r == 0)
		r = -ETIMEDOUT;
	else if (r > 0)
		r = 0;

error:
	dma_fence_put(fence);
	amdgpu_bo_unreserve(bo);
	amdgpu_bo_free_kernel(&bo, NULL, NULL);

	return r;
}