kfd_device_queue_manager.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

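/* Return true if at least one queue on the given MEC/pipe pair is
 * reserved for KFD in the shared queue bitmap.
 */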
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* dqm->dev->device_info->num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

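/* Assign a doorbell to the queue. On pre-SOC15 ASICs the doorbell index
 * equals the queue ID to preserve the user mode ABI; on SOC15 SDMA queues
 * use static per-engine assignments and CP queues take a free slot from
 * the per-process doorbell bitmap.
 */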
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */
		uint32_t *idx_offset =
				dev->shared_resources.sdma_doorbell_idx;

		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
			+ (q->properties.sdma_queue_id & 1)
			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
			+ (q->properties.sdma_queue_id >> 1);
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

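/* Reserve a KFD VMID for the process (no-HWS path), program the
 * PASID-VMID mapping and the page table base, then flush the VM context.
 */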
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

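/* Flush the texture cache by submitting a release_mem packet through an
 * indirect buffer in the process context (used where CP does not flush
 * TC at dequeue).
 */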
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	if (WARN(q->process->mm != current->mm,
		 "should only run in user thread"))
		retval = -EFAULT;
	else
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
					   &q->properties, current->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	dqm_unlock(dqm);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

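/* Evict all active queues of a process (no-HWS path): mark them evicted,
 * deactivate them and remove their HQDs so they stop executing until the
 * process is restored.
 */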
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		retval = -EFAULT;
		goto out;
	}

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

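/* Register a process with the DQM when its first queue is created: track
 * its qpd, record the page directory base and apply per-ASIC QPD setup.
 */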
static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;
	kfd_inc_compute_active(dqm->dev);

	dqm_unlock(dqm);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			kfd_dec_compute_active(dqm->dev);
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid, vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

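/* No-HWS initialization: build the per-pipe HQD allocation masks from the
 * shared queue bitmap and reset the VMID and SDMA queue bitmaps.
 */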
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

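/* Reserve one SDMA queue slot from the global SDMA bitmap. */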
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = __ffs64(dqm->sdma_bitmap);
	dqm->sdma_bitmap &= ~(1ULL << bit);
	*sdma_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_id)
{
	if (sdma_id >= get_num_sdma_queues(dqm))
		return;
	dqm->sdma_bitmap |= (1ULL << sdma_id);
}

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA];

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	pr_debug("SDMA id is:    %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
				NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	dqm_lock(dqm);
	/* clear hang status when driver try to start the hw scheduler */
	dqm->is_hws_hang = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out;
		q->properties.sdma_queue_id =
			q->sdma_id / get_num_sdma_engines(dqm);
		q->properties.sdma_engine_id =
			q->sdma_id % get_num_sdma_engines(dqm);
		pr_debug("SDMA id is:    %d\n", q->sdma_id);
		pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
		pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);
	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	dqm_lock(dqm);

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out:
	return retval;
}

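/* Poll the GART fence location until the scheduler writes fence_value to
 * *fence_addr or the timeout expires; used to confirm that queue
 * unmapping has completed.
 */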
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
		schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	return map_queues_cpsch(dqm);
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	/* Do uninit_mqd after dqm_unlock(dqm) to avoid circular locking */
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

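/* Set the user-mode trap handler. With CWSR enabled the user TBA/TMA are
 * written into the CWSR TMA page so the CWSR trap handler can chain to
 * them; otherwise they are kept in the qpd and applied at queue creation.
 */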
static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			kfd_dec_compute_active(dqm->dev);
			break;
		}
	}

	dqm_unlock(dqm);
	return retval;
}

static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;
	int r;

	dqm_lock(dqm);

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_COMPUTE];

	if (!mqd_mgr->get_wave_state) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
			ctl_stack_used_size, save_area_used_size);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			kfd_dec_compute_active(dqm->dev);
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	dqm_unlock(dqm);

	/* Lastly, free mqd resources.
	 * Do uninit_mqd() after dqm_unlock to avoid circular locking.
	 */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

	return retval;
}

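/* Create one MQD manager per MQD type for this ASIC; on failure free the
 * managers allocated so far.
 */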
static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one hiq mqd (HWS) and all SDMA mqd in a contiguous trunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_dev *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		dev->device_info->num_sdma_engines *
		dev->device_info->num_sdma_queues_per_engine +
		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->kgd, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), true);

	return retval;
}

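/* Allocate and initialize the device queue manager: choose the scheduling
 * policy (no HWS on Hawaii and Tonga), wire up the cpsch or nocpsch ops,
 * the per-ASIC callbacks, the MQD managers and the HIQ/SDMA MQD trunk.
 */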
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		device_queue_manager_init_v9(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	if (allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void deallocate_hiq_sdma_mqd(struct kfd_dev *dev, struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd trunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->kgd, mqd->gtt_mem);
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

int kfd_process_vm_fault(struct device_queue_manager *dqm,
			 unsigned int pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s    %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
		KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
	if (!r) {
		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
				KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
				KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
				KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  CP Pipe %d, Queue %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
		for (queue = 0;
		     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif