kfd_device_queue_manager.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_sdma_engines(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines;
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info->num_sdma_engines
			* dqm->dev->device_info->num_sdma_queues_per_engine;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

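/*
 * Allocate a doorbell ID for the queue: pre-SOC15 ASICs reuse the queue ID to
 * preserve the user-mode ABI, SOC15 SDMA queues get a static per-engine
 * assignment, and SOC15 CP queues take a free slot from the process doorbell
 * bitmap. The ID is then converted to the doorbell offset used by user mode.
 */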
static int allocate_doorbell(struct qcm_process_device *qpd, struct queue *q)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */
		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for an SDMA engine is 512.
		 */
		uint32_t *idx_offset =
				dev->shared_resources.sdma_doorbell_idx;

		q->doorbell_id = idx_offset[q->properties.sdma_engine_id]
			+ (q->properties.sdma_queue_id & 1)
			* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
			+ (q->properties.sdma_queue_id >> 1);
	} else {
		/* For CP queues on SOC15 reserve a free doorbell ID */
		unsigned int found;

		found = find_first_zero_bit(qpd->doorbell_bitmap,
					    KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
		if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
			pr_debug("No doorbells available");
			return -EBUSY;
		}
		set_bit(found, qpd->doorbell_bitmap);
		q->doorbell_id = found;
	}

	q->properties.doorbell_off =
		kfd_doorbell_id_to_offset(dev, q->process,
					  q->doorbell_id);

	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev->device_info->asic_family) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

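/*
 * Allocate a VMID from the KFD-reserved range and bind it to the process:
 * set up the PASID-VMID mapping, program the SH_MEM registers and the page
 * table base, then flush the TLB. Only used without the HW scheduler, when a
 * process creates its first queue.
 */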
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = ffs(dqm->vmid_bitmap) - 1;
	dqm->vmid_bitmap &= ~(1 << bit);

	allocated_vmid = bit + dqm->dev->vm_info.first_vmid_kfd;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->kgd,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd));

	return 0;
}

static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packets.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->kgd, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - dqm->dev->vm_info.first_vmid_kfd;

	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->device_info->asic_family == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd));

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	dqm->vmid_bitmap |= (1 << bit);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd)
{
	int retval;

	print_queue(q);

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);
	else
		retval = -EINVAL;

	if (retval) {
		if (list_empty(&qpd->queues_list))
			deallocate_vmid(dqm, qpd, q);
		goto out_unlock;
	}

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

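/*
 * Reserve an HQD slot for a compute queue: round-robin across the enabled
 * pipes starting at next_pipe_to_allocate and take the first free queue bit
 * within the chosen pipe.
 */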
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

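/*
 * Create a compute queue without the HW scheduler: reserve an HQD slot and a
 * doorbell, initialize the MQD and, if the queue is active, load it onto the
 * hardware from the owning user thread's mm context.
 */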
static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_hqd;

	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	dqm->dev->kfd2kgd->set_scratch_backing_va(
			dqm->dev->kgd, qpd->sh_hidden_private_base, qpd->vmid);

	if (!q->properties.is_active)
		return 0;

	if (WARN(q->process->mm != current->mm,
		 "should only run in user thread"))
		retval = -EFAULT;
	else
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe, q->queue,
					   &q->properties, current->mm);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	deallocate_hqd(dqm, q);

	return retval;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
		get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr)
		return -ENOMEM;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		dqm->queue_count--;

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	dqm_unlock(dqm);

	return retval;
}

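/*
 * Apply changed queue properties: unmap the queue (HWS) or destroy its MQD on
 * the HQD (no-HWS), update the MQD, adjust the active-queue count, then remap
 * the runlist or reload the MQD if the queue is still active.
 */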
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_unlock;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (pdd->qpd.evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		retval = unmap_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	retval = mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active)
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = map_queues_cpsch(dqm);
	else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static struct mqd_manager *get_mqd_manager(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd_mgr;

	if (WARN_ON(type >= KFD_MQD_TYPE_MAX))
		return NULL;

	pr_debug("mqd type %d\n", type);

	mqd_mgr = dqm->mqd_mgrs[type];
	if (!mqd_mgr) {
		mqd_mgr = mqd_manager_init(type, dqm->dev);
		if (!mqd_mgr)
			pr_err("mqd manager is NULL");
		dqm->mqd_mgrs[type] = mqd_mgr;
	}

	return mqd_mgr;
}

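/*
 * Evict a process's queues without the HW scheduler: drain each active queue
 * off its HQD and mark it evicted. qpd->evicted counts nested evictions, so
 * only the first eviction does the work.
 */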
static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot evict queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN,
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval)
			goto out;
		dqm->queue_count--;
	}

out:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_info_ratelimited("Evicting PASID %u queues\n",
			    pdd->process->pasid);

	/* deactivate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_active)
			continue;
		q->properties.is_evicted = true;
		q->properties.is_active = false;
		dqm->queue_count--;
	}
	retval = execute_queues_cpsch(dqm,
				qpd->is_debug ?
				KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

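/*
 * Restore a process's queues without the HW scheduler: refresh the page table
 * base, flush the TLB and reload the MQD of every evicted queue while holding
 * a reference to the process mm. Only the outermost restore (eviction count
 * dropping to zero) does the work.
 */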
static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->kgd,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		retval = -EFAULT;
		goto out;
	}

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) { /* should not be here */
			pr_err("Cannot restore queue, mqd mgr is NULL\n");
			retval = -ENOMEM;
			goto out;
		}
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval)
			goto out;
		dqm->queue_count++;
	}
	qpd->evicted = 0;
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_info_ratelimited("Restoring PASID %u queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (!q->properties.is_evicted)
			continue;
		q->properties.is_evicted = false;
		q->properties.is_active = true;
		dqm->queue_count++;
	}
	retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	if (!retval)
		qpd->evicted = 0;
out:
	dqm_unlock(dqm);
	return retval;
}

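/*
 * Register a process with the DQM: add it to the process list, record its
 * page directory base in the QPD, apply the ASIC-specific QPD setup and turn
 * off the compute-idle power profile when the first process registers.
 */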
static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->vm);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	if (dqm->processes_count++ == 0)
		amdgpu_amdkfd_set_compute_idle(dqm->dev->kgd, false);

	dqm_unlock(dqm);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			if (--dqm->processes_count == 0)
				amdgpu_amdkfd_set_compute_idle(
					dqm->dev->kgd, true);
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid, vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << dqm->dev->vm_info.vmid_num_kfd) - 1;
	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return pm_init(&dqm->packets, dqm);
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	pm_uninit(&dqm->packets);
	return 0;
}

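/*
 * Reserve a free SDMA queue slot from the global bitmap. The slot index
 * encodes both the target engine and the queue ID on that engine; callers
 * split it using get_num_sdma_engines().
 */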
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = __ffs64(dqm->sdma_bitmap);
	dqm->sdma_bitmap &= ~(1ULL << bit);
	*sdma_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_id)
{
	if (sdma_id >= get_num_sdma_queues(dqm))
		return;
	dqm->sdma_bitmap |= (1ULL << sdma_id);
}

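/*
 * Create an SDMA queue without the HW scheduler: allocate an SDMA slot and a
 * doorbell, derive the engine and per-engine queue IDs from the slot, set up
 * the SDMA VM state, then initialize and load the MQD directly.
 */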
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd_mgr)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id / get_num_sdma_engines(dqm);
	q->properties.sdma_engine_id = q->sdma_id % get_num_sdma_engines(dqm);

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	pr_debug("SDMA id is:    %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, 0, 0, &q->properties,
				NULL);
	if (retval)
		goto out_uninit_mqd;

	return 0;

out_uninit_mqd:
	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	deallocate_sdma_queue(dqm, q->sdma_id);

	return retval;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	dqm->sdma_bitmap = (1ULL << get_num_sdma_queues(dqm)) - 1;

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

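/*
 * Start the HW scheduler: bring up the packet manager, report the queue and
 * VMID resources to the scheduler firmware, allocate the preemption fence in
 * GTT memory, enable interrupts and map the initial runlist.
 */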
static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	dqm_lock(dqm);
	/* clear hang status when driver try to start the hw scheduler */
	dqm->is_hws_hang = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		retval = allocate_sdma_queue(dqm, &q->sdma_id);
		if (retval)
			goto out_unlock;
		q->properties.sdma_queue_id =
			q->sdma_id / get_num_sdma_engines(dqm);
		q->properties.sdma_engine_id =
			q->sdma_id % get_num_sdma_engines(dqm);
		pr_debug("SDMA id is:    %d\n", q->sdma_id);
		pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
		pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	}

	retval = allocate_doorbell(qpd, q);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}
	/*
	 * Eviction state logic: we only mark active queues as evicted
	 * to avoid the overhead of restoring inactive queues later
	 */
	if (qpd->evicted)
		q->properties.is_evicted = (q->properties.queue_size > 0 &&
					    q->properties.queue_percent > 0 &&
					    q->properties.queue_address != 0);

	dqm->asic_ops.init_sdma_vm(dqm, q, qpd);

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	retval = mqd_mgr->init_mqd(mqd_mgr, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out_deallocate_doorbell;

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q->sdma_id);
out_unlock:
	dqm_unlock(dqm);

	return retval;
}

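/*
 * Poll a fence location until the CP writes the expected value or the timeout
 * expires. On timeout, the driver thread can be parked (halt_if_hws_hang) so
 * CP state is preserved for firmware debugging.
 */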
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int unmap_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0)
		return 0;

	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval = 0;

	if (dqm->is_hws_hang)
		return -EIO;
	if (!dqm->active_runlist)
		return retval;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		unmap_sdma_queues(dqm, 0);
		unmap_sdma_queues(dqm, 1);
	}

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			filter, filter_param, false, 0);
	if (retval)
		return retval;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* should be timed out */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval)
		return retval;

	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
		dqm->is_hws_hang = true;
		schedule_work(&dqm->hw_exception_work);
		return retval;
	}

	return map_queues_cpsch(dqm);
}

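/*
 * Destroy a user queue under the HW scheduler: refuse while the process is
 * being debugged, release the doorbell and any SDMA slot, drop the queue from
 * the process list and re-execute the runlist if it was active. A preemption
 * timeout flags the process for a wavefront reset.
 */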
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * error, currently we do not allow to destroy a queue
		 * of a currently debugged process
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;

	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd_mgr) {
		retval = -ENOMEM;
		goto failed;
	}

	deallocate_doorbell(qpd, q);

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	}

	list_del(&q->list);
	qpd->queue_count--;
	if (q->properties.is_active) {
		dqm->queue_count--;
		retval = execute_queues_cpsch(dqm,
				KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		if (retval == -ETIME)
			qpd->reset_wavefronts = true;
	}

	mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	return retval;

failed:
failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

static int set_trap_handler(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				uint64_t tba_addr,
				uint64_t tma_addr)
{
	uint64_t *tma;

	if (dqm->dev->cwsr_enabled) {
		/* Jump from CWSR trap handler to user trap */
		tma = (uint64_t *)(qpd->cwsr_kaddr + KFD_CWSR_TMA_OFFSET);
		tma[0] = tba_addr;
		tma[1] = tma_addr;
	} else {
		qpd->tba_addr = tba_addr;
		qpd->tma_addr = tma_addr;
	}

	return 0;
}

static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q, *next;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		int ret;

		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	dqm_unlock(dqm);
	return retval;
}

static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;
	int r;

	dqm_lock(dqm);

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd_mgr) {
		r = -ENOMEM;
		goto dqm_unlock;
	}

	if (!mqd_mgr->get_wave_state) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	r = mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
			ctl_stack_used_size, save_area_used_size);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

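/*
 * Clean up a terminating process under the HW scheduler: drop its kernel and
 * user queues from the bookkeeping, unregister the process, unmap its queues
 * (resetting wavefronts if preemption failed) and free the remaining MQDs.
 */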
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q, *next;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		dqm->queue_count--;
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
			dqm->sdma_queue_count--;
			deallocate_sdma_queue(dqm, q->sdma_id);
		}

		if (q->properties.is_active)
			dqm->queue_count--;

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			break;
		}
	}

	retval = execute_queues_cpsch(dqm, filter, 0);
	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* lastly, free mqd resources */
	list_for_each_entry_safe(q, next, &qpd->queues_list, list) {
		mqd_mgr = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
		if (!mqd_mgr) {
			retval = -ENOMEM;
			goto out;
		}
		list_del(&q->list);
		qpd->queue_count--;
		mqd_mgr->uninit_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	}

out:
	dqm_unlock(dqm);
	return retval;
}

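/*
 * Create the device queue manager: select the scheduling policy (HWS is
 * forced off on Hawaii and Tonga), fill in the cpsch or nocpsch ops table
 * accordingly and hook up the ASIC-specific callbacks.
 */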
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->device_info->asic_family) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.set_trap_handler = set_trap_handler;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	case CHIP_VEGA10:
	case CHIP_VEGA12:
	case CHIP_VEGA20:
	case CHIP_RAVEN:
		device_queue_manager_init_v9(&dqm->asic_ops);
		break;
	default:
		WARN(1, "Unexpected ASIC family %u",
		     dev->device_info->asic_family);
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}

int kfd_process_vm_fault(struct device_queue_manager *dqm,
			 unsigned int pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);
	amdgpu_amdkfd_gpu_reset(dqm->dev->kgd);
}

#if defined(CONFIG_DEBUG_FS)

static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s    %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->kgd,
		KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE, &dump, &n_regs);
	if (!r) {
		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
				KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
				KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
				KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  CP Pipe %d, Queue %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_sdma_engines(dqm); pipe++) {
		for (queue = 0;
		     queue < dqm->dev->device_info->num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->kgd, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

int dqm_debugfs_execute_queues(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif