kfd_device_queue_manager.c
// SPDX-License-Identifier: GPL-2.0 OR MIT
/*
 * Copyright 2014-2022 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/ratelimit.h>
#include <linux/printk.h>
#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"
#include "amdgpu_amdkfd.h"
#include "mes_api_def.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
				  u32 pasid, unsigned int vmid);

static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param);
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param, bool reset);

static int map_queues_cpsch(struct device_queue_manager *dqm);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q);

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q);
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q);
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q, const uint32_t *restore_sdma_id);
static void kfd_process_hw_exception(struct work_struct *work);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA || type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = (mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe) * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.cp_queue_bitmap))
			return true;
	return false;
}

unsigned int get_cp_queues_num(struct device_queue_manager *dqm)
{
	return bitmap_weight(dqm->dev->shared_resources.cp_queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

static unsigned int get_num_all_sdma_engines(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) +
		kfd_get_num_xgmi_sdma_engines(dqm->dev);
}

unsigned int get_num_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_sdma_engines(dqm->dev) *
		dqm->dev->device_info.num_sdma_queues_per_engine;
}

unsigned int get_num_xgmi_sdma_queues(struct device_queue_manager *dqm)
{
	return kfd_get_num_xgmi_sdma_engines(dqm->dev) *
		dqm->dev->device_info.num_sdma_queues_per_engine;
}

static inline uint64_t get_reserved_sdma_queues_bitmap(struct device_queue_manager *dqm)
{
	return dqm->dev->device_info.reserved_sdma_queues_bitmap;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->adev, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

static void kfd_hws_hang(struct device_queue_manager *dqm)
{
	/*
	 * Issue a GPU reset if HWS is unresponsive
	 */
	dqm->is_hws_hang = true;

	/* It's possible we're detecting a HWS hang in the
	 * middle of a GPU reset. No need to schedule another
	 * reset in this case.
	 */
	if (!dqm->is_resetting)
		schedule_work(&dqm->hw_exception_work);
}

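/* Map a KFD queue type onto the queue type expected by the MES firmware
 * scheduler. Only compute and SDMA user queues are handled here.
 */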
static int convert_to_mes_queue_type(int queue_type)
{
	int mes_queue_type;

	switch (queue_type) {
	case KFD_QUEUE_TYPE_COMPUTE:
		mes_queue_type = MES_QUEUE_TYPE_COMPUTE;
		break;
	case KFD_QUEUE_TYPE_SDMA:
		mes_queue_type = MES_QUEUE_TYPE_SDMA;
		break;
	default:
		WARN(1, "Invalid queue type %d", queue_type);
		mes_queue_type = -EINVAL;
		break;
	}

	return mes_queue_type;
}

static int add_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			 struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mes_add_queue_input queue_input;
	int r, queue_type;
	uint64_t wptr_addr_off;

	if (dqm->is_hws_hang)
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_add_queue_input));
	queue_input.process_id = qpd->pqm->process->pasid;
	queue_input.page_table_base_addr =  qpd->page_table_base;
	queue_input.process_va_start = 0;
	queue_input.process_va_end = adev->vm_manager.max_pfn - 1;
	/* MES unit for quantum is 100ns */
	queue_input.process_quantum = KFD_MES_PROCESS_QUANTUM;  /* Equivalent to 10ms. */
	queue_input.process_context_addr = pdd->proc_ctx_gpu_addr;
	queue_input.gang_quantum = KFD_MES_GANG_QUANTUM; /* Equivalent to 1ms */
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;
	queue_input.inprocess_gang_priority = q->properties.priority;
	queue_input.gang_global_priority_level =
					AMDGPU_MES_PRIORITY_LEVEL_NORMAL;
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.mqd_addr = q->gart_mqd_addr;

	if (q->wptr_bo) {
		wptr_addr_off = (uint64_t)q->properties.write_ptr - (uint64_t)q->wptr_bo->kfd_bo->va;
		queue_input.wptr_addr = ((uint64_t)q->wptr_bo->tbo.resource->start << PAGE_SHIFT) + wptr_addr_off;
	} else
		queue_input.wptr_addr = (uint64_t)q->properties.write_ptr;

	queue_input.paging = false;
	queue_input.tba_addr = qpd->tba_addr;
	queue_input.tma_addr = qpd->tma_addr;

	queue_type = convert_to_mes_queue_type(q->properties.type);
	if (queue_type < 0) {
		pr_err("Queue type not supported with MES, queue:%d\n",
				q->properties.type);
		return -EINVAL;
	}
	queue_input.queue_type = (uint32_t)queue_type;

	if (q->gws) {
		queue_input.gws_base = 0;
		queue_input.gws_size = qpd->num_gws;
	}

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->add_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);
	if (r) {
		pr_err("failed to add hardware queue to MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_queue_mes(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd)
{
	struct amdgpu_device *adev = (struct amdgpu_device *)dqm->dev->adev;
	int r;
	struct mes_remove_queue_input queue_input;

	if (dqm->is_hws_hang)
		return -EIO;

	memset(&queue_input, 0x0, sizeof(struct mes_remove_queue_input));
	queue_input.doorbell_offset = q->properties.doorbell_off;
	queue_input.gang_context_addr = q->gang_ctx_gpu_addr;

	amdgpu_mes_lock(&adev->mes);
	r = adev->mes.funcs->remove_hw_queue(&adev->mes, &queue_input);
	amdgpu_mes_unlock(&adev->mes);

	if (r) {
		pr_err("failed to remove hardware queue from MES, doorbell=0x%x\n",
			q->properties.doorbell_off);
		pr_err("MES might be in unrecoverable state, issue a GPU reset\n");
		kfd_hws_hang(dqm);
	}

	return r;
}

static int remove_all_queues_mes(struct device_queue_manager *dqm)
{
	struct device_process_node *cur;
	struct qcm_process_device *qpd;
	struct queue *q;
	int retval = 0;

	list_for_each_entry(cur, &dqm->queues, list) {
		qpd = cur->qpd;
		list_for_each_entry(q, &qpd->queues_list, list) {
			if (q->properties.is_active) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval) {
					pr_err("%s: Failed to remove queue %d for dev %d",
						__func__,
						q->properties.queue_id,
						dqm->dev->id);
					return retval;
				}
			}
		}
	}

	return retval;
}

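/* Queue accounting helpers: besides the total active queue count, CP
 * (compute and DIQ) queues and GWS-using queues are tracked separately so
 * runlist and GWS state can be kept consistent.
 */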
static void increment_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count++;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count++;

	if (q->properties.is_gws) {
		dqm->gws_queue_count++;
		qpd->mapped_gws_queue = true;
	}
}

static void decrement_queue_count(struct device_queue_manager *dqm,
				  struct qcm_process_device *qpd,
				  struct queue *q)
{
	dqm->active_queue_count--;
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.type == KFD_QUEUE_TYPE_DIQ)
		dqm->active_cp_queue_count--;

	if (q->properties.is_gws) {
		dqm->gws_queue_count--;
		qpd->mapped_gws_queue = false;
	}
}

/*
 * Allocate a doorbell ID to this queue.
 * If doorbell_id is passed in, make sure requested ID is valid then allocate it.
 */
static int allocate_doorbell(struct qcm_process_device *qpd,
			     struct queue *q,
			     uint32_t const *restore_id)
{
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev)) {
		/* On pre-SOC15 chips we need to use the queue ID to
		 * preserve the user mode ABI.
		 */

		if (restore_id && *restore_id != q->properties.queue_id)
			return -EINVAL;

		q->doorbell_id = q->properties.queue_id;
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
			q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		/* For SDMA queues on SOC15 with 8-byte doorbell, use static
		 * doorbell assignments based on the engine and queue id.
		 * The doorbell index distance between RLC (2*i) and (2*i+1)
		 * for a SDMA engine is 512.
		 */

		uint32_t *idx_offset = dev->shared_resources.sdma_doorbell_idx;
		uint32_t valid_id = idx_offset[q->properties.sdma_engine_id]
						+ (q->properties.sdma_queue_id & 1)
						* KFD_QUEUE_DOORBELL_MIRROR_OFFSET
						+ (q->properties.sdma_queue_id >> 1);

		if (restore_id && *restore_id != valid_id)
			return -EINVAL;
		q->doorbell_id = valid_id;
	} else {
		/* For CP queues on SOC15 */
		if (restore_id) {
			/* make sure that ID is free  */
			if (__test_and_set_bit(*restore_id, qpd->doorbell_bitmap))
				return -EINVAL;

			q->doorbell_id = *restore_id;
		} else {
			/* or reserve a free doorbell ID */
			unsigned int found;

			found = find_first_zero_bit(qpd->doorbell_bitmap,
						KFD_MAX_NUM_OF_QUEUES_PER_PROCESS);
			if (found >= KFD_MAX_NUM_OF_QUEUES_PER_PROCESS) {
				pr_debug("No doorbells available");
				return -EBUSY;
			}
			set_bit(found, qpd->doorbell_bitmap);
			q->doorbell_id = found;
		}
	}

	q->properties.doorbell_off =
		kfd_get_doorbell_dw_offset_in_bar(dev, qpd_to_pdd(qpd),
					  q->doorbell_id);
	return 0;
}

static void deallocate_doorbell(struct qcm_process_device *qpd,
				struct queue *q)
{
	unsigned int old;
	struct kfd_dev *dev = qpd->dqm->dev;

	if (!KFD_IS_SOC15(dev) ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
	    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		return;

	old = test_and_clear_bit(q->doorbell_id, qpd->doorbell_bitmap);
	WARN_ON(!old);
}

static void program_trap_handler_settings(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd)
{
	if (dqm->dev->kfd2kgd->program_trap_handler_settings)
		dqm->dev->kfd2kgd->program_trap_handler_settings(
						dqm->dev->adev, qpd->vmid,
						qpd->tba_addr, qpd->tma_addr);
}

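/* Non-HWS path only: grab the first free KFD VMID, bind it to the process
 * PASID, and program the per-VMID memory apertures, trap handler and page
 * table base before the process's first queue is loaded.
 */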
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int allocated_vmid = -1, i;

	for (i = dqm->dev->vm_info.first_vmid_kfd;
			i <= dqm->dev->vm_info.last_vmid_kfd; i++) {
		if (!dqm->vmid_pasid[i]) {
			allocated_vmid = i;
			break;
		}
	}

	if (allocated_vmid < 0) {
		pr_err("no more vmid to allocate\n");
		return -ENOSPC;
	}

	pr_debug("vmid allocated: %d\n", allocated_vmid);

	dqm->vmid_pasid[allocated_vmid] = q->process->pasid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, allocated_vmid);

	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	program_sh_mem_settings(dqm, qpd);

	if (KFD_IS_SOC15(dqm->dev) && dqm->dev->cwsr_enabled)
		program_trap_handler_settings(dqm, qpd);

	/* qpd->page_table_base is set earlier when register_process()
	 * is called, i.e. when the first queue is created.
	 */
	dqm->dev->kfd2kgd->set_vm_context_page_table_base(dqm->dev->adev,
			qpd->vmid,
			qpd->page_table_base);
	/* invalidate the VM context after pasid and vmid mapping is set up */
	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	if (dqm->dev->kfd2kgd->set_scratch_backing_va)
		dqm->dev->kfd2kgd->set_scratch_backing_va(dqm->dev->adev,
				qpd->sh_hidden_private_base, qpd->vmid);

	return 0;
}

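/* Build a small indirect buffer containing a RELEASE_MEM packet and submit
 * it on the MEC so the texture cache is flushed for this process's VMID.
 */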
static int flush_texture_cache_nocpsch(struct kfd_dev *kdev,
				struct qcm_process_device *qpd)
{
	const struct packet_manager_funcs *pmf = qpd->dqm->packet_mgr.pmf;
	int ret;

	if (!qpd->ib_kaddr)
		return -ENOMEM;

	ret = pmf->release_mem(qpd->ib_base, (uint32_t *)qpd->ib_kaddr);
	if (ret)
		return ret;

	return amdgpu_amdkfd_submit_ib(kdev->adev, KGD_ENGINE_MEC1, qpd->vmid,
				qpd->ib_base, (uint32_t *)qpd->ib_kaddr,
				pmf->release_mem_size / sizeof(uint32_t));
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	/* On GFX v7, CP doesn't flush TC at dequeue */
	if (q->device->adev->asic_type == CHIP_HAWAII)
		if (flush_texture_cache_nocpsch(q->device, qpd))
			pr_err("Failed to flush TC\n");

	kfd_flush_tlb(qpd_to_pdd(qpd), TLB_FLUSH_LEGACY);

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);
	dqm->vmid_pasid[qpd->vmid] = 0;

	qpd->vmid = 0;
	q->properties.vmid = 0;
}

static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				const struct kfd_criu_queue_priv_data *qd,
				const void *restore_mqd, const void *restore_ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int retval;

	dqm_lock(dqm);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out_unlock;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval)
			goto out_unlock;
	}
	q->properties.vmid = qpd->vmid;
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		retval = allocate_hqd(dqm, q);
		if (retval)
			goto deallocate_vmid;
		pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		if (retval)
			goto deallocate_vmid;
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_hqd;

	/* Temporarily release dqm lock to avoid a circular lock dependency */
	dqm_unlock(dqm);
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	dqm_lock(dqm);

	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	if (q->properties.is_active) {
		if (!dqm->sched_running) {
			WARN_ONCE(1, "Load non-HWS mqd while stopped\n");
			goto add_queue_to_list;
		}

		if (WARN(q->process->mm != current->mm,
					"should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
					q->queue, &q->properties, current->mm);
		if (retval)
			goto out_free_mqd;
	}

add_queue_to_list:
	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;
	if (q->properties.is_active)
		increment_queue_count(dqm, qpd, q);

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	goto out_unlock;

out_free_mqd:
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_hqd:
	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
deallocate_vmid:
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
out_unlock:
	dqm_unlock(dqm);
	return retval;
}

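/* Pick a free HQD slot for a compute queue, scanning the pipes of the first
 * MEC round-robin starting at next_pipe_to_allocate.
 */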
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = ffs(dqm->allocated_queues[pipe]) - 1;
			dqm->allocated_queues[pipe] &= ~(1 << bit);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	dqm->allocated_queues[q->pipe] |= (1 << q->queue);
}

#define SQ_IND_CMD_CMD_KILL		0x00000003
#define SQ_IND_CMD_MODE_BROADCAST	0x00000001

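/* Kill all wavefronts of a process: find the VMID the PASID is mapped to,
 * then broadcast an SQ KILL command to every shader engine for that VMID.
 */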
static int dbgdev_wave_reset_wavefronts(struct kfd_dev *dev, struct kfd_process *p)
{
	int status = 0;
	unsigned int vmid;
	uint16_t queried_pasid;
	union SQ_CMD_BITS reg_sq_cmd;
	union GRBM_GFX_INDEX_BITS reg_gfx_index;
	struct kfd_process_device *pdd;
	int first_vmid_to_scan = dev->vm_info.first_vmid_kfd;
	int last_vmid_to_scan = dev->vm_info.last_vmid_kfd;

	reg_sq_cmd.u32All = 0;
	reg_gfx_index.u32All = 0;

	pr_debug("Killing all process wavefronts\n");

	if (!dev->kfd2kgd->get_atc_vmid_pasid_mapping_info) {
		pr_err("no vmid pasid mapping supported \n");
		return -EOPNOTSUPP;
	}

	/* Scan all registers in the range ATC_VMID8_PASID_MAPPING ..
	 * ATC_VMID15_PASID_MAPPING
	 * to check which VMID the current process is mapped to.
	 */

	for (vmid = first_vmid_to_scan; vmid <= last_vmid_to_scan; vmid++) {
		status = dev->kfd2kgd->get_atc_vmid_pasid_mapping_info
				(dev->adev, vmid, &queried_pasid);

		if (status && queried_pasid == p->pasid) {
			pr_debug("Killing wave fronts of vmid %d and pasid 0x%x\n",
					vmid, p->pasid);
			break;
		}
	}

	if (vmid > last_vmid_to_scan) {
		pr_err("Didn't find vmid for pasid 0x%x\n", p->pasid);
		return -EFAULT;
	}

	/* taking the VMID for that process on the safe way using PDD */
	pdd = kfd_get_process_device_data(dev, p);
	if (!pdd)
		return -EFAULT;

	reg_gfx_index.bits.sh_broadcast_writes = 1;
	reg_gfx_index.bits.se_broadcast_writes = 1;
	reg_gfx_index.bits.instance_broadcast_writes = 1;
	reg_sq_cmd.bits.mode = SQ_IND_CMD_MODE_BROADCAST;
	reg_sq_cmd.bits.cmd = SQ_IND_CMD_CMD_KILL;
	reg_sq_cmd.bits.vm_id = vmid;

	dev->kfd2kgd->wave_control_execute(dev->adev,
					reg_gfx_index.u32All,
					reg_sq_cmd.u32All);

	return 0;
}

/* Access to DQM has to be locked before calling destroy_queue_nocpsch_locked
 * to avoid unsynchronized access
 */
static int destroy_queue_nocpsch_locked(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		deallocate_hqd(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		deallocate_sdma_queue(dqm, q);
	else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		deallocate_sdma_queue(dqm, q);
	else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		return -EINVAL;
	}
	dqm->total_queue_count--;

	deallocate_doorbell(qpd, q);

	if (!dqm->sched_running) {
		WARN_ONCE(1, "Destroy non-HWS queue while stopped\n");
		return 0;
	}

	retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				KFD_UNMAP_LATENCY_MS,
				q->pipe, q->queue);
	if (retval == -ETIME)
		qpd->reset_wavefronts = true;

	list_del(&q->list);
	if (list_empty(&qpd->queues_list)) {
		if (qpd->reset_wavefronts) {
			pr_warn("Resetting wave fronts (nocpsch) on dev %p\n",
					dqm->dev);
			/* dbgdev_wave_reset_wavefronts has to be called before
			 * deallocate_vmid(), i.e. when vmid is still in use.
			 */
			dbgdev_wave_reset_wavefronts(dqm->dev,
					qpd->pqm->process);
			qpd->reset_wavefronts = false;
		}

		deallocate_vmid(dqm, qpd, q);
	}
	qpd->queue_count--;
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);

	return retval;
}

static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);
	struct mqd_manager *mqd_mgr =
		dqm->mqd_mgrs[get_mqd_type_from_queue_type(q->properties.type)];

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	dqm_lock(dqm);
	retval = destroy_queue_nocpsch_locked(dqm, qpd, q);
	if (!retval)
		pdd->sdma_past_activity_counter += sdma_val;
	dqm_unlock(dqm);

	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;
}

static int update_queue(struct device_queue_manager *dqm, struct queue *q,
			struct mqd_update_info *minfo)
{
	int retval = 0;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	bool prev_active = false;

	dqm_lock(dqm);
	pdd = kfd_get_process_device_data(q->device, q->process);
	if (!pdd) {
		retval = -ENODEV;
		goto out_unlock;
	}
	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	/* Save previous activity state for counters */
	prev_active = q->properties.is_active;

	/* Make sure the queue is unmapped before updating the MQD */
	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->shared_resources.enable_mes)
			retval = unmap_queues_cpsch(dqm,
						    KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0, false);
		else if (prev_active)
			retval = remove_queue_mes(dqm, q, &pdd->qpd);

		if (retval) {
			pr_err("unmap queue failed\n");
			goto out_unlock;
		}
	} else if (prev_active &&
		   (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		    q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {

		if (!dqm->sched_running) {
			WARN_ONCE(1, "Update non-HWS queue while stopped\n");
			goto out_unlock;
		}

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval) {
			pr_err("destroy mqd failed\n");
			goto out_unlock;
		}
	}

	mqd_mgr->update_mqd(mqd_mgr, q->mqd, &q->properties, minfo);

	/*
	 * check active state vs. the previous state and modify
	 * counter accordingly. map_queues_cpsch uses the
	 * dqm->active_queue_count to determine whether a new runlist must be
	 * uploaded.
	 */
	if (q->properties.is_active && !prev_active) {
		increment_queue_count(dqm, &pdd->qpd, q);
	} else if (!q->properties.is_active && prev_active) {
		decrement_queue_count(dqm, &pdd->qpd, q);
	} else if (q->gws && !q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count++;
			pdd->qpd.mapped_gws_queue = true;
		}
		q->properties.is_gws = true;
	} else if (!q->gws && q->properties.is_gws) {
		if (q->properties.is_active) {
			dqm->gws_queue_count--;
			pdd->qpd.mapped_gws_queue = false;
		}
		q->properties.is_gws = false;
	}

	if (dqm->sched_policy != KFD_SCHED_POLICY_NO_HWS) {
		if (!dqm->dev->shared_resources.enable_mes)
			retval = map_queues_cpsch(dqm);
		else if (q->properties.is_active)
			retval = add_queue_mes(dqm, q, &pdd->qpd);
	} else if (q->properties.is_active &&
		 (q->properties.type == KFD_QUEUE_TYPE_COMPUTE ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		  q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		if (WARN(q->process->mm != current->mm,
			 "should only run in user thread"))
			retval = -EFAULT;
		else
			retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd,
						   q->pipe, q->queue,
						   &q->properties, current->mm);
	}

out_unlock:
	dqm_unlock(dqm);
	return retval;
}

static int evict_process_queues_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	int retval, ret = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	pdd->last_evict_timestamp = get_jiffies_64();
	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Evict when stopped\n"))
			continue;

		retval = mqd_mgr->destroy_mqd(mqd_mgr, q->mqd,
				(dqm->dev->cwsr_enabled ?
				 KFD_PREEMPT_TYPE_WAVEFRONT_SAVE :
				 KFD_PREEMPT_TYPE_WAVEFRONT_DRAIN),
				KFD_UNMAP_LATENCY_MS, q->pipe, q->queue);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}

out:
	dqm_unlock(dqm);
	return ret;
}

static int evict_process_queues_cpsch(struct device_queue_manager *dqm,
				      struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	int retval = 0;

	dqm_lock(dqm);
	if (qpd->evicted++ > 0) /* already evicted, do nothing */
		goto out;

	pdd = qpd_to_pdd(qpd);
	pr_debug_ratelimited("Evicting PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Mark all queues as evicted. Deactivate all active queues on
	 * the qpd.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = true;
		if (!q->properties.is_active)
			continue;

		q->properties.is_active = false;
		decrement_queue_count(dqm, qpd, q);

		if (dqm->dev->shared_resources.enable_mes) {
			retval = remove_queue_mes(dqm, q, qpd);
			if (retval) {
				pr_err("Failed to evict queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	pdd->last_evict_timestamp = get_jiffies_64();
	if (!dqm->dev->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      qpd->is_debug ?
					      KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES :
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);

out:
	dqm_unlock(dqm);
	return retval;
}

static int restore_process_queues_nocpsch(struct device_queue_manager *dqm,
					  struct qcm_process_device *qpd)
{
	struct mm_struct *mm = NULL;
	struct queue *q;
	struct mqd_manager *mqd_mgr;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval, ret = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	if (!list_empty(&qpd->queues_list)) {
		dqm->dev->kfd2kgd->set_vm_context_page_table_base(
				dqm->dev->adev,
				qpd->vmid,
				qpd->page_table_base);
		kfd_flush_tlb(pdd, TLB_FLUSH_LEGACY);
	}

	/* Take a safe reference to the mm_struct, which may otherwise
	 * disappear even while the kfd_process is still referenced.
	 */
	mm = get_task_mm(pdd->process->lead_thread);
	if (!mm) {
		ret = -EFAULT;
		goto out;
	}

	/* Remove the eviction flags. Activate queues that are not
	 * inactive for other reasons.
	 */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		q->properties.is_active = true;
		increment_queue_count(dqm, qpd, q);

		if (WARN_ONCE(!dqm->sched_running, "Restore when stopped\n"))
			continue;

		retval = mqd_mgr->load_mqd(mqd_mgr, q->mqd, q->pipe,
				       q->queue, &q->properties, mm);
		if (retval && !ret)
			/* Return the first error, but keep going to
			 * maintain a consistent eviction state
			 */
			ret = retval;
	}
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	if (mm)
		mmput(mm);
	dqm_unlock(dqm);
	return ret;
}

static int restore_process_queues_cpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct queue *q;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	uint64_t eviction_duration;
	int retval = 0;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	if (WARN_ON_ONCE(!qpd->evicted)) /* already restored, do nothing */
		goto out;
	if (qpd->evicted > 1) { /* ref count still > 0, decrement & quit */
		qpd->evicted--;
		goto out;
	}

	pr_debug_ratelimited("Restoring PASID 0x%x queues\n",
			    pdd->process->pasid);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	/* activate all active queues on the qpd */
	list_for_each_entry(q, &qpd->queues_list, list) {
		q->properties.is_evicted = false;
		if (!QUEUE_IS_ACTIVE(q->properties))
			continue;

		q->properties.is_active = true;
		increment_queue_count(dqm, &pdd->qpd, q);

		if (dqm->dev->shared_resources.enable_mes) {
			retval = add_queue_mes(dqm, q, qpd);
			if (retval) {
				pr_err("Failed to restore queue %d\n",
					q->properties.queue_id);
				goto out;
			}
		}
	}
	if (!dqm->dev->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm,
					      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	qpd->evicted = 0;
	eviction_duration = get_jiffies_64() - pdd->last_evict_timestamp;
	atomic64_add(eviction_duration, &pdd->evict_duration_counter);
out:
	dqm_unlock(dqm);
	return retval;
}

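/* Called when a process creates its first queue on this device: track the
 * qpd on the DQM list, record its page directory base and update the
 * ASIC-specific qpd state.
 */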
static int register_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	struct kfd_process_device *pdd;
	uint64_t pd_base;
	int retval;

	n = kzalloc(sizeof(*n), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	pdd = qpd_to_pdd(qpd);
	/* Retrieve PD base */
	pd_base = amdgpu_amdkfd_gpuvm_get_process_page_dir(pdd->drm_priv);

	dqm_lock(dqm);
	list_add(&n->list, &dqm->queues);

	/* Update PD Base in QPD */
	qpd->page_table_base = pd_base;
	pr_debug("Updated PD address to 0x%llx\n", pd_base);

	retval = dqm->asic_ops.update_qpd(dqm, qpd);

	dqm->processes_count++;

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	kfd_inc_compute_active(dqm->dev);

	return retval;
}

static int unregister_process(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	dqm_lock(dqm);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (!retval)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, u32 pasid,
			unsigned int vmid)
{
	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->adev, pasid, vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->adev, i);
}

static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues)
		return -ENOMEM;

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.cp_queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	memset(dqm->vmid_pasid, 0, sizeof(dqm->vmid_pasid));

	dqm->sdma_bitmap = ~0ULL >> (64 - get_num_sdma_queues(dqm));
	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);

	dqm->xgmi_sdma_bitmap = ~0ULL >> (64 - get_num_xgmi_sdma_queues(dqm));

	return 0;
}

static void uninitialize(struct device_queue_manager *dqm)
{
	int i;

	WARN_ON(dqm->active_queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqd_mgrs[i]);
	mutex_destroy(&dqm->lock_hidden);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	int r = 0;

	pr_info("SW scheduler is used");
	init_interrupts(dqm);

	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		r = pm_init(&dqm->packet_mgr, dqm);
	if (!r)
		dqm->sched_running = true;

	return r;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	if (dqm->dev->adev->asic_type == CHIP_HAWAII)
		pm_uninit(&dqm->packet_mgr, false);
	dqm->sched_running = false;

	return 0;
}

static void pre_reset(struct device_queue_manager *dqm)
{
	dqm_lock(dqm);
	dqm->is_resetting = true;
	dqm_unlock(dqm);
}

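/* Reserve an SDMA queue from the PCIe or XGMI SDMA bitmap and derive the
 * engine/queue pair from the allocated id. A caller restoring a checkpointed
 * queue can pass restore_sdma_id to reclaim its previous slot.
 */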
static int allocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q, const uint32_t *restore_sdma_id)
{
	int bit;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (dqm->sdma_bitmap == 0) {
			pr_err("No more SDMA queue to allocate\n");
			return -ENOMEM;
		}

		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!(dqm->sdma_bitmap & (1ULL << *restore_sdma_id))) {
				pr_err("SDMA queue already in use\n");
				return -EBUSY;
			}
			dqm->sdma_bitmap &= ~(1ULL << *restore_sdma_id);
			q->sdma_id = *restore_sdma_id;
		} else {
			/* Find first available sdma_id */
			bit = __ffs64(dqm->sdma_bitmap);
			dqm->sdma_bitmap &= ~(1ULL << bit);
			q->sdma_id = bit;
		}

		q->properties.sdma_engine_id = q->sdma_id %
				kfd_get_num_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
				kfd_get_num_sdma_engines(dqm->dev);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (dqm->xgmi_sdma_bitmap == 0) {
			pr_err("No more XGMI SDMA queue to allocate\n");
			return -ENOMEM;
		}
		if (restore_sdma_id) {
			/* Re-use existing sdma_id */
			if (!(dqm->xgmi_sdma_bitmap & (1ULL << *restore_sdma_id))) {
				pr_err("SDMA queue already in use\n");
				return -EBUSY;
			}
			dqm->xgmi_sdma_bitmap &= ~(1ULL << *restore_sdma_id);
			q->sdma_id = *restore_sdma_id;
		} else {
			bit = __ffs64(dqm->xgmi_sdma_bitmap);
			dqm->xgmi_sdma_bitmap &= ~(1ULL << bit);
			q->sdma_id = bit;
		}
		/* sdma_engine_id is sdma id including
		 * both PCIe-optimized SDMAs and XGMI-
		 * optimized SDMAs. The calculation below
		 * assumes the first N engines are always
		 * PCIe-optimized ones
		 */
		q->properties.sdma_engine_id =
			kfd_get_num_sdma_engines(dqm->dev) +
			q->sdma_id % kfd_get_num_xgmi_sdma_engines(dqm->dev);
		q->properties.sdma_queue_id = q->sdma_id /
			kfd_get_num_xgmi_sdma_engines(dqm->dev);
	}

	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				struct queue *q)
{
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		if (q->sdma_id >= get_num_sdma_queues(dqm))
			return;
		dqm->sdma_bitmap |= (1ULL << q->sdma_id);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		if (q->sdma_id >= get_num_xgmi_sdma_queues(dqm))
			return;
		dqm->xgmi_sdma_bitmap |= (1ULL << q->sdma_id);
	}
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

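/* Tell the HWS firmware which VMIDs, GWS entries and first-MEC queue slots
 * are owned by KFD.
 */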
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	res.vmid_mask = dqm->dev->shared_resources.compute_vmid_bitmap;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.cp_queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= 1ull
			<< amdgpu_queue_mask_bit_to_set_resource_bit(
				dqm->dev->adev, i);
	}
	res.gws_mask = ~0ull;
	res.oac_mask = res.gds_heap_base = res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packet_mgr, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	uint64_t num_sdma_queues;
	uint64_t num_xgmi_sdma_queues;

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock_hidden);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->active_queue_count = dqm->processes_count = 0;
	dqm->active_cp_queue_count = 0;
	dqm->gws_queue_count = 0;
	dqm->active_runlist = false;

	num_sdma_queues = get_num_sdma_queues(dqm);
	if (num_sdma_queues >= BITS_PER_TYPE(dqm->sdma_bitmap))
		dqm->sdma_bitmap = ULLONG_MAX;
	else
		dqm->sdma_bitmap = (BIT_ULL(num_sdma_queues) - 1);

	dqm->sdma_bitmap &= ~(get_reserved_sdma_queues_bitmap(dqm));
	pr_info("sdma_bitmap: %llx\n", dqm->sdma_bitmap);

	num_xgmi_sdma_queues = get_num_xgmi_sdma_queues(dqm);
	if (num_xgmi_sdma_queues >= BITS_PER_TYPE(dqm->xgmi_sdma_bitmap))
		dqm->xgmi_sdma_bitmap = ULLONG_MAX;
	else
		dqm->xgmi_sdma_bitmap = (BIT_ULL(num_xgmi_sdma_queues) - 1);

	INIT_WORK(&dqm->hw_exception_work, kfd_process_hw_exception);

	return 0;
}

static int start_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	retval = 0;

	dqm_lock(dqm);

	if (!dqm->dev->shared_resources.enable_mes) {
		retval = pm_init(&dqm->packet_mgr, dqm);
		if (retval)
			goto fail_packet_manager_init;

		retval = set_sched_resources(dqm);
		if (retval)
			goto fail_set_sched_resources;
	}
	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = (uint64_t *)dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	/* clear hang status when driver try to start the hw scheduler */
	dqm->is_hws_hang = false;
	dqm->is_resetting = false;
	dqm->sched_running = true;
	if (!dqm->dev->shared_resources.enable_mes)
		execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	if (!dqm->dev->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, false);
fail_packet_manager_init:
	dqm_unlock(dqm);
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	bool hanging;

	dqm_lock(dqm);
	if (!dqm->sched_running) {
		dqm_unlock(dqm);
		return 0;
	}

	if (!dqm->is_hws_hang) {
		if (!dqm->dev->shared_resources.enable_mes)
			unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0, false);
		else
			remove_all_queues_mes(dqm);
	}

	hanging = dqm->is_hws_hang || dqm->is_resetting;
	dqm->sched_running = false;

	if (!dqm->dev->shared_resources.enable_mes)
		pm_release_ib(&dqm->packet_mgr);

	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	if (!dqm->dev->shared_resources.enable_mes)
		pm_uninit(&dqm->packet_mgr, hanging);
	dqm_unlock(dqm);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		dqm_unlock(dqm);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	increment_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
	dqm_unlock(dqm);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	dqm_lock(dqm);
	list_del(&kq->list);
	decrement_queue_count(dqm, qpd, kq->queue);
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	dqm_unlock(dqm);
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd,
			const struct kfd_criu_queue_priv_data *qd,
			const void *restore_mqd, const void *restore_ctl_stack)
{
	int retval;
	struct mqd_manager *mqd_mgr;

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		retval = allocate_sdma_queue(dqm, q, qd ? &qd->sdma_id : NULL);
		dqm_unlock(dqm);
		if (retval)
			goto out;
	}

	retval = allocate_doorbell(qpd, q, qd ? &qd->doorbell_id : NULL);
	if (retval)
		goto out_deallocate_sdma_queue;

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
		dqm->asic_ops.init_sdma_vm(dqm, q, qpd);
	q->properties.tba_addr = qpd->tba_addr;
	q->properties.tma_addr = qpd->tma_addr;
	q->mqd_mem_obj = mqd_mgr->allocate_mqd(mqd_mgr->dev, &q->properties);
	if (!q->mqd_mem_obj) {
		retval = -ENOMEM;
		goto out_deallocate_doorbell;
	}

	dqm_lock(dqm);
	/*
	 * Eviction state logic: mark all queues as evicted, even ones
	 * not currently active. Restoring inactive queues later only
	 * updates the is_evicted flag but is a no-op otherwise.
	 */
	q->properties.is_evicted = !!qpd->evicted;

	if (qd)
		mqd_mgr->restore_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj, &q->gart_mqd_addr,
				     &q->properties, restore_mqd, restore_ctl_stack,
				     qd->ctl_stack_size);
	else
		mqd_mgr->init_mqd(mqd_mgr, &q->mqd, q->mqd_mem_obj,
					&q->gart_mqd_addr, &q->properties);

	list_add(&q->list, &qpd->queues_list);
	qpd->queue_count++;

	if (q->properties.is_active) {
		increment_queue_count(dqm, qpd, q);

		if (!dqm->dev->shared_resources.enable_mes) {
			retval = execute_queues_cpsch(dqm,
					     KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
		} else {
			retval = add_queue_mes(dqm, q, qpd);
			if (retval)
				goto cleanup_queue;
		}
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);
	return retval;

cleanup_queue:
	qpd->queue_count--;
	list_del(&q->list);
	if (q->properties.is_active)
		decrement_queue_count(dqm, qpd, q);
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
	dqm_unlock(dqm);
out_deallocate_doorbell:
	deallocate_doorbell(qpd, q);
out_deallocate_sdma_queue:
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA ||
		q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI) {
		dqm_lock(dqm);
		deallocate_sdma_queue(dqm, q);
		dqm_unlock(dqm);
	}
out:
	return retval;
}

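/* Poll the fence location written by the CP until it reaches the expected
 * value or the timeout expires; on a timeout with halt_if_hws_hang set, the
 * driver thread is parked so CP state can be inspected.
 */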
int amdkfd_fence_wait_timeout(uint64_t *fence_addr,
				uint64_t fence_value,
				unsigned int timeout_ms)
{
	unsigned long end_jiffies = msecs_to_jiffies(timeout_ms) + jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, end_jiffies)) {
			pr_err("qcm fence wait loop timeout expired\n");
			/* In HWS case, this is used to halt the driver thread
			 * in order not to mess up CP states before doing
			 * scandumps for FW debugging.
			 */
			while (halt_if_hws_hang)
				schedule();

			return -ETIME;
		}
		schedule();
	}

	return 0;
}

/* dqm->lock mutex has to be locked before calling this function */
static int map_queues_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	if (!dqm->sched_running)
		return 0;
	if (dqm->active_queue_count <= 0 || dqm->processes_count <= 0)
		return 0;
	if (dqm->active_runlist)
		return 0;

	retval = pm_send_runlist(&dqm->packet_mgr, &dqm->queues);
	pr_debug("%s sent runlist\n", __func__);
	if (retval) {
		pr_err("failed to execute runlist\n");
		return retval;
	}
	dqm->active_runlist = true;

	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
1765
static int unmap_queues_cpsch(struct device_queue_manager *dqm,
1766
				enum kfd_unmap_queues_filter filter,
1767
				uint32_t filter_param, bool reset)
1768
{
1769
	int retval = 0;
1770
	struct mqd_manager *mqd_mgr;
1771

1772 1773
	if (!dqm->sched_running)
		return 0;
1774
	if (dqm->is_hws_hang || dqm->is_resetting)
1775
		return -EIO;
1776
	if (!dqm->active_runlist)
1777
		return retval;
1778

1779
	retval = pm_send_unmap_queue(&dqm->packet_mgr, filter, filter_param, reset);
1780
	if (retval)
1781
		return retval;
1782 1783

	*dqm->fence_addr = KFD_FENCE_INIT;
1784
	pm_send_query_status(&dqm->packet_mgr, dqm->fence_gpu_addr,
1785 1786
				KFD_FENCE_COMPLETED);
	/* should be timed out */
1787
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
1788
				queue_preemption_timeout_ms);
1789 1790
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption\n");
1791
		kfd_hws_hang(dqm);
1792
		return retval;
1793
	}
1794

1795 1796 1797 1798 1799 1800 1801 1802 1803
	/* In the current MEC firmware implementation, if compute queue
	 * doesn't response to the preemption request in time, HIQ will
	 * abandon the unmap request without returning any timeout error
	 * to driver. Instead, MEC firmware will log the doorbell of the
	 * unresponding compute queue to HIQ.MQD.queue_doorbell_id fields.
	 * To make sure the queue unmap was successful, driver need to
	 * check those fields
	 */
	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ];
1804
	if (mqd_mgr->read_doorbell_id(dqm->packet_mgr.priv_queue->queue->mqd)) {
1805 1806 1807 1808 1809 1810
		pr_err("HIQ MQD's queue_doorbell_id0 is not 0, Queue preemption time out\n");
		while (halt_if_hws_hang)
			schedule();
		return -ETIME;
	}

1811
	pm_release_ib(&dqm->packet_mgr);
1812 1813 1814 1815 1816
	dqm->active_runlist = false;

	return retval;
}

/* Only for compute queues: unmap (and reset) all queues of the given PASID */
static int reset_queues_cpsch(struct device_queue_manager *dqm,
			uint16_t pasid)
{
	int retval;

	dqm_lock(dqm);

	retval = unmap_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_BY_PASID,
			pasid, true);

	dqm_unlock(dqm);
	return retval;
}

/* dqm->lock mutex has to be locked before calling this function */
static int execute_queues_cpsch(struct device_queue_manager *dqm,
				enum kfd_unmap_queues_filter filter,
				uint32_t filter_param)
{
	int retval;

	if (dqm->is_hws_hang)
		return -EIO;
	retval = unmap_queues_cpsch(dqm, filter, filter_param, false);
	if (retval)
		return retval;

	return map_queues_cpsch(dqm);
}

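/*
 * destroy_queue_cpsch - tear down a user queue under HWS scheduling: preempt
 * it via the runlist (or MES), release its doorbell and SDMA allocation, and
 * free the MQD after dropping the DQM lock.
 */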
static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd_mgr;
	uint64_t sdma_val = 0;
	struct kfd_process_device *pdd = qpd_to_pdd(qpd);

	/* Get the SDMA queue stats */
	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		retval = read_sdma_queue_counter((uint64_t __user *)q->properties.read_ptr,
							&sdma_val);
		if (retval)
			pr_err("Failed to read SDMA queue counter for queue: %d\n",
				q->properties.queue_id);
	}

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	dqm_lock(dqm);

	if (qpd->is_debug) {
		/*
		 * Error: we currently do not allow destroying a queue
		 * of a process that is being debugged.
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;
	}

	if (q->properties.is_active) {
		if (!dqm->dev->shared_resources.enable_mes) {
			retval = execute_queues_cpsch(dqm,
						      KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES, 0);
			if (retval == -ETIME)
				qpd->reset_wavefronts = true;
		} else {
			retval = remove_queue_mes(dqm, q, qpd);
		}

		if (retval)
			goto failed_unmap_queue;

		decrement_queue_count(dqm, qpd, q);
	}

	mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
			q->properties.type)];

	deallocate_doorbell(qpd, q);

	if ((q->properties.type == KFD_QUEUE_TYPE_SDMA) ||
	    (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)) {
		deallocate_sdma_queue(dqm, q);
		pdd->sdma_past_activity_counter += sdma_val;
	}

	list_del(&q->list);
	qpd->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	dqm_unlock(dqm);

	/* Do free_mqd after dqm_unlock(dqm) to avoid circular locking */
	mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);

	return retval;

failed_unmap_queue:
failed_try_destroy_debugged_queue:

	dqm_unlock(dqm);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

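/*
 * Cache the requested default/alternate policy and APE1 aperture range in the
 * per-process qpd, then hand off to the ASIC-specific hook. SH_MEM registers
 * are programmed immediately only in the no-HWS case with a valid VMID.
 */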
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval = true;

	if (!dqm->asic_ops.set_cache_memory_policy)
		return retval;

	dqm_lock(dqm);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base || (base & APE1_FIXED_BITS_MASK) != 0 ||
		   (limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT) {
			retval = false;
			goto out;
		}

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->asic_ops.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((dqm->sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

out:
	dqm_unlock(dqm);
	return retval;
}

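/*
 * Destroy all remaining queues of a terminating process and drop it from the
 * DQM process list (no-HWS path). MQDs are freed with the DQM lock dropped.
 */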
static int process_termination_nocpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	struct queue *q;
	struct device_process_node *cur, *next_dpn;
	int retval = 0;
	bool found = false;

	dqm_lock(dqm);

	/* Clear all user mode queues */
	while (!list_empty(&qpd->queues_list)) {
		struct mqd_manager *mqd_mgr;
		int ret;

		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		ret = destroy_queue_nocpsch_locked(dqm, qpd, q);
		if (ret)
			retval = ret;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

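/*
 * Copy the control stack / wave state of an inactive CWSR-enabled compute
 * queue to user space via the MQD manager's get_wave_state hook.
 */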
static int get_wave_state(struct device_queue_manager *dqm,
			  struct queue *q,
			  void __user *ctl_stack,
			  u32 *ctl_stack_used_size,
			  u32 *save_area_used_size)
{
	struct mqd_manager *mqd_mgr;

	dqm_lock(dqm);

	mqd_mgr = dqm->mqd_mgrs[KFD_MQD_TYPE_CP];

	if (q->properties.type != KFD_QUEUE_TYPE_COMPUTE ||
	    q->properties.is_active || !q->device->cwsr_enabled ||
	    !mqd_mgr->get_wave_state) {
		dqm_unlock(dqm);
		return -EINVAL;
	}

	dqm_unlock(dqm);

	/*
	 * get_wave_state is outside the dqm lock to prevent circular locking
	 * and the queue should be protected against destruction by the process
	 * lock.
	 */
	return mqd_mgr->get_wave_state(mqd_mgr, q->mqd, ctl_stack,
			ctl_stack_used_size, save_area_used_size);
}

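/*
 * Queue checkpoint support (e.g. for CRIU): get_queue_checkpoint_info reports
 * the MQD and control-stack sizes for a queue, and checkpoint_mqd snapshots
 * the MQD of an inactive CWSR-enabled queue into caller-provided buffers.
 */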
static void get_queue_checkpoint_info(struct device_queue_manager *dqm,
			const struct queue *q,
			u32 *mqd_size,
			u32 *ctl_stack_size)
{
	struct mqd_manager *mqd_mgr;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);
	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	*mqd_size = mqd_mgr->mqd_size;
	*ctl_stack_size = 0;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE && mqd_mgr->get_checkpoint_info)
		mqd_mgr->get_checkpoint_info(mqd_mgr, q->mqd, ctl_stack_size);

	dqm_unlock(dqm);
}

static int checkpoint_mqd(struct device_queue_manager *dqm,
			  const struct queue *q,
			  void *mqd,
			  void *ctl_stack)
{
	struct mqd_manager *mqd_mgr;
	int r = 0;
	enum KFD_MQD_TYPE mqd_type =
			get_mqd_type_from_queue_type(q->properties.type);

	dqm_lock(dqm);

	if (q->properties.is_active || !q->device->cwsr_enabled) {
		r = -EINVAL;
		goto dqm_unlock;
	}

	mqd_mgr = dqm->mqd_mgrs[mqd_type];
	if (!mqd_mgr->checkpoint_mqd) {
		r = -EOPNOTSUPP;
		goto dqm_unlock;
	}

	mqd_mgr->checkpoint_mqd(mqd_mgr, q->mqd, mqd, ctl_stack);

dqm_unlock:
	dqm_unlock(dqm);
	return r;
}

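/*
 * HWS counterpart of process termination: drain kernel and user queues for
 * the process, unmap them through the runlist (or MES), reset wavefronts if
 * preemption failed, and free the MQDs outside the DQM lock.
 */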
static int process_termination_cpsch(struct device_queue_manager *dqm,
		struct qcm_process_device *qpd)
{
	int retval;
	struct queue *q;
	struct kernel_queue *kq, *kq_next;
	struct mqd_manager *mqd_mgr;
	struct device_process_node *cur, *next_dpn;
	enum kfd_unmap_queues_filter filter =
		KFD_UNMAP_QUEUES_FILTER_DYNAMIC_QUEUES;
	bool found = false;

	retval = 0;

	dqm_lock(dqm);

	/* Clean all kernel queues */
	list_for_each_entry_safe(kq, kq_next, &qpd->priv_queue_list, list) {
		list_del(&kq->list);
		decrement_queue_count(dqm, qpd, kq->queue);
		qpd->is_debug = false;
		dqm->total_queue_count--;
		filter = KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES;
	}

	/* Clear all user mode queues */
	list_for_each_entry(q, &qpd->queues_list, list) {
		if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
			deallocate_sdma_queue(dqm, q);
		else if (q->properties.type == KFD_QUEUE_TYPE_SDMA_XGMI)
			deallocate_sdma_queue(dqm, q);

		if (q->properties.is_active) {
			decrement_queue_count(dqm, qpd, q);

			if (dqm->dev->shared_resources.enable_mes) {
				retval = remove_queue_mes(dqm, q, qpd);
				if (retval)
					pr_err("Failed to remove queue %d\n",
						q->properties.queue_id);
			}
		}

		dqm->total_queue_count--;
	}

	/* Unregister process */
	list_for_each_entry_safe(cur, next_dpn, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			found = true;
			break;
		}
	}

	if (!dqm->dev->shared_resources.enable_mes)
		retval = execute_queues_cpsch(dqm, filter, 0);

	if ((!dqm->is_hws_hang) && (retval || qpd->reset_wavefronts)) {
		pr_warn("Resetting wave fronts (cpsch) on dev %p\n", dqm->dev);
		dbgdev_wave_reset_wavefronts(dqm->dev, qpd->pqm->process);
		qpd->reset_wavefronts = false;
	}

	/* Lastly, free mqd resources.
	 * Do free_mqd() after dqm_unlock to avoid circular locking.
	 */
	while (!list_empty(&qpd->queues_list)) {
		q = list_first_entry(&qpd->queues_list, struct queue, list);
		mqd_mgr = dqm->mqd_mgrs[get_mqd_type_from_queue_type(
				q->properties.type)];
		list_del(&q->list);
		qpd->queue_count--;
		dqm_unlock(dqm);
		mqd_mgr->free_mqd(mqd_mgr, q->mqd, q->mqd_mem_obj);
		dqm_lock(dqm);
	}
	dqm_unlock(dqm);

	/* Outside the DQM lock because under the DQM lock we can't do
	 * reclaim or take other locks that others hold while reclaiming.
	 */
	if (found)
		kfd_dec_compute_active(dqm->dev);

	return retval;
}

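/* Create one MQD manager per MQD type; roll back on the first failure. */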
static int init_mqd_managers(struct device_queue_manager *dqm)
{
	int i, j;
	struct mqd_manager *mqd_mgr;

	for (i = 0; i < KFD_MQD_TYPE_MAX; i++) {
		mqd_mgr = dqm->asic_ops.mqd_manager_init(i, dqm->dev);
		if (!mqd_mgr) {
			pr_err("mqd manager [%d] initialization failed\n", i);
			goto out_free;
		}
		dqm->mqd_mgrs[i] = mqd_mgr;
	}

	return 0;

out_free:
	for (j = 0; j < i; j++) {
		kfree(dqm->mqd_mgrs[j]);
		dqm->mqd_mgrs[j] = NULL;
	}

	return -ENOMEM;
}

/* Allocate one HIQ MQD (HWS) and all SDMA MQDs in one contiguous chunk */
static int allocate_hiq_sdma_mqd(struct device_queue_manager *dqm)
{
	int retval;
	struct kfd_dev *dev = dqm->dev;
	struct kfd_mem_obj *mem_obj = &dqm->hiq_sdma_mqd;
	uint32_t size = dqm->mqd_mgrs[KFD_MQD_TYPE_SDMA]->mqd_size *
		get_num_all_sdma_engines(dqm) *
		dev->device_info.num_sdma_queues_per_engine +
		dqm->mqd_mgrs[KFD_MQD_TYPE_HIQ]->mqd_size;

	retval = amdgpu_amdkfd_alloc_gtt_mem(dev->adev, size,
		&(mem_obj->gtt_mem), &(mem_obj->gpu_addr),
		(void *)&(mem_obj->cpu_ptr), false);

	return retval;
}

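/*
 * Allocate and initialize the device queue manager: pick the scheduling
 * policy, fill in the matching ops table, hook up the ASIC-specific
 * callbacks, and set up the MQD managers plus the HIQ/SDMA MQD buffer.
 * Returns NULL on any failure.
 */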
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(*dqm), GFP_KERNEL);
	if (!dqm)
		return NULL;

	switch (dev->adev->asic_type) {
	/* HWS is not available on Hawaii. */
	case CHIP_HAWAII:
	/* HWS depends on CWSR for timely dequeue. CWSR is not
	 * available on Tonga.
	 *
	 * FIXME: This argument also applies to Kaveri.
	 */
	case CHIP_TONGA:
		dqm->sched_policy = KFD_SCHED_POLICY_NO_HWS;
		break;
	default:
		dqm->sched_policy = sched_policy;
		break;
	}

	dqm->dev = dev;
	switch (dqm->sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_cpsch;
		dqm->ops.evict_process_queues = evict_process_queues_cpsch;
		dqm->ops.restore_process_queues = restore_process_queues_cpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.reset_queues = reset_queues_cpsch;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.pre_reset = pre_reset;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.register_process = register_process;
		dqm->ops.unregister_process = unregister_process;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		dqm->ops.process_termination = process_termination_nocpsch;
		dqm->ops.evict_process_queues = evict_process_queues_nocpsch;
		dqm->ops.restore_process_queues =
			restore_process_queues_nocpsch;
		dqm->ops.get_wave_state = get_wave_state;
		dqm->ops.get_queue_checkpoint_info = get_queue_checkpoint_info;
		dqm->ops.checkpoint_mqd = checkpoint_mqd;
		break;
	default:
		pr_err("Invalid scheduling policy %d\n", dqm->sched_policy);
		goto out_free;
	}

	switch (dev->adev->asic_type) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->asic_ops);
		break;

	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->asic_ops);
		break;

	case CHIP_HAWAII:
		device_queue_manager_init_cik_hawaii(&dqm->asic_ops);
		break;

	case CHIP_TONGA:
	case CHIP_FIJI:
	case CHIP_POLARIS10:
	case CHIP_POLARIS11:
	case CHIP_POLARIS12:
	case CHIP_VEGAM:
		device_queue_manager_init_vi_tonga(&dqm->asic_ops);
		break;

	default:
		if (KFD_GC_VERSION(dev) >= IP_VERSION(11, 0, 0))
			device_queue_manager_init_v11(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(10, 1, 1))
			device_queue_manager_init_v10_navi10(&dqm->asic_ops);
		else if (KFD_GC_VERSION(dev) >= IP_VERSION(9, 0, 1))
			device_queue_manager_init_v9(&dqm->asic_ops);
		else {
			WARN(1, "Unexpected ASIC family %u",
			     dev->adev->asic_type);
			goto out_free;
		}
	}

	if (init_mqd_managers(dqm))
		goto out_free;

	if (allocate_hiq_sdma_mqd(dqm)) {
		pr_err("Failed to allocate hiq sdma mqd trunk buffer\n");
		goto out_free;
	}

	if (!dqm->ops.initialize(dqm))
		return dqm;

out_free:
	kfree(dqm);
	return NULL;
}

static void deallocate_hiq_sdma_mqd(struct kfd_dev *dev,
				    struct kfd_mem_obj *mqd)
{
	WARN(!mqd, "No hiq sdma mqd trunk to free");

	amdgpu_amdkfd_free_gtt_mem(dev->adev, mqd->gtt_mem);
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	dqm->ops.uninitialize(dqm);
	deallocate_hiq_sdma_mqd(dqm->dev, &dqm->hiq_sdma_mqd);
	kfree(dqm);
}

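/*
 * Evict all queues of the process identified by @pasid from this device.
 * Returns -EINVAL if no such process exists.
 */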
int kfd_dqm_evict_pasid(struct device_queue_manager *dqm, u32 pasid)
{
	struct kfd_process_device *pdd;
	struct kfd_process *p = kfd_lookup_process_by_pasid(pasid);
	int ret = 0;

	if (!p)
		return -EINVAL;
	WARN(debug_evictions, "Evicting pid %d", p->lead_thread->pid);
	pdd = kfd_get_process_device_data(dqm->dev, p);
	if (pdd)
		ret = dqm->ops.evict_process_queues(dqm, &pdd->qpd);
	kfd_unref_process(p);

	return ret;
}

static void kfd_process_hw_exception(struct work_struct *work)
{
	struct device_queue_manager *dqm = container_of(work,
			struct device_queue_manager, hw_exception_work);

	amdgpu_amdkfd_gpu_reset(dqm->dev->adev);
}

#if defined(CONFIG_DEBUG_FS)

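/*
 * Pretty-print a register dump: consecutive registers are grouped up to
 * eight per line, prefixed with the offset of the first one.
 */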
static void seq_reg_dump(struct seq_file *m,
			 uint32_t (*dump)[2], uint32_t n_regs)
{
	uint32_t i, count;

	for (i = 0, count = 0; i < n_regs; i++) {
		if (count == 0 ||
		    dump[i-1][0] + sizeof(uint32_t) != dump[i][0]) {
			seq_printf(m, "%s    %08x: %08x",
				   i ? "\n" : "",
				   dump[i][0], dump[i][1]);
			count = 7;
		} else {
			seq_printf(m, " %08x", dump[i][1]);
			count--;
		}
	}

	seq_puts(m, "\n");
}

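/*
 * debugfs: dump the hardware state of the HIQ, every KFD-owned CP HQD and
 * every SDMA RLC queue on this device.
 */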
int dqm_debugfs_hqds(struct seq_file *m, void *data)
{
	struct device_queue_manager *dqm = data;
	uint32_t (*dump)[2], n_regs;
	int pipe, queue;
	int r = 0;

	if (!dqm->sched_running) {
		seq_puts(m, " Device is stopped\n");
		return 0;
	}

	r = dqm->dev->kfd2kgd->hqd_dump(dqm->dev->adev,
					KFD_CIK_HIQ_PIPE, KFD_CIK_HIQ_QUEUE,
					&dump, &n_regs);
	if (!r) {
		seq_printf(m, "  HIQ on MEC %d Pipe %d Queue %d\n",
			   KFD_CIK_HIQ_PIPE/get_pipes_per_mec(dqm)+1,
			   KFD_CIK_HIQ_PIPE%get_pipes_per_mec(dqm),
			   KFD_CIK_HIQ_QUEUE);
		seq_reg_dump(m, dump, n_regs);

		kfree(dump);
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++) {
			if (!test_bit(pipe_offset + queue,
				      dqm->dev->shared_resources.cp_queue_bitmap))
				continue;

			r = dqm->dev->kfd2kgd->hqd_dump(
				dqm->dev->adev, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  CP Pipe %d, Queue %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	for (pipe = 0; pipe < get_num_all_sdma_engines(dqm); pipe++) {
		for (queue = 0;
		     queue < dqm->dev->device_info.num_sdma_queues_per_engine;
		     queue++) {
			r = dqm->dev->kfd2kgd->hqd_sdma_dump(
				dqm->dev->adev, pipe, queue, &dump, &n_regs);
			if (r)
				break;

			seq_printf(m, "  SDMA Engine %d, RLC %d\n",
				  pipe, queue);
			seq_reg_dump(m, dump, n_regs);

			kfree(dump);
		}
	}

	return r;
}

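/*
 * debugfs helper that asks the packet manager to hang the HWS
 * (pm_debugfs_hang_hws) and then resubmits the runlist, presumably to
 * exercise the hang-detection and reset paths.
 */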
int dqm_debugfs_hang_hws(struct device_queue_manager *dqm)
{
	int r = 0;

	dqm_lock(dqm);
	r = pm_debugfs_hang_hws(&dqm->packet_mgr);
	if (r) {
		dqm_unlock(dqm);
		return r;
	}
	dqm->active_runlist = true;
	r = execute_queues_cpsch(dqm, KFD_UNMAP_QUEUES_FILTER_ALL_QUEUES, 0);
	dqm_unlock(dqm);

	return r;
}

#endif