kfd_device_queue_manager.c
/*
 * Copyright 2014 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

#include <linux/slab.h>
#include <linux/list.h>
#include <linux/types.h>
#include <linux/printk.h>
#include <linux/bitops.h>
#include <linux/sched.h>
#include "kfd_priv.h"
#include "kfd_device_queue_manager.h"
#include "kfd_mqd_manager.h"
#include "cik_regs.h"
#include "kfd_kernel_queue.h"

/* Size of the per-pipe EOP queue */
#define CIK_HPD_EOP_BYTES_LOG2 11
#define CIK_HPD_EOP_BYTES (1U << CIK_HPD_EOP_BYTES_LOG2)

static int set_pasid_vmid_mapping(struct device_queue_manager *dqm,
					unsigned int pasid, unsigned int vmid);

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock);
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock);

static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd);

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id);

static inline
enum KFD_MQD_TYPE get_mqd_type_from_queue_type(enum kfd_queue_type type)
{
	if (type == KFD_QUEUE_TYPE_SDMA)
		return KFD_MQD_TYPE_SDMA;
	return KFD_MQD_TYPE_CP;
}

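/*
 * A pipe is considered usable by KFD if at least one of its queues is
 * marked available in the queue_bitmap shared with the KGD.
 */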
static bool is_pipe_enabled(struct device_queue_manager *dqm, int mec, int pipe)
{
	int i;
	int pipe_offset = mec * dqm->dev->shared_resources.num_pipe_per_mec
		+ pipe * dqm->dev->shared_resources.num_queue_per_pipe;

	/* queue is available for KFD usage if bit is 1 */
	for (i = 0; i <  dqm->dev->shared_resources.num_queue_per_pipe; ++i)
		if (test_bit(pipe_offset + i,
			      dqm->dev->shared_resources.queue_bitmap))
			return true;
	return false;
}

unsigned int get_queues_num(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return bitmap_weight(dqm->dev->shared_resources.queue_bitmap,
				KGD_MAX_QUEUES);
}

unsigned int get_queues_per_pipe(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_queue_per_pipe;
}

unsigned int get_pipes_per_mec(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm || !dqm->dev);
	return dqm->dev->shared_resources.num_pipe_per_mec;
}

void program_sh_mem_settings(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	return dqm->dev->kfd2kgd->program_sh_mem_settings(
						dqm->dev->kgd, qpd->vmid,
						qpd->sh_mem_config,
						qpd->sh_mem_ape1_base,
						qpd->sh_mem_ape1_limit,
						qpd->sh_mem_bases);
}

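/*
 * Reserve a free VMID from dqm->vmid_bitmap for the process, program the
 * PASID<->VMID mapping and the SH_MEM registers, and record the VMID in
 * both the qpd and the queue properties.
 */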
static int allocate_vmid(struct device_queue_manager *dqm,
			struct qcm_process_device *qpd,
			struct queue *q)
{
	int bit, allocated_vmid;

	if (dqm->vmid_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->vmid_bitmap, CIK_VMID_NUM);
	clear_bit(bit, (unsigned long *)&dqm->vmid_bitmap);

	/* Kaveri KFD VMIDs start from VMID 8 */
	allocated_vmid = bit + KFD_VMID_START_OFFSET;
	pr_debug("vmid allocation %d\n", allocated_vmid);
	qpd->vmid = allocated_vmid;
	q->properties.vmid = allocated_vmid;

	set_pasid_vmid_mapping(dqm, q->process->pasid, q->properties.vmid);
	program_sh_mem_settings(dqm, qpd);

	return 0;
}

static void deallocate_vmid(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int bit = qpd->vmid - KFD_VMID_START_OFFSET;

	/* Release the vmid mapping */
	set_pasid_vmid_mapping(dqm, 0, qpd->vmid);

	set_bit(bit, (unsigned long *)&dqm->vmid_bitmap);
	qpd->vmid = 0;
	q->properties.vmid = 0;
}

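/*
 * Create a compute or SDMA user-mode queue without the HW scheduler.
 * The first queue created for a process also allocates its VMID.
 */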
static int create_queue_nocpsch(struct device_queue_manager *dqm,
				struct queue *q,
				struct qcm_process_device *qpd,
				int *allocated_vmid)
{
	int retval;

	BUG_ON(!dqm || !q || !qpd || !allocated_vmid);

	print_queue(q);

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	if (list_empty(&qpd->queues_list)) {
		retval = allocate_vmid(dqm, qpd, q);
		if (retval) {
			mutex_unlock(&dqm->lock);
			return retval;
		}
	}
	*allocated_vmid = qpd->vmid;
	q->properties.vmid = qpd->vmid;

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE)
		retval = create_compute_queue_nocpsch(dqm, q, qpd);
	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		retval = create_sdma_queue_nocpsch(dqm, q, qpd);

	if (retval) {
		if (list_empty(&qpd->queues_list)) {
			deallocate_vmid(dqm, qpd, q);
			*allocated_vmid = 0;
		}
		mutex_unlock(&dqm->lock);
		return retval;
	}

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active)
		dqm->queue_count++;

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);
	return 0;
}

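/*
 * Pick a free HQD slot for the queue, walking the pipes round-robin
 * from next_pipe_to_allocate so that queues are spread across pipes.
 */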
static int allocate_hqd(struct device_queue_manager *dqm, struct queue *q)
{
	bool set;
	int pipe, bit, i;

	set = false;

	for (pipe = dqm->next_pipe_to_allocate, i = 0;
			i < get_pipes_per_mec(dqm);
			pipe = ((pipe + 1) % get_pipes_per_mec(dqm)), ++i) {

		if (!is_pipe_enabled(dqm, 0, pipe))
			continue;

		if (dqm->allocated_queues[pipe] != 0) {
			bit = find_first_bit(
				(unsigned long *)&dqm->allocated_queues[pipe],
				get_queues_per_pipe(dqm));

			clear_bit(bit,
				(unsigned long *)&dqm->allocated_queues[pipe]);
			q->pipe = pipe;
			q->queue = bit;
			set = true;
			break;
		}
	}

	if (!set)
		return -EBUSY;

	pr_debug("hqd slot - pipe %d, queue %d\n", q->pipe, q->queue);
	/* horizontal hqd allocation */
	dqm->next_pipe_to_allocate = (pipe + 1) % get_pipes_per_mec(dqm);

	return 0;
}

static inline void deallocate_hqd(struct device_queue_manager *dqm,
				struct queue *q)
{
	set_bit(q->queue, (unsigned long *)&dqm->allocated_queues[q->pipe]);
}

static int create_compute_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_hqd(dqm, q);
	if (retval)
		return retval;

	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval) {
		deallocate_hqd(dqm, q);
		return retval;
	}

	pr_debug("Loading mqd to hqd on pipe %d, queue %d\n",
			q->pipe, q->queue);

	retval = mqd->load_mqd(mqd, q->mqd, q->pipe,
			q->queue, (uint32_t __user *) q->properties.write_ptr);
	if (retval) {
		deallocate_hqd(dqm, q);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

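/*
 * Destroy a queue without the HW scheduler: preempt it through the MQD
 * manager, release its HQD or SDMA slot and, if this was the last queue
 * of the process, release the VMID as well.
 */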
static int destroy_queue_nocpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !q->mqd || !qpd);

	retval = 0;

	mutex_lock(&dqm->lock);

	if (q->properties.type == KFD_QUEUE_TYPE_COMPUTE) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_COMPUTE);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		deallocate_hqd(dqm, q);
	} else if (q->properties.type == KFD_QUEUE_TYPE_SDMA) {
		mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
		if (mqd == NULL) {
			retval = -ENOMEM;
			goto out;
		}
		dqm->sdma_queue_count--;
		deallocate_sdma_queue(dqm, q->sdma_id);
	} else {
		pr_debug("q->properties.type %d is invalid\n",
				q->properties.type);
		retval = -EINVAL;
		goto out;
	}

	retval = mqd->destroy_mqd(mqd, q->mqd,
				KFD_PREEMPT_TYPE_WAVEFRONT_RESET,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS,
				q->pipe, q->queue);

	if (retval)
		goto out;

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	list_del(&q->list);
	if (list_empty(&qpd->queues_list))
		deallocate_vmid(dqm, qpd, q);
	if (q->properties.is_active)
		dqm->queue_count--;

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

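/*
 * Write the updated queue properties to the MQD and adjust the active
 * queue count if the active state changed; when HW scheduling is used,
 * the runlist is re-executed to apply the change.
 */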
static int update_queue(struct device_queue_manager *dqm, struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool prev_active = false;

	BUG_ON(!dqm || !q || !q->mqd);

	mutex_lock(&dqm->lock);
	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	if (q->properties.is_active)
		prev_active = true;

	/*
	 * Check the new active state against the previous one and adjust
	 * the queue counter accordingly.
	 */
	retval = mqd->update_mqd(mqd, q->mqd, &q->properties);
	if ((q->properties.is_active) && (!prev_active))
		dqm->queue_count++;
	else if (!q->properties.is_active && prev_active)
		dqm->queue_count--;

	if (sched_policy != KFD_SCHED_POLICY_NO_HWS)
		retval = execute_queues_cpsch(dqm, false);

	mutex_unlock(&dqm->lock);
	return retval;
}

static struct mqd_manager *get_mqd_manager_nocpsch(
		struct device_queue_manager *dqm, enum KFD_MQD_TYPE type)
{
	struct mqd_manager *mqd;

	BUG_ON(!dqm || type >= KFD_MQD_TYPE_MAX);

	pr_debug("mqd type %d\n", type);

	mqd = dqm->mqds[type];
	if (!mqd) {
		mqd = mqd_manager_init(type, dqm->dev);
		if (!mqd)
			pr_err("mqd manager is NULL");
		dqm->mqds[type] = mqd;
	}

	return mqd;
}

static int register_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	struct device_process_node *n;
	int retval;

	BUG_ON(!dqm || !qpd);

	n = kzalloc(sizeof(struct device_process_node), GFP_KERNEL);
	if (!n)
		return -ENOMEM;

	n->qpd = qpd;

	mutex_lock(&dqm->lock);
	list_add(&n->list, &dqm->queues);

	retval = dqm->ops_asic_specific.register_process(dqm, qpd);

	dqm->processes_count++;

	mutex_unlock(&dqm->lock);

	return retval;
}

static int unregister_process_nocpsch(struct device_queue_manager *dqm,
					struct qcm_process_device *qpd)
{
	int retval;
	struct device_process_node *cur, *next;

	BUG_ON(!dqm || !qpd);

	pr_debug("qpd->queues_list is %s\n",
			list_empty(&qpd->queues_list) ? "empty" : "not empty");

	retval = 0;
	mutex_lock(&dqm->lock);

	list_for_each_entry_safe(cur, next, &dqm->queues, list) {
		if (qpd == cur->qpd) {
			list_del(&cur->list);
			kfree(cur);
			dqm->processes_count--;
			goto out;
		}
	}
	/* qpd not found in dqm list */
	retval = 1;
out:
	mutex_unlock(&dqm->lock);
	return retval;
}

static int
set_pasid_vmid_mapping(struct device_queue_manager *dqm, unsigned int pasid,
			unsigned int vmid)
{
	uint32_t pasid_mapping;

	pasid_mapping = (pasid == 0) ? 0 :
		(uint32_t)pasid |
		ATC_VMID_PASID_MAPPING_VALID;

	return dqm->dev->kfd2kgd->set_pasid_vmid_mapping(
						dqm->dev->kgd, pasid_mapping,
						vmid);
}

static void init_interrupts(struct device_queue_manager *dqm)
{
	unsigned int i;

	BUG_ON(!dqm);

	for (i = 0 ; i < get_pipes_per_mec(dqm) ; i++)
		if (is_pipe_enabled(dqm, 0, i))
			dqm->dev->kfd2kgd->init_interrupts(dqm->dev->kgd, i);
}

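/*
 * Set up the no-HW-scheduling queue manager: build the per-pipe bitmaps
 * of HQD slots available to KFD plus the VMID and SDMA queue bitmaps.
 */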
static int initialize_nocpsch(struct device_queue_manager *dqm)
{
	int pipe, queue;

	BUG_ON(!dqm);

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->next_pipe_to_allocate = 0;
	dqm->sdma_queue_count = 0;
	dqm->allocated_queues = kcalloc(get_pipes_per_mec(dqm),
					sizeof(unsigned int), GFP_KERNEL);
	if (!dqm->allocated_queues) {
		mutex_destroy(&dqm->lock);
		return -ENOMEM;
	}

	for (pipe = 0; pipe < get_pipes_per_mec(dqm); pipe++) {
		int pipe_offset = pipe * get_queues_per_pipe(dqm);

		for (queue = 0; queue < get_queues_per_pipe(dqm); queue++)
			if (test_bit(pipe_offset + queue,
				     dqm->dev->shared_resources.queue_bitmap))
				dqm->allocated_queues[pipe] |= 1 << queue;
	}

	dqm->vmid_bitmap = (1 << VMID_PER_DEVICE) - 1;
	dqm->sdma_bitmap = (1 << CIK_SDMA_QUEUES) - 1;

	return 0;
}

static void uninitialize_nocpsch(struct device_queue_manager *dqm)
{
	int i;

	BUG_ON(!dqm);

	BUG_ON(dqm->queue_count > 0 || dqm->processes_count > 0);

	kfree(dqm->allocated_queues);
	for (i = 0 ; i < KFD_MQD_TYPE_MAX ; i++)
		kfree(dqm->mqds[i]);
	mutex_destroy(&dqm->lock);
	kfd_gtt_sa_free(dqm->dev, dqm->pipeline_mem);
}

static int start_nocpsch(struct device_queue_manager *dqm)
{
	init_interrupts(dqm);
	return 0;
}

static int stop_nocpsch(struct device_queue_manager *dqm)
{
	return 0;
}

static int allocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int *sdma_queue_id)
{
	int bit;

	if (dqm->sdma_bitmap == 0)
		return -ENOMEM;

	bit = find_first_bit((unsigned long *)&dqm->sdma_bitmap,
				CIK_SDMA_QUEUES);

	clear_bit(bit, (unsigned long *)&dqm->sdma_bitmap);
	*sdma_queue_id = bit;

	return 0;
}

static void deallocate_sdma_queue(struct device_queue_manager *dqm,
				unsigned int sdma_queue_id)
{
	if (sdma_queue_id >= CIK_SDMA_QUEUES)
		return;
	set_bit(sdma_queue_id, (unsigned long *)&dqm->sdma_bitmap);
}

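/*
 * Create an SDMA queue without the HW scheduler: reserve an SDMA id,
 * derive the engine/queue pair from it, then initialize and load the MQD.
 */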
static int create_sdma_queue_nocpsch(struct device_queue_manager *dqm,
					struct queue *q,
					struct qcm_process_device *qpd)
{
	struct mqd_manager *mqd;
	int retval;

	mqd = dqm->ops.get_mqd_manager(dqm, KFD_MQD_TYPE_SDMA);
	if (!mqd)
		return -ENOMEM;

	retval = allocate_sdma_queue(dqm, &q->sdma_id);
	if (retval)
		return retval;

	q->properties.sdma_queue_id = q->sdma_id % CIK_SDMA_QUEUES_PER_ENGINE;
	q->properties.sdma_engine_id = q->sdma_id / CIK_SDMA_ENGINE_NUM;

	pr_debug("SDMA id is:    %d\n", q->sdma_id);
	pr_debug("SDMA queue id: %d\n", q->properties.sdma_queue_id);
	pr_debug("SDMA engine id: %d\n", q->properties.sdma_engine_id);

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		return retval;
	}

	retval = mqd->load_mqd(mqd, q->mqd, 0,
				0, NULL);
	if (retval) {
		deallocate_sdma_queue(dqm, q->sdma_id);
		mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);
		return retval;
	}

	return 0;
}

/*
 * Device Queue Manager implementation for cp scheduler
 */

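/*
 * Tell the HW scheduler which VMIDs and HQDs it owns, based on the
 * resources reserved for KFD by the KGD (first MEC only).
 */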
static int set_sched_resources(struct device_queue_manager *dqm)
{
	int i, mec;
	struct scheduling_resources res;

	BUG_ON(!dqm);

	res.vmid_mask = (1 << VMID_PER_DEVICE) - 1;
	res.vmid_mask <<= KFD_VMID_START_OFFSET;

	res.queue_mask = 0;
	for (i = 0; i < KGD_MAX_QUEUES; ++i) {
		mec = (i / dqm->dev->shared_resources.num_queue_per_pipe)
			/ dqm->dev->shared_resources.num_pipe_per_mec;

		if (!test_bit(i, dqm->dev->shared_resources.queue_bitmap))
			continue;

		/* only acquire queues from the first MEC */
		if (mec > 0)
			continue;

		/* This situation may be hit in the future if a new HW
		 * generation exposes more than 64 queues. If so, the
		 * definition of res.queue_mask needs updating
		 */
		if (WARN_ON(i >= (sizeof(res.queue_mask)*8))) {
			pr_err("Invalid queue enabled by amdgpu: %d\n", i);
			break;
		}

		res.queue_mask |= (1ull << i);
	}
	res.gws_mask = res.oac_mask = res.gds_heap_base =
						res.gds_heap_size = 0;

	pr_debug("Scheduling resources:\n"
			"vmid mask: 0x%8X\n"
			"queue mask: 0x%8llX\n",
			res.vmid_mask, res.queue_mask);

	return pm_send_set_resources(&dqm->packets, &res);
}

static int initialize_cpsch(struct device_queue_manager *dqm)
{
	int retval;

	BUG_ON(!dqm);

	pr_debug("num of pipes: %d\n", get_pipes_per_mec(dqm));

	mutex_init(&dqm->lock);
	INIT_LIST_HEAD(&dqm->queues);
	dqm->queue_count = dqm->processes_count = 0;
	dqm->sdma_queue_count = 0;
	dqm->active_runlist = false;
	retval = dqm->ops_asic_specific.initialize(dqm);
	if (retval)
		goto fail_init_pipelines;

	return 0;

fail_init_pipelines:
	mutex_destroy(&dqm->lock);
	return retval;
}

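/*
 * Bring up HW scheduling: initialize the packet manager, publish the
 * scheduling resources, allocate the preemption fence on the GART and
 * submit the initial runlist.
 */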
static int start_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	int retval;

	BUG_ON(!dqm);

	retval = 0;

	retval = pm_init(&dqm->packets, dqm);
	if (retval)
		goto fail_packet_manager_init;

	retval = set_sched_resources(dqm);
	if (retval)
		goto fail_set_sched_resources;

	pr_debug("Allocating fence memory\n");

	/* allocate fence memory on the gart */
	retval = kfd_gtt_sa_allocate(dqm->dev, sizeof(*dqm->fence_addr),
					&dqm->fence_mem);

	if (retval)
		goto fail_allocate_vidmem;

	dqm->fence_addr = dqm->fence_mem->cpu_ptr;
	dqm->fence_gpu_addr = dqm->fence_mem->gpu_addr;

	init_interrupts(dqm);

	list_for_each_entry(node, &dqm->queues, list)
		if (node->qpd->pqm->process && dqm->dev)
			kfd_bind_process_to_device(dqm->dev,
						node->qpd->pqm->process);

	execute_queues_cpsch(dqm, true);

	return 0;
fail_allocate_vidmem:
fail_set_sched_resources:
	pm_uninit(&dqm->packets);
fail_packet_manager_init:
	return retval;
}

static int stop_cpsch(struct device_queue_manager *dqm)
{
	struct device_process_node *node;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	destroy_queues_cpsch(dqm, true, true);

	list_for_each_entry(node, &dqm->queues, list) {
		pdd = qpd_to_pdd(node->qpd);
		pdd->bound = false;
	}
	kfd_gtt_sa_free(dqm->dev, dqm->fence_mem);
	pm_uninit(&dqm->packets);

	return 0;
}

static int create_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq || !qpd);

	mutex_lock(&dqm->lock);
	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new kernel queue because %d queues were already created\n",
				dqm->total_queue_count);
		mutex_unlock(&dqm->lock);
		return -EPERM;
	}

	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	list_add(&kq->list, &qpd->priv_queue_list);
	dqm->queue_count++;
	qpd->is_debug = true;
	execute_queues_cpsch(dqm, false);
	mutex_unlock(&dqm->lock);

	return 0;
}

static void destroy_kernel_queue_cpsch(struct device_queue_manager *dqm,
					struct kernel_queue *kq,
					struct qcm_process_device *qpd)
{
	BUG_ON(!dqm || !kq);

	mutex_lock(&dqm->lock);
	/* here we actually preempt the DIQ */
	destroy_queues_cpsch(dqm, true, false);
	list_del(&kq->list);
	dqm->queue_count--;
	qpd->is_debug = false;
	execute_queues_cpsch(dqm, false);
	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type.
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);
	mutex_unlock(&dqm->lock);
}

static void select_sdma_engine_id(struct queue *q)
{
	static int sdma_id;

	q->sdma_id = sdma_id;
	sdma_id = (sdma_id + 1) % 2;
}

static int create_queue_cpsch(struct device_queue_manager *dqm, struct queue *q,
			struct qcm_process_device *qpd, int *allocate_vmid)
{
	int retval;
	struct mqd_manager *mqd;

	BUG_ON(!dqm || !q || !qpd);

	retval = 0;

	if (allocate_vmid)
		*allocate_vmid = 0;

	mutex_lock(&dqm->lock);

	if (dqm->total_queue_count >= max_num_of_queues_per_device) {
		pr_warn("Can't create new usermode queue because %d queues were already created\n",
				dqm->total_queue_count);
		retval = -EPERM;
		goto out;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		select_sdma_engine_id(q);

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));

	if (!mqd) {
		mutex_unlock(&dqm->lock);
		return -ENOMEM;
	}

	dqm->ops_asic_specific.init_sdma_vm(dqm, q, qpd);
	retval = mqd->init_mqd(mqd, &q->mqd, &q->mqd_mem_obj,
				&q->gart_mqd_addr, &q->properties);
	if (retval)
		goto out;

	list_add(&q->list, &qpd->queues_list);
	if (q->properties.is_active) {
		dqm->queue_count++;
		retval = execute_queues_cpsch(dqm, false);
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count++;
	/*
	 * Unconditionally increment this counter, regardless of the queue's
	 * type or whether the queue is active.
	 */
	dqm->total_queue_count++;

	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

out:
	mutex_unlock(&dqm->lock);
	return retval;
}

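/*
 * Poll (yielding the CPU) until the scheduler writes fence_value to
 * fence_addr, or return -ETIME once the timeout expires.
 */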
int amdkfd_fence_wait_timeout(unsigned int *fence_addr,
				unsigned int fence_value,
				unsigned long timeout)
{
	BUG_ON(!fence_addr);
	timeout += jiffies;

	while (*fence_addr != fence_value) {
		if (time_after(jiffies, timeout)) {
			pr_err("qcm fence wait loop timeout expired\n");
			return -ETIME;
		}
		schedule();
	}

	return 0;
}

static int destroy_sdma_queues(struct device_queue_manager *dqm,
				unsigned int sdma_engine)
{
	return pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_SDMA,
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES, 0, false,
			sdma_engine);
}

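/*
 * Preempt every queue in the active runlist; static (debug) queues are
 * only preempted when preempt_static_queues is set. Completion of the
 * preemption is detected through the fence in GART memory.
 */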
static int destroy_queues_cpsch(struct device_queue_manager *dqm,
				bool preempt_static_queues, bool lock)
{
	int retval;
	enum kfd_preempt_type_filter preempt_type;
	struct kfd_process_device *pdd;

	BUG_ON(!dqm);

	retval = 0;

	if (lock)
		mutex_lock(&dqm->lock);
	if (!dqm->active_runlist)
		goto out;

	pr_debug("Before destroying queues, sdma queue count is : %u\n",
		dqm->sdma_queue_count);

	if (dqm->sdma_queue_count > 0) {
		destroy_sdma_queues(dqm, 0);
		destroy_sdma_queues(dqm, 1);
	}

	preempt_type = preempt_static_queues ?
			KFD_PREEMPT_TYPE_FILTER_ALL_QUEUES :
			KFD_PREEMPT_TYPE_FILTER_DYNAMIC_QUEUES;

	retval = pm_send_unmap_queue(&dqm->packets, KFD_QUEUE_TYPE_COMPUTE,
			preempt_type, 0, false, 0);
	if (retval)
		goto out;

	*dqm->fence_addr = KFD_FENCE_INIT;
	pm_send_query_status(&dqm->packets, dqm->fence_gpu_addr,
				KFD_FENCE_COMPLETED);
	/* the wait below must be bounded by a timeout in case preemption fails */
	retval = amdkfd_fence_wait_timeout(dqm->fence_addr, KFD_FENCE_COMPLETED,
				QUEUE_PREEMPT_DEFAULT_TIMEOUT_MS);
	if (retval) {
		pdd = kfd_get_process_device_data(dqm->dev,
				kfd_get_process(current));
		pdd->reset_wavefronts = true;
		goto out;
	}
	pm_release_ib(&dqm->packets);
	dqm->active_runlist = false;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

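/*
 * Rebuild the runlist: preempt the currently active runlist first, then
 * submit a new one if any active queues and processes remain.
 */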
static int execute_queues_cpsch(struct device_queue_manager *dqm, bool lock)
{
	int retval;

	BUG_ON(!dqm);

	if (lock)
		mutex_lock(&dqm->lock);

	retval = destroy_queues_cpsch(dqm, false, false);
	if (retval) {
		pr_err("The cp might be in an unrecoverable state due to an unsuccessful queues preemption");
		goto out;
	}

	if (dqm->queue_count <= 0 || dqm->processes_count <= 0) {
		retval = 0;
		goto out;
	}

	if (dqm->active_runlist) {
		retval = 0;
		goto out;
	}

	retval = pm_send_runlist(&dqm->packets, &dqm->queues);
	if (retval) {
		pr_err("failed to execute runlist");
		goto out;
	}
	dqm->active_runlist = true;

out:
	if (lock)
		mutex_unlock(&dqm->lock);
	return retval;
}

static int destroy_queue_cpsch(struct device_queue_manager *dqm,
				struct qcm_process_device *qpd,
				struct queue *q)
{
	int retval;
	struct mqd_manager *mqd;
	bool preempt_all_queues;

	BUG_ON(!dqm || !qpd || !q);

	preempt_all_queues = false;

	retval = 0;

	/* remove queue from list to prevent rescheduling after preemption */
	mutex_lock(&dqm->lock);

	if (qpd->is_debug) {
		/*
		 * Error: destroying a queue that belongs to a process
		 * currently being debugged is not allowed.
		 */
		retval = -EBUSY;
		goto failed_try_destroy_debugged_queue;
	}

	mqd = dqm->ops.get_mqd_manager(dqm,
			get_mqd_type_from_queue_type(q->properties.type));
	if (!mqd) {
		retval = -ENOMEM;
		goto failed;
	}

	if (q->properties.type == KFD_QUEUE_TYPE_SDMA)
		dqm->sdma_queue_count--;

	list_del(&q->list);
	if (q->properties.is_active)
		dqm->queue_count--;

	execute_queues_cpsch(dqm, false);

	mqd->uninit_mqd(mqd, q->mqd, q->mqd_mem_obj);

	/*
	 * Unconditionally decrement this counter, regardless of the queue's
	 * type
	 */
	dqm->total_queue_count--;
	pr_debug("Total of %d queues are accountable so far\n",
			dqm->total_queue_count);

	mutex_unlock(&dqm->lock);

	return 0;

failed:
failed_try_destroy_debugged_queue:

	mutex_unlock(&dqm->lock);
	return retval;
}

/*
 * Low bits must be 0000/FFFF as required by HW, high bits must be 0 to
 * stay in user mode.
 */
#define APE1_FIXED_BITS_MASK 0xFFFF80000000FFFFULL
/* APE1 limit is inclusive and 64K aligned. */
#define APE1_LIMIT_ALIGNMENT 0xFFFF

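/*
 * Validate and program the APE1 alternate aperture and the cache policy
 * for the process; without HW scheduling and with a VMID already
 * assigned, the SH_MEM registers are written immediately.
 */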
static bool set_cache_memory_policy(struct device_queue_manager *dqm,
				   struct qcm_process_device *qpd,
				   enum cache_policy default_policy,
				   enum cache_policy alternate_policy,
				   void __user *alternate_aperture_base,
				   uint64_t alternate_aperture_size)
{
	bool retval;

	mutex_lock(&dqm->lock);

	if (alternate_aperture_size == 0) {
		/* base > limit disables APE1 */
		qpd->sh_mem_ape1_base = 1;
		qpd->sh_mem_ape1_limit = 0;
	} else {
		/*
		 * In FSA64, APE1_Base[63:0] = { 16{SH_MEM_APE1_BASE[31]},
		 *			SH_MEM_APE1_BASE[31:0], 0x0000 }
		 * APE1_Limit[63:0] = { 16{SH_MEM_APE1_LIMIT[31]},
		 *			SH_MEM_APE1_LIMIT[31:0], 0xFFFF }
		 * Verify that the base and size parameters can be
		 * represented in this format and convert them.
		 * Additionally restrict APE1 to user-mode addresses.
		 */

		uint64_t base = (uintptr_t)alternate_aperture_base;
		uint64_t limit = base + alternate_aperture_size - 1;

		if (limit <= base)
			goto out;

		if ((base & APE1_FIXED_BITS_MASK) != 0)
			goto out;

		if ((limit & APE1_FIXED_BITS_MASK) != APE1_LIMIT_ALIGNMENT)
			goto out;

		qpd->sh_mem_ape1_base = base >> 16;
		qpd->sh_mem_ape1_limit = limit >> 16;
	}

	retval = dqm->ops_asic_specific.set_cache_memory_policy(
			dqm,
			qpd,
			default_policy,
			alternate_policy,
			alternate_aperture_base,
			alternate_aperture_size);

	if ((sched_policy == KFD_SCHED_POLICY_NO_HWS) && (qpd->vmid != 0))
		program_sh_mem_settings(dqm, qpd);

	pr_debug("sh_mem_config: 0x%x, ape1_base: 0x%x, ape1_limit: 0x%x\n",
		qpd->sh_mem_config, qpd->sh_mem_ape1_base,
		qpd->sh_mem_ape1_limit);

	mutex_unlock(&dqm->lock);
	return retval;

out:
	mutex_unlock(&dqm->lock);
	return false;
}

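/*
 * Allocate a device queue manager and wire up either the HW-scheduling
 * (cpsch) or no-HW-scheduling (nocpsch) ops, plus the ASIC-specific ops
 * for Kaveri (CIK) or Carrizo (VI).
 */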
struct device_queue_manager *device_queue_manager_init(struct kfd_dev *dev)
{
	struct device_queue_manager *dqm;

	BUG_ON(!dev);

	pr_debug("Loading device queue manager\n");

	dqm = kzalloc(sizeof(struct device_queue_manager), GFP_KERNEL);
	if (!dqm)
		return NULL;

	dqm->dev = dev;
	switch (sched_policy) {
	case KFD_SCHED_POLICY_HWS:
	case KFD_SCHED_POLICY_HWS_NO_OVERSUBSCRIPTION:
		/* initialize dqm for cp scheduling */
		dqm->ops.create_queue = create_queue_cpsch;
		dqm->ops.initialize = initialize_cpsch;
		dqm->ops.start = start_cpsch;
		dqm->ops.stop = stop_cpsch;
		dqm->ops.destroy_queue = destroy_queue_cpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.create_kernel_queue = create_kernel_queue_cpsch;
		dqm->ops.destroy_kernel_queue = destroy_kernel_queue_cpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	case KFD_SCHED_POLICY_NO_HWS:
		/* initialize dqm for no cp scheduling */
		dqm->ops.start = start_nocpsch;
		dqm->ops.stop = stop_nocpsch;
		dqm->ops.create_queue = create_queue_nocpsch;
		dqm->ops.destroy_queue = destroy_queue_nocpsch;
		dqm->ops.update_queue = update_queue;
		dqm->ops.get_mqd_manager = get_mqd_manager_nocpsch;
		dqm->ops.register_process = register_process_nocpsch;
		dqm->ops.unregister_process = unregister_process_nocpsch;
		dqm->ops.initialize = initialize_nocpsch;
		dqm->ops.uninitialize = uninitialize_nocpsch;
		dqm->ops.set_cache_memory_policy = set_cache_memory_policy;
		break;
	default:
		BUG();
		break;
	}

	switch (dev->device_info->asic_family) {
	case CHIP_CARRIZO:
		device_queue_manager_init_vi(&dqm->ops_asic_specific);
		break;

1191 1192
	case CHIP_KAVERI:
		device_queue_manager_init_cik(&dqm->ops_asic_specific);
		break;
	}

	if (dqm->ops.initialize(dqm)) {
		kfree(dqm);
		return NULL;
	}

	return dqm;
}

void device_queue_manager_uninit(struct device_queue_manager *dqm)
{
	BUG_ON(!dqm);

	dqm->ops.uninitialize(dqm);
	kfree(dqm);
}