/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 *
 */
#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <drm/drmP.h>
#include "gpu_scheduler.h"

#define CREATE_TRACE_POINTS
#include "gpu_sched_trace.h"

static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity);
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched);
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb);

struct kmem_cache *sched_fence_slab;
atomic_t sched_fence_slab_ref = ATOMIC_INIT(0);

/* Initialize a given run queue struct */
static void amd_sched_rq_init(struct amd_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
}

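/**
 * Add an entity to the run queue
 *
 * @rq		The run queue
 * @entity	The entity to add; does nothing if it is already queued
 */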
static void amd_sched_rq_add_entity(struct amd_sched_rq *rq,
				    struct amd_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

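/**
 * Remove an entity from the run queue
 *
 * @rq		The run queue
 * @entity	The entity to remove; clears rq->current_entity if it points
 *		at this entity
 */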
static void amd_sched_rq_remove_entity(struct amd_sched_rq *rq,
				       struct amd_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * Select an entity which could provide a job to run
 *
 * @rq		The run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct amd_sched_entity *
amd_sched_rq_select_entity(struct amd_sched_rq *rq)
{
	struct amd_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (amd_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {

		if (amd_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * Init a context entity used by the scheduler when submitting to the HW ring.
 *
 * @sched	The pointer to the scheduler
 * @entity	The pointer to a valid amd_sched_entity
 * @rq		The run queue this entity belongs to
 * @jobs	The max number of jobs in the job queue
 *
 * return 0 if succeeded. negative error code on failure
 */
int amd_sched_entity_init(struct amd_gpu_scheduler *sched,
			  struct amd_sched_entity *entity,
			  struct amd_sched_rq *rq,
			  uint32_t jobs)
{
	int r;

	if (!(sched && entity && rq))
		return -EINVAL;

	memset(entity, 0, sizeof(struct amd_sched_entity));
	INIT_LIST_HEAD(&entity->list);
	entity->rq = rq;
	entity->sched = sched;

	spin_lock_init(&entity->queue_lock);
	r = kfifo_alloc(&entity->job_queue, jobs * sizeof(void *), GFP_KERNEL);
	if (r)
		return r;

	atomic_set(&entity->fence_seq, 0);
	entity->fence_context = fence_context_alloc(2);

	return 0;
}

/**
 * Query if entity is initialized
 *
 * @sched       Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * return true if entity is initialized, false otherwise
 */
static bool amd_sched_entity_is_initialized(struct amd_gpu_scheduler *sched,
					    struct amd_sched_entity *entity)
{
	return entity->sched == sched &&
		entity->rq != NULL;
}

/**
 * Check if entity is idle
 *
 * @entity	The pointer to a valid scheduler entity
 *
 * Return true if the entity doesn't have any unscheduled jobs.
 */
static bool amd_sched_entity_is_idle(struct amd_sched_entity *entity)
{
	rmb();
	if (kfifo_is_empty(&entity->job_queue))
		return true;

	return false;
}

/**
 * Check if entity is ready
 *
 * @entity	The pointer to a valid scheduler entity
 *
 * Return true if entity could provide a job.
 */
static bool amd_sched_entity_is_ready(struct amd_sched_entity *entity)
{
	if (kfifo_is_empty(&entity->job_queue))
		return false;

	if (ACCESS_ONCE(entity->dependency))
		return false;

	return true;
}

/**
 * Destroy a context entity
 *
 * @sched       Pointer to scheduler instance
 * @entity	The pointer to a valid scheduler entity
 *
 * Cleanup and free the allocated resources.
 */
void amd_sched_entity_fini(struct amd_gpu_scheduler *sched,
			   struct amd_sched_entity *entity)
{
	struct amd_sched_rq *rq = entity->rq;

	if (!amd_sched_entity_is_initialized(sched, entity))
		return;

	/*
	 * The client will not queue more IBs during this fini, consume existing
	 * queued IBs
	 */
	wait_event(sched->job_scheduled, amd_sched_entity_is_idle(entity));

	amd_sched_rq_remove_entity(rq, entity);
	kfifo_free(&entity->job_queue);
}

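/* Fence callback: the dependency signaled, clear it and wake up the scheduler */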
static void amd_sched_entity_wakeup(struct fence *f, struct fence_cb *cb)
{
	struct amd_sched_entity *entity =
		container_of(cb, struct amd_sched_entity, cb);
	entity->dependency = NULL;
	fence_put(f);
	amd_sched_wakeup(entity->sched);
}

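/* Fence callback: just clear the dependency, without waking up the scheduler */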
static void amd_sched_entity_clear_dep(struct fence *f, struct fence_cb *cb)
{
	struct amd_sched_entity *entity =
		container_of(cb, struct amd_sched_entity, cb);
	entity->dependency = NULL;
	fence_put(f);
}

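/*
 * Handle the entity's current dependency fence.
 *
 * Returns true if a callback was installed and the caller must wait for it,
 * false if the dependency could be resolved immediately.
 */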
static bool amd_sched_entity_add_dependency_cb(struct amd_sched_entity *entity)
{
	struct amd_gpu_scheduler *sched = entity->sched;
	struct fence *fence = entity->dependency;
	struct amd_sched_fence *s_fence;

	if (fence->context == entity->fence_context) {
		/* We can ignore fences from ourself */
		fence_put(entity->dependency);
		return false;
	}

	s_fence = to_amd_sched_fence(fence);
	if (s_fence && s_fence->sched == sched) {

		/*
		 * Fence is from the same scheduler, only need to wait for
		 * it to be scheduled
		 */
		fence = fence_get(&s_fence->scheduled);
		fence_put(entity->dependency);
		entity->dependency = fence;
		if (!fence_add_callback(fence, &entity->cb,
					amd_sched_entity_clear_dep))
			return true;

		/* Ignore it when it is already scheduled */
		fence_put(fence);
		return false;
	}

	if (!fence_add_callback(entity->dependency, &entity->cb,
				amd_sched_entity_wakeup))
		return true;

	fence_put(entity->dependency);
	return false;
}

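/*
 * Peek at the next job of an entity without removing it from the queue.
 *
 * Returns NULL if the queue is empty or the job still has an unresolved
 * dependency.
 */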
static struct amd_sched_job *
amd_sched_entity_pop_job(struct amd_sched_entity *entity)
{
	struct amd_gpu_scheduler *sched = entity->sched;
	struct amd_sched_job *sched_job;

	if (!kfifo_out_peek(&entity->job_queue, &sched_job, sizeof(sched_job)))
		return NULL;

	while ((entity->dependency = sched->ops->dependency(sched_job)))
		if (amd_sched_entity_add_dependency_cb(entity))
			return NULL;

	return sched_job;
}

/**
 * Helper to submit a job to the job queue
 *
 * @sched_job		The pointer to job required to submit
 *
 * Returns true if we could submit the job.
 */
static bool amd_sched_entity_in(struct amd_sched_job *sched_job)
{
	struct amd_gpu_scheduler *sched = sched_job->sched;
	struct amd_sched_entity *entity = sched_job->s_entity;
	bool added, first = false;

	spin_lock(&entity->queue_lock);
	added = kfifo_in(&entity->job_queue, &sched_job,
			sizeof(sched_job)) == sizeof(sched_job);

	if (added && kfifo_len(&entity->job_queue) == sizeof(sched_job))
		first = true;

	spin_unlock(&entity->queue_lock);

	/* first job wakes up scheduler */
	if (first) {
		/* Add the entity to the run queue */
		amd_sched_rq_add_entity(entity->rq, entity);
		amd_sched_wakeup(sched);
	}
	return added;
}

/* job_finish is called after the hw fence signaled; it removes the job
 * from the ring_mirror_list, re-arms the timeout for the next job and
 * frees the job.
 */
static void amd_sched_job_finish(struct work_struct *work)
{
	struct amd_sched_job *s_job = container_of(work, struct amd_sched_job,
						   finish_work);
	struct amd_gpu_scheduler *sched = s_job->sched;

	/* remove job from ring_mirror_list */
	spin_lock(&sched->job_list_lock);
	list_del_init(&s_job->node);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT) {
		struct amd_sched_job *next;

		spin_unlock(&sched->job_list_lock);
		cancel_delayed_work_sync(&s_job->work_tdr);
		spin_lock(&sched->job_list_lock);

		/* queue TDR for next job */
		next = list_first_entry_or_null(&sched->ring_mirror_list,
						struct amd_sched_job, node);

		if (next)
			schedule_delayed_work(&next->work_tdr, sched->timeout);
	}
	spin_unlock(&sched->job_list_lock);
	sched->ops->free_job(s_job);
}

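/* Callback for the finished fence: defer job cleanup to the finish_work handler */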
static void amd_sched_job_finish_cb(struct fence *f, struct fence_cb *cb)
{
	struct amd_sched_job *job = container_of(cb, struct amd_sched_job,
						 finish_cb);
	schedule_work(&job->finish_work);
}

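/* Add the job to the ring mirror list and, for the first job in flight,
 * start the timeout handler.
 */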
static void amd_sched_job_begin(struct amd_sched_job *s_job)
{
	struct amd_gpu_scheduler *sched = s_job->sched;

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->node, &sched->ring_mirror_list);
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    list_first_entry_or_null(&sched->ring_mirror_list,
				     struct amd_sched_job, node) == s_job)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);
	spin_unlock(&sched->job_list_lock);
}

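/* Timeout work handler: the hw fence didn't signal in time, let the
 * driver's timedout_job callback deal with it.
 */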
static void amd_sched_job_timedout(struct work_struct *work)
{
	struct amd_sched_job *job = container_of(work, struct amd_sched_job,
						 work_tdr.work);

	job->sched->ops->timedout_job(job);
}

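/* Drop the hw fence callbacks and references for all jobs on the ring
 * mirror list (used when the hardware is reset).
 */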
void amd_sched_hw_job_reset(struct amd_gpu_scheduler *sched)
{
	struct amd_sched_job *s_job;

	spin_lock(&sched->job_list_lock);
	list_for_each_entry_reverse(s_job, &sched->ring_mirror_list, node) {
		if (fence_remove_callback(s_job->s_fence->parent, &s_job->s_fence->cb)) {
			fence_put(s_job->s_fence->parent);
			s_job->s_fence->parent = NULL;
		}
	}
	spin_unlock(&sched->job_list_lock);
}

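/* Re-submit all jobs on the ring mirror list to the hardware and re-arm
 * the timeout handler for the first one.
 */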
void amd_sched_job_recovery(struct amd_gpu_scheduler *sched)
{
	struct amd_sched_job *s_job;
	int r;

	spin_lock(&sched->job_list_lock);
	s_job = list_first_entry_or_null(&sched->ring_mirror_list,
					 struct amd_sched_job, node);
	if (s_job)
		schedule_delayed_work(&s_job->work_tdr, sched->timeout);

	list_for_each_entry(s_job, &sched->ring_mirror_list, node) {
		struct amd_sched_fence *s_fence = s_job->s_fence;
		struct fence *fence = sched->ops->run_job(s_job);
		if (fence) {
			s_fence->parent = fence_get(fence);
			r = fence_add_callback(fence, &s_fence->cb,
					       amd_sched_process_job);
			if (r == -ENOENT)
				amd_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			fence_put(fence);
		} else {
			DRM_ERROR("Failed to run job!\n");
			amd_sched_process_job(NULL, &s_fence->cb);
		}
	}
	spin_unlock(&sched->job_list_lock);
}

/**
 * Submit a job to the job queue
 *
 * @sched_job		The pointer to job required to submit
 *
 * Blocks until the job could be pushed into the entity's queue.
 */
void amd_sched_entity_push_job(struct amd_sched_job *sched_job)
{
	struct amd_sched_entity *entity = sched_job->s_entity;

	trace_amd_sched_job(sched_job);
	fence_add_callback(&sched_job->s_fence->finished, &sched_job->finish_cb,
			   amd_sched_job_finish_cb);
	wait_event(entity->sched->job_scheduled,
		   amd_sched_entity_in(sched_job));
}

/* init a sched_job with basic fields */
int amd_sched_job_init(struct amd_sched_job *job,
		       struct amd_gpu_scheduler *sched,
		       struct amd_sched_entity *entity,
		       void *owner)
{
	job->sched = sched;
	job->s_entity = entity;
	job->s_fence = amd_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;

	INIT_WORK(&job->finish_work, amd_sched_job_finish);
	INIT_LIST_HEAD(&job->node);
	INIT_DELAYED_WORK(&job->work_tdr, amd_sched_job_timedout);

	return 0;
}

/**
 * Return true if we can push more jobs to the hw.
 */
static bool amd_sched_ready(struct amd_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * Wake up the scheduler when it is ready
 */
static void amd_sched_wakeup(struct amd_gpu_scheduler *sched)
{
	if (amd_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * Select next entity to process
 */
static struct amd_sched_entity *
amd_sched_select_entity(struct amd_gpu_scheduler *sched)
{
	struct amd_sched_entity *entity;
	int i;

	if (!amd_sched_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = 0; i < AMD_SCHED_MAX_PRIORITY; i++) {
		entity = amd_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

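/* Hardware fence callback: mark the scheduler fence as finished and wake
 * up the scheduler thread for the next job.
 */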
static void amd_sched_process_job(struct fence *f, struct fence_cb *cb)
{
	struct amd_sched_fence *s_fence =
		container_of(cb, struct amd_sched_fence, cb);
	struct amd_gpu_scheduler *sched = s_fence->sched;

	atomic_dec(&sched->hw_rq_count);
	amd_sched_fence_finished(s_fence);

	trace_amd_sched_process_job(s_fence);
	fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

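/* Park the scheduler thread if somebody requested it; returns true if it was parked */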
static bool amd_sched_blocked(struct amd_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

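/* Main loop of the scheduler thread: pick a ready entity, pop its next job,
 * hand it to the hardware and install the completion callback.
 */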
static int amd_sched_main(void *param)
{
	struct sched_param sparam = {.sched_priority = 1};
	struct amd_gpu_scheduler *sched = (struct amd_gpu_scheduler *)param;
	int r, count;

	sched_setscheduler(current, SCHED_FIFO, &sparam);

	while (!kthread_should_stop()) {
		struct amd_sched_entity *entity = NULL;
		struct amd_sched_fence *s_fence;
		struct amd_sched_job *sched_job;
		struct fence *fence;

		wait_event_interruptible(sched->wake_up_worker,
					 (!amd_sched_blocked(sched) &&
					  (entity = amd_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (!entity)
			continue;

		sched_job = amd_sched_entity_pop_job(entity);
		if (!sched_job)
			continue;

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		amd_sched_job_begin(sched_job);

		fence = sched->ops->run_job(sched_job);
		amd_sched_fence_scheduled(s_fence);
		if (fence) {
			s_fence->parent = fence_get(fence);
			r = fence_add_callback(fence, &s_fence->cb,
					       amd_sched_process_job);
			if (r == -ENOENT)
				amd_sched_process_job(fence, &s_fence->cb);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			fence_put(fence);
		} else {
			DRM_ERROR("Failed to run job!\n");
			amd_sched_process_job(NULL, &s_fence->cb);
		}

		count = kfifo_out(&entity->job_queue, &sched_job,
				sizeof(sched_job));
		WARN_ON(count != sizeof(sched_job));
		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * Init a gpu scheduler instance
 *
 * @sched		The pointer to the scheduler
 * @ops			The backend operations for this scheduler.
 * @hw_submission	Number of hw submissions that can be in flight.
 * @timeout		Job timeout in jiffies, or MAX_SCHEDULE_TIMEOUT to disable it.
 * @name		Name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int amd_sched_init(struct amd_gpu_scheduler *sched,
		   const struct amd_sched_backend_ops *ops,
		   unsigned hw_submission, long timeout, const char *name)
{
	int i;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	for (i = 0; i < AMD_SCHED_MAX_PRIORITY; i++)
		amd_sched_rq_init(&sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->ring_mirror_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	if (atomic_inc_return(&sched_fence_slab_ref) == 1) {
		sched_fence_slab = kmem_cache_create(
			"amd_sched_fence", sizeof(struct amd_sched_fence), 0,
			SLAB_HWCACHE_ALIGN, NULL);
		if (!sched_fence_slab)
			return -ENOMEM;
	}

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(amd_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return PTR_ERR(sched->thread);
	}

	return 0;
}

/**
 * Destroy a gpu scheduler
 *
 * @sched	The pointer to the scheduler
 */
void amd_sched_fini(struct amd_gpu_scheduler *sched)
{
	if (sched->thread)
		kthread_stop(sched->thread);
	if (atomic_dec_and_test(&sched_fence_slab_ref))
		kmem_cache_destroy(sched_fence_slab);
}