/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * DOC: Overview
 *
 * The GPU scheduler provides entities which allow userspace to push jobs
 * into software queues which are then scheduled on a hardware run queue.
 * The software queues have a priority among them. The scheduler selects entities
 * from the run queue in a round-robin fashion. The scheduler provides dependency handling
 * features among jobs. The driver is supposed to provide callback functions for
 * backend operations to the scheduler like submitting a job to hardware run queue,
 * returning the dependencies of a job etc.
 *
 * The organisation of the scheduler is the following:
 *
 * 1. Each hw run queue has one scheduler
 * 2. Each scheduler has multiple run queues with different priorities
 *    (e.g., KERNEL, HIGH, NORMAL)
 * 3. Each scheduler run queue has a queue of entities to schedule
 * 4. Entities themselves maintain a queue of jobs that will be scheduled on
 *    the hardware.
 *
 * The jobs in an entity are always scheduled in the order in which they were pushed.
 */
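
/*
 * A minimal driver-side sketch of the flow described above. The names
 * my_ops, my_sched, my_entity, my_job and my_file_priv are hypothetical
 * driver objects, not part of this file, and the numeric parameters are
 * only placeholders:
 *
 *	// one scheduler per hardware run queue, set up at driver init time
 *	drm_sched_init(&my_sched, &my_ops, 4, 3, msecs_to_jiffies(1000), "my_ring");
 *
 *	// one entity per userspace context/queue, attached to the scheduler
 *	struct drm_gpu_scheduler *sched_list[] = { &my_sched };
 *	drm_sched_entity_init(&my_entity, DRM_SCHED_PRIORITY_NORMAL,
 *			      sched_list, 1, NULL);
 *
 *	// per submission: init the job, resolve dependencies, then push it
 *	drm_sched_job_init(&my_job.base, &my_entity, my_file_priv);
 *	drm_sched_entity_push_job(&my_job.base, &my_entity);
 *
 * Once pushed, the scheduler thread picks the entity from its run queue,
 * calls the driver's ->run_job() callback and tracks the returned hardware
 * fence until it signals.
 */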

#include <linux/kthread.h>
#include <linux/wait.h>
#include <linux/sched.h>
#include <linux/completion.h>
#include <uapi/linux/sched/types.h>

#include <drm/drm_print.h>
#include <drm/gpu_scheduler.h>
#include <drm/spsc_queue.h>

#define CREATE_TRACE_POINTS
#include "gpu_scheduler_trace.h"

#define to_drm_sched_job(sched_job)		\
		container_of((sched_job), struct drm_sched_job, queue_node)

/**
 * drm_sched_rq_init - initialize a given run queue struct
 *
 * @sched: scheduler instance to associate with this run queue
 * @rq: scheduler run queue
 *
 * Initializes a scheduler runqueue.
 */
static void drm_sched_rq_init(struct drm_gpu_scheduler *sched,
			      struct drm_sched_rq *rq)
{
	spin_lock_init(&rq->lock);
	INIT_LIST_HEAD(&rq->entities);
	rq->current_entity = NULL;
	rq->sched = sched;
}

/**
 * drm_sched_rq_add_entity - add an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Adds a scheduler entity to the run queue.
 */
void drm_sched_rq_add_entity(struct drm_sched_rq *rq,
			     struct drm_sched_entity *entity)
{
	if (!list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	atomic_inc(&rq->sched->score);
	list_add_tail(&entity->list, &rq->entities);
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_remove_entity - remove an entity
 *
 * @rq: scheduler run queue
 * @entity: scheduler entity
 *
 * Removes a scheduler entity from the run queue.
 */
void drm_sched_rq_remove_entity(struct drm_sched_rq *rq,
				struct drm_sched_entity *entity)
{
	if (list_empty(&entity->list))
		return;
	spin_lock(&rq->lock);
	atomic_dec(&rq->sched->score);
	list_del_init(&entity->list);
	if (rq->current_entity == entity)
		rq->current_entity = NULL;
	spin_unlock(&rq->lock);
}

/**
 * drm_sched_rq_select_entity - Select an entity which could provide a job to run
 *
 * @rq: scheduler run queue to check.
 *
 * Try to find a ready entity, returns NULL if none found.
 */
static struct drm_sched_entity *
drm_sched_rq_select_entity(struct drm_sched_rq *rq)
{
	struct drm_sched_entity *entity;

	spin_lock(&rq->lock);

	entity = rq->current_entity;
	if (entity) {
		list_for_each_entry_continue(entity, &rq->entities, list) {
			if (drm_sched_entity_is_ready(entity)) {
				rq->current_entity = entity;
				reinit_completion(&entity->entity_idle);
				spin_unlock(&rq->lock);
				return entity;
			}
		}
	}

	list_for_each_entry(entity, &rq->entities, list) {

		if (drm_sched_entity_is_ready(entity)) {
			rq->current_entity = entity;
			reinit_completion(&entity->entity_idle);
			spin_unlock(&rq->lock);
			return entity;
		}

		if (entity == rq->current_entity)
			break;
	}

	spin_unlock(&rq->lock);

	return NULL;
}

/**
 * drm_sched_job_done - complete a job
 * @s_job: pointer to the job which is done
 *
 * Finish the job's fence and wake up the worker thread.
 */
static void drm_sched_job_done(struct drm_sched_job *s_job)
{
	struct drm_sched_fence *s_fence = s_job->s_fence;
	struct drm_gpu_scheduler *sched = s_fence->sched;

	atomic_dec(&sched->hw_rq_count);
	atomic_dec(&sched->score);

	trace_drm_sched_process_job(s_fence);

	dma_fence_get(&s_fence->finished);
	drm_sched_fence_finished(s_fence);
	dma_fence_put(&s_fence->finished);
	wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_job_done_cb - the callback for a done job
 * @f: fence
 * @cb: fence callbacks
 */
static void drm_sched_job_done_cb(struct dma_fence *f, struct dma_fence_cb *cb)
{
	struct drm_sched_job *s_job = container_of(cb, struct drm_sched_job, cb);

	drm_sched_job_done(s_job);
}

/**
 * drm_sched_dependency_optimized
 *
 * @fence: the dependency fence
 * @entity: the entity which depends on the above fence
 *
 * Returns true if the dependency can be optimized and false otherwise
 */
bool drm_sched_dependency_optimized(struct dma_fence *fence,
				    struct drm_sched_entity *entity)
{
	struct drm_gpu_scheduler *sched = entity->rq->sched;
	struct drm_sched_fence *s_fence;

	if (!fence || dma_fence_is_signaled(fence))
		return false;
	if (fence->context == entity->fence_context)
		return true;
	s_fence = to_drm_sched_fence(fence);
	if (s_fence && s_fence->sched == sched)
		return true;

	return false;
}
EXPORT_SYMBOL(drm_sched_dependency_optimized);

/**
 * drm_sched_start_timeout - start timeout for reset worker
 *
 * @sched: scheduler instance to start the worker for
 *
 * Start the timeout for the given scheduler.
 */
static void drm_sched_start_timeout(struct drm_gpu_scheduler *sched)
{
	if (sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !list_empty(&sched->pending_list))
		schedule_delayed_work(&sched->work_tdr, sched->timeout);
}

/**
 * drm_sched_fault - immediately start timeout handler
 *
 * @sched: scheduler where the timeout handling should be started.
 *
 * Start timeout handling immediately when the driver detects a hardware fault.
 */
void drm_sched_fault(struct drm_gpu_scheduler *sched)
{
	mod_delayed_work(system_wq, &sched->work_tdr, 0);
}
EXPORT_SYMBOL(drm_sched_fault);
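
/*
 * A sketch of typical usage, assuming a hypothetical driver interrupt handler
 * and ring structure that are not part of this file:
 *
 *	static irqreturn_t my_fault_irq_handler(int irq, void *arg)
 *	{
 *		struct my_ring *ring = arg;
 *
 *		// skip the remaining timeout and start TDR handling right away
 *		drm_sched_fault(&ring->sched);
 *		return IRQ_HANDLED;
 *	}
 */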

/**
 * drm_sched_suspend_timeout - Suspend scheduler job timeout
 *
 * @sched: scheduler instance for which to suspend the timeout
 *
 * Suspend the delayed work timeout for the scheduler. This is done by
 * modifying the delayed work timeout to an arbitrarily large value,
 * MAX_SCHEDULE_TIMEOUT in this case.
 *
 * Returns the timeout remaining.
 *
 */
unsigned long drm_sched_suspend_timeout(struct drm_gpu_scheduler *sched)
{
	unsigned long sched_timeout, now = jiffies;

	sched_timeout = sched->work_tdr.timer.expires;

	/*
	 * Modify the timeout to an arbitrarily large value. This also prevents
	 * the timeout from being restarted when new submissions arrive.
	 */
	if (mod_delayed_work(system_wq, &sched->work_tdr, MAX_SCHEDULE_TIMEOUT)
			&& time_after(sched_timeout, now))
		return sched_timeout - now;
	else
		return sched->timeout;
}
EXPORT_SYMBOL(drm_sched_suspend_timeout);

/**
 * drm_sched_resume_timeout - Resume scheduler job timeout
 *
 * @sched: scheduler instance for which to resume the timeout
 * @remaining: remaining timeout
 *
 * Resume the delayed work timeout for the scheduler.
 */
void drm_sched_resume_timeout(struct drm_gpu_scheduler *sched,
		unsigned long remaining)
{
	spin_lock(&sched->job_list_lock);

	if (list_empty(&sched->pending_list))
		cancel_delayed_work(&sched->work_tdr);
	else
		mod_delayed_work(system_wq, &sched->work_tdr, remaining);

	spin_unlock(&sched->job_list_lock);
}
EXPORT_SYMBOL(drm_sched_resume_timeout);
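
/*
 * A sketch of how the suspend/resume pair can bracket a period in which the
 * hardware is known not to make progress (e.g. around a world switch or
 * preemption), assuming a hypothetical driver ring object that is not part
 * of this file:
 *
 *	unsigned long remaining;
 *
 *	remaining = drm_sched_suspend_timeout(&ring->sched);
 *	// ... hardware is paused, jobs cannot meaningfully time out ...
 *	drm_sched_resume_timeout(&ring->sched, remaining);
 *
 * The value returned by drm_sched_suspend_timeout() is handed back to
 * drm_sched_resume_timeout() so the job keeps whatever timeout budget it
 * had left.
 */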

static void drm_sched_job_begin(struct drm_sched_job *s_job)
{
	struct drm_gpu_scheduler *sched = s_job->sched;

	spin_lock(&sched->job_list_lock);
	list_add_tail(&s_job->list, &sched->pending_list);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

static void drm_sched_job_timedout(struct work_struct *work)
{
	struct drm_gpu_scheduler *sched;
	struct drm_sched_job *job;

	sched = container_of(work, struct drm_gpu_scheduler, work_tdr.work);

	/* Protects against concurrent deletion in drm_sched_get_cleanup_job */
	spin_lock(&sched->job_list_lock);
	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job) {
		/*
		 * Remove the bad job so it cannot be freed by a concurrent
		 * drm_sched_get_cleanup_job. It will be reinserted after
		 * sched->thread is parked, at which point it's safe.
		 */
		list_del_init(&job->list);
		spin_unlock(&sched->job_list_lock);

		job->sched->ops->timedout_job(job);

		/*
		 * Guilty job did complete and hence needs to be manually removed
		 * See drm_sched_stop doc.
		 */
		if (sched->free_guilty) {
			job->sched->ops->free_job(job);
			sched->free_guilty = false;
		}
	} else {
		spin_unlock(&sched->job_list_lock);
	}

	spin_lock(&sched->job_list_lock);
	drm_sched_start_timeout(sched);
	spin_unlock(&sched->job_list_lock);
}

/**
 * drm_sched_increase_karma - Update sched_entity guilty flag
 *
 * @bad: The job guilty of time out
 *
 * Increment on every hang caused by the 'bad' job. If this exceeds the hang
 * limit of the scheduler then the respective sched entity is marked guilty and
 * jobs from it will not be scheduled further.
 */
void drm_sched_increase_karma(struct drm_sched_job *bad)
{
	int i;
	struct drm_sched_entity *tmp;
	struct drm_sched_entity *entity;
	struct drm_gpu_scheduler *sched = bad->sched;

	/* Don't increase @bad's karma if it's from the KERNEL RQ, because a GPU
	 * hang can also corrupt kernel jobs (like VM updates); keep in mind
	 * that kernel jobs are always considered good.
	 */
	if (bad->s_priority != DRM_SCHED_PRIORITY_KERNEL) {
		atomic_inc(&bad->karma);
		for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_KERNEL;
		     i++) {
			struct drm_sched_rq *rq = &sched->sched_rq[i];

			spin_lock(&rq->lock);
			list_for_each_entry_safe(entity, tmp, &rq->entities, list) {
				if (bad->s_fence->scheduled.context ==
				    entity->fence_context) {
					if (atomic_read(&bad->karma) >
					    bad->sched->hang_limit)
						if (entity->guilty)
							atomic_set(entity->guilty, 1);
					break;
				}
			}
			spin_unlock(&rq->lock);
			if (&entity->list != &rq->entities)
				break;
		}
	}
}
EXPORT_SYMBOL(drm_sched_increase_karma);

/**
 * drm_sched_stop - stop the scheduler
 *
 * @sched: scheduler instance
 * @bad: job which caused the time out
 *
 * Stop the scheduler, and also remove and free all completed jobs.
 * Note: the bad job will not be freed as it might be used later, so it is the
 * caller's responsibility to release it manually if it is not part of the
 * pending list any more.
 */
void drm_sched_stop(struct drm_gpu_scheduler *sched, struct drm_sched_job *bad)
{
	struct drm_sched_job *s_job, *tmp;

	kthread_park(sched->thread);

	/*
	 * Reinsert the bad job here - now it's safe as
	 * drm_sched_get_cleanup_job cannot race against us and release the
	 * bad job at this point - we parked (waited for) any in progress
	 * (earlier) cleanups and drm_sched_get_cleanup_job will not be called
	 * now until the scheduler thread is unparked.
	 */
	if (bad && bad->sched == sched)
		/*
		 * Add at the head of the queue to reflect it was the earliest
		 * job extracted.
		 */
		list_add(&bad->list, &sched->pending_list);

	/*
	 * Iterate the job list from the last to the first job and either
	 * deactivate their HW callbacks or remove them from the pending list
	 * if they have already signaled.
	 * This iteration is thread safe as the sched thread is stopped.
	 */
	list_for_each_entry_safe_reverse(s_job, tmp, &sched->pending_list,
					 list) {
		if (s_job->s_fence->parent &&
		    dma_fence_remove_callback(s_job->s_fence->parent,
					      &s_job->cb)) {
			atomic_dec(&sched->hw_rq_count);
		} else {
			/*
			 * Remove the job from the pending_list.
			 * Locking here is for the concurrent resume timeout.
			 */
			spin_lock(&sched->job_list_lock);
			list_del_init(&s_job->list);
			spin_unlock(&sched->job_list_lock);

			/*
			 * Wait for the job's HW fence callback to finish using
			 * s_job before releasing it.
			 *
			 * The job is still alive, so the fence refcount is at
			 * least 1.
			 */
			dma_fence_wait(&s_job->s_fence->finished, false);

			/*
			 * We must keep the bad job alive for later use during
			 * recovery by some of the drivers, but leave a hint
			 * that the guilty job must be released.
			 */
			if (bad != s_job)
				sched->ops->free_job(s_job);
			else
				sched->free_guilty = true;
		}
	}

	/*
	 * Stop the pending timer in flight, as we rearm it in drm_sched_start().
	 * This avoids the pending timeout work in progress firing right away
	 * after this TDR finished and before the newly restarted jobs had a
	 * chance to complete.
	 */
	cancel_delayed_work(&sched->work_tdr);
}
EXPORT_SYMBOL(drm_sched_stop);

/**
 * drm_sched_start - recover jobs after a reset
 *
 * @sched: scheduler instance
 * @full_recovery: proceed with complete sched restart
 *
 */
void drm_sched_start(struct drm_gpu_scheduler *sched, bool full_recovery)
{
	struct drm_sched_job *s_job, *tmp;
	int r;

	/*
	 * Locking the list is not required here as the sched thread is parked
	 * so no new jobs are being inserted or removed. Also, concurrent
	 * GPU recoveries can't run in parallel.
	 */
	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct dma_fence *fence = s_job->s_fence->parent;

		atomic_inc(&sched->hw_rq_count);

		if (!full_recovery)
			continue;

		if (fence) {
			r = dma_fence_add_callback(fence, &s_job->cb,
						   drm_sched_job_done_cb);
			if (r == -ENOENT)
				drm_sched_job_done(s_job);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
		} else
			drm_sched_job_done(s_job);
	}

	if (full_recovery) {
		spin_lock(&sched->job_list_lock);
		drm_sched_start_timeout(sched);
		spin_unlock(&sched->job_list_lock);
	}

	kthread_unpark(sched->thread);
}
EXPORT_SYMBOL(drm_sched_start);

/**
 * drm_sched_resubmit_jobs - helper to relaunch jobs from the pending list
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_resubmit_jobs(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *s_job, *tmp;
	uint64_t guilty_context;
	bool found_guilty = false;
	struct dma_fence *fence;

	list_for_each_entry_safe(s_job, tmp, &sched->pending_list, list) {
		struct drm_sched_fence *s_fence = s_job->s_fence;

		if (!found_guilty && atomic_read(&s_job->karma) > sched->hang_limit) {
			found_guilty = true;
			guilty_context = s_job->s_fence->scheduled.context;
		}

		if (found_guilty && s_job->s_fence->scheduled.context == guilty_context)
			dma_fence_set_error(&s_fence->finished, -ECANCELED);

		dma_fence_put(s_job->s_fence->parent);
		fence = sched->ops->run_job(s_job);

		if (IS_ERR_OR_NULL(fence)) {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			s_job->s_fence->parent = NULL;
		} else {
			s_job->s_fence->parent = fence;
		}
	}
}
EXPORT_SYMBOL(drm_sched_resubmit_jobs);
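
/*
 * A sketch of the recovery sequence a driver's ->timedout_job() callback can
 * follow, using drm_sched_stop(), drm_sched_increase_karma(),
 * drm_sched_resubmit_jobs() and drm_sched_start() from this file. The reset
 * helper is a hypothetical driver function:
 *
 *	static void my_timedout_job(struct drm_sched_job *bad)
 *	{
 *		struct drm_gpu_scheduler *sched = bad->sched;
 *
 *		drm_sched_stop(sched, bad);		// park thread, detach HW fences
 *		drm_sched_increase_karma(bad);		// mark the guilty entity
 *		my_hw_reset(bad);			// driver-specific engine reset
 *		drm_sched_resubmit_jobs(sched);		// re-run the pending jobs
 *		drm_sched_start(sched, true);		// re-arm fences, unpark thread
 *	}
 */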

/**
 * drm_sched_job_init - init a scheduler job
 *
 * @job: scheduler job to init
 * @entity: scheduler entity to use
 * @owner: job owner for debugging
 *
 * Refer to drm_sched_entity_push_job() documentation
 * for locking considerations.
 *
 * Returns 0 for success, negative error code otherwise.
 */
int drm_sched_job_init(struct drm_sched_job *job,
		       struct drm_sched_entity *entity,
		       void *owner)
{
	struct drm_gpu_scheduler *sched;

	drm_sched_entity_select_rq(entity);
	if (!entity->rq)
		return -ENOENT;

	sched = entity->rq->sched;

	job->sched = sched;
	job->entity = entity;
	job->s_priority = entity->rq - sched->sched_rq;
	job->s_fence = drm_sched_fence_create(entity, owner);
	if (!job->s_fence)
		return -ENOMEM;
	job->id = atomic64_inc_return(&sched->job_id_count);

	INIT_LIST_HEAD(&job->list);

	return 0;
}
EXPORT_SYMBOL(drm_sched_job_init);

/**
 * drm_sched_job_cleanup - clean up scheduler job resources
 *
 * @job: scheduler job to clean up
 */
void drm_sched_job_cleanup(struct drm_sched_job *job)
{
	dma_fence_put(&job->s_fence->finished);
	job->s_fence = NULL;
}
EXPORT_SYMBOL(drm_sched_job_cleanup);
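
/*
 * drm_sched_job_cleanup() is typically called from the driver's ->free_job()
 * callback once the scheduler is done with the job. A minimal sketch, where
 * struct my_job and my_job_free_resources() are hypothetical driver helpers:
 *
 *	static void my_free_job(struct drm_sched_job *sched_job)
 *	{
 *		struct my_job *job = container_of(sched_job, struct my_job, base);
 *
 *		drm_sched_job_cleanup(sched_job);	// drop the scheduler fence
 *		my_job_free_resources(job);		// driver-specific teardown
 *	}
 */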

/**
 * drm_sched_ready - is the scheduler ready
 *
 * @sched: scheduler instance
 *
 * Return true if we can push more jobs to the hw, otherwise false.
 */
static bool drm_sched_ready(struct drm_gpu_scheduler *sched)
{
	return atomic_read(&sched->hw_rq_count) <
		sched->hw_submission_limit;
}

/**
 * drm_sched_wakeup - Wake up the scheduler when it is ready
 *
 * @sched: scheduler instance
 *
 */
void drm_sched_wakeup(struct drm_gpu_scheduler *sched)
{
	if (drm_sched_ready(sched))
		wake_up_interruptible(&sched->wake_up_worker);
}

/**
 * drm_sched_select_entity - Select next entity to process
 *
 * @sched: scheduler instance
 *
 * Returns the entity to process or NULL if none are found.
 */
static struct drm_sched_entity *
drm_sched_select_entity(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_entity *entity;
	int i;

	if (!drm_sched_ready(sched))
		return NULL;

	/* Kernel run queue has higher priority than normal run queue */
	for (i = DRM_SCHED_PRIORITY_COUNT - 1; i >= DRM_SCHED_PRIORITY_MIN; i--) {
		entity = drm_sched_rq_select_entity(&sched->sched_rq[i]);
		if (entity)
			break;
	}

	return entity;
}

/**
 * drm_sched_get_cleanup_job - fetch the next finished job to be destroyed
 *
 * @sched: scheduler instance
 *
 * Returns the next finished job from the pending list (if there is one)
 * that is ready to be destroyed.
 */
static struct drm_sched_job *
drm_sched_get_cleanup_job(struct drm_gpu_scheduler *sched)
{
	struct drm_sched_job *job;

	/*
	 * Don't destroy jobs while the timeout worker is running, OR the
	 * thread is being parked and hence assumed to not touch pending_list
	 */
	if ((sched->timeout != MAX_SCHEDULE_TIMEOUT &&
	    !cancel_delayed_work(&sched->work_tdr)) ||
	    kthread_should_park())
		return NULL;

	spin_lock(&sched->job_list_lock);

	job = list_first_entry_or_null(&sched->pending_list,
				       struct drm_sched_job, list);

	if (job && dma_fence_is_signaled(&job->s_fence->finished)) {
		/* remove job from pending_list */
		list_del_init(&job->list);
	} else {
		job = NULL;
		/* queue timeout for next job */
		drm_sched_start_timeout(sched);
	}

	spin_unlock(&sched->job_list_lock);

	return job;
}

/**
 * drm_sched_pick_best - Get a drm sched from a sched_list with the least load
 * @sched_list: list of drm_gpu_schedulers
 * @num_sched_list: number of drm_gpu_schedulers in the sched_list
 *
 * Returns pointer of the sched with the least load or NULL if none of the
 * drm_gpu_schedulers are ready
 */
struct drm_gpu_scheduler *
drm_sched_pick_best(struct drm_gpu_scheduler **sched_list,
		     unsigned int num_sched_list)
{
	struct drm_gpu_scheduler *sched, *picked_sched = NULL;
	int i;
	unsigned int min_score = UINT_MAX, num_score;

	for (i = 0; i < num_sched_list; ++i) {
		sched = sched_list[i];

		if (!sched->ready) {
			DRM_WARN("scheduler %s is not ready, skipping",
				 sched->name);
			continue;
		}

		num_score = atomic_read(&sched->score);
		if (num_score < min_score) {
			min_score = num_score;
			picked_sched = sched;
		}
	}

	return picked_sched;
}
EXPORT_SYMBOL(drm_sched_pick_best);
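
/*
 * A sketch of how load balancing is typically set up by a driver: the entity
 * is given several schedulers (one per equivalent hardware ring), and run
 * queue selection for each job then picks the least-loaded one, with this
 * helper expected to be used by drm_sched_entity_select_rq() in
 * drm_sched_entity.c. The mdev/ring names are hypothetical:
 *
 *	struct drm_gpu_scheduler *sched_list[] = { &mdev->ring_a.sched,
 *						   &mdev->ring_b.sched };
 *
 *	drm_sched_entity_init(&my_entity, DRM_SCHED_PRIORITY_NORMAL,
 *			      sched_list, ARRAY_SIZE(sched_list), NULL);
 */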

/**
 * drm_sched_blocked - check if the scheduler is blocked
 *
 * @sched: scheduler instance
 *
 * Returns true if blocked, otherwise false.
 */
static bool drm_sched_blocked(struct drm_gpu_scheduler *sched)
{
	if (kthread_should_park()) {
		kthread_parkme();
		return true;
	}

	return false;
}

/**
 * drm_sched_main - main scheduler thread
 *
 * @param: scheduler instance
 *
 * Returns 0.
 */
static int drm_sched_main(void *param)
{
	struct drm_gpu_scheduler *sched = (struct drm_gpu_scheduler *)param;
	int r;

	sched_set_fifo_low(current);

	while (!kthread_should_stop()) {
		struct drm_sched_entity *entity = NULL;
		struct drm_sched_fence *s_fence;
		struct drm_sched_job *sched_job;
		struct dma_fence *fence;
		struct drm_sched_job *cleanup_job = NULL;

		wait_event_interruptible(sched->wake_up_worker,
					 (cleanup_job = drm_sched_get_cleanup_job(sched)) ||
					 (!drm_sched_blocked(sched) &&
					  (entity = drm_sched_select_entity(sched))) ||
					 kthread_should_stop());

		if (cleanup_job) {
			sched->ops->free_job(cleanup_job);
			/* queue timeout for next job */
			drm_sched_start_timeout(sched);
		}

		if (!entity)
			continue;

		sched_job = drm_sched_entity_pop_job(entity);

		complete(&entity->entity_idle);

		if (!sched_job)
			continue;

		s_fence = sched_job->s_fence;

		atomic_inc(&sched->hw_rq_count);
		drm_sched_job_begin(sched_job);

		trace_drm_run_job(sched_job, entity);
		fence = sched->ops->run_job(sched_job);
		drm_sched_fence_scheduled(s_fence);

		if (!IS_ERR_OR_NULL(fence)) {
			s_fence->parent = dma_fence_get(fence);
			r = dma_fence_add_callback(fence, &sched_job->cb,
						   drm_sched_job_done_cb);
			if (r == -ENOENT)
				drm_sched_job_done(sched_job);
			else if (r)
				DRM_ERROR("fence add callback failed (%d)\n",
					  r);
			dma_fence_put(fence);
		} else {
			if (IS_ERR(fence))
				dma_fence_set_error(&s_fence->finished, PTR_ERR(fence));

			drm_sched_job_done(sched_job);
		}

		wake_up(&sched->job_scheduled);
	}
	return 0;
}

/**
 * drm_sched_init - Init a gpu scheduler instance
 *
 * @sched: scheduler instance
 * @ops: backend operations for this scheduler
 * @hw_submission: number of hw submissions that can be in flight
 * @hang_limit: number of times to allow a job to hang before dropping it
 * @timeout: timeout value in jiffies for the scheduler
 * @name: name used for debugging
 *
 * Return 0 on success, otherwise error code.
 */
int drm_sched_init(struct drm_gpu_scheduler *sched,
		   const struct drm_sched_backend_ops *ops,
		   unsigned hw_submission,
		   unsigned hang_limit,
		   long timeout,
		   const char *name)
{
	int i, ret;

	sched->ops = ops;
	sched->hw_submission_limit = hw_submission;
	sched->name = name;
	sched->timeout = timeout;
	sched->hang_limit = hang_limit;
	for (i = DRM_SCHED_PRIORITY_MIN; i < DRM_SCHED_PRIORITY_COUNT; i++)
		drm_sched_rq_init(sched, &sched->sched_rq[i]);

	init_waitqueue_head(&sched->wake_up_worker);
	init_waitqueue_head(&sched->job_scheduled);
	INIT_LIST_HEAD(&sched->pending_list);
	spin_lock_init(&sched->job_list_lock);
	atomic_set(&sched->hw_rq_count, 0);
	INIT_DELAYED_WORK(&sched->work_tdr, drm_sched_job_timedout);
	atomic_set(&sched->score, 0);
	atomic64_set(&sched->job_id_count, 0);

	/* Each scheduler will run on a separate kernel thread */
	sched->thread = kthread_run(drm_sched_main, sched, sched->name);
	if (IS_ERR(sched->thread)) {
		ret = PTR_ERR(sched->thread);
		sched->thread = NULL;
		DRM_ERROR("Failed to create scheduler for %s.\n", name);
		return ret;
	}

	sched->ready = true;
	return 0;
}
EXPORT_SYMBOL(drm_sched_init);

/**
 * drm_sched_fini - Destroy a gpu scheduler
 *
 * @sched: scheduler instance
 *
 * Tears down and cleans up the scheduler.
 */
void drm_sched_fini(struct drm_gpu_scheduler *sched)
{
	if (sched->thread)
		kthread_stop(sched->thread);

	/* Confirm no work left behind accessing device structures */
	cancel_delayed_work_sync(&sched->work_tdr);

	sched->ready = false;
}
EXPORT_SYMBOL(drm_sched_fini);
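
/*
 * A sketch of the teardown ordering a driver typically follows on unload,
 * assuming hypothetical my_dev/my_entity/my_sched objects: entities are
 * flushed and destroyed first, then the scheduler itself is torn down.
 *
 *	static void my_driver_fini(struct my_dev *mdev)
 *	{
 *		// stop accepting jobs from this entity and wait for it to idle
 *		drm_sched_entity_destroy(&mdev->my_entity);
 *
 *		// stop the scheduler thread and cancel the timeout worker
 *		drm_sched_fini(&mdev->my_sched);
 *	}
 */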