/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 *    Dave Airlie
 */
#include <linux/seq_file.h>
#include <linux/atomic.h>
#include <linux/wait.h>
#include <linux/kref.h>
#include <linux/slab.h>
#include <linux/firmware.h>
#include <drm/drmP.h>
#include "radeon_reg.h"
#include "radeon.h"
#include "radeon_trace.h"

/*
 * Fences
 * Fences mark an event in the GPU's pipeline and are used
 * for GPU/CPU synchronization.  When the fence is written,
 * it is expected that all buffers associated with that fence
 * are no longer in use by the associated ring on the GPU and
 * that the relevant GPU caches have been flushed.  Whether
 * we use a scratch register or memory location depends on the asic
 * and whether writeback is enabled.
 */
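
/*
 * Illustrative usage sketch (not part of the driver; only the fence helpers
 * defined below are assumed): a typical caller emits a fence after scheduling
 * work on a ring and later waits for it, roughly:
 *
 *	struct radeon_fence *fence;
 *	int r = radeon_fence_emit(rdev, &fence, RADEON_RING_TYPE_GFX_INDEX);
 *	if (!r) {
 *		r = radeon_fence_wait(fence, false);
 *		radeon_fence_unref(&fence);
 *	}
 */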

/**
 * radeon_fence_write - write a fence value
 *
 * @rdev: radeon_device pointer
 * @seq: sequence number to write
 * @ring: ring index the fence is associated with
 *
 * Writes a fence value to memory or a scratch register (all asics).
 */
static void radeon_fence_write(struct radeon_device *rdev, u32 seq, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			*drv->cpu_addr = cpu_to_le32(seq);
		}
	} else {
		WREG32(drv->scratch_reg, seq);
	}
}

/**
 * radeon_fence_read - read a fence value
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Reads a fence value from memory or a scratch register (all asics).
 * Returns the value of the fence read from memory or register.
 */
static u32 radeon_fence_read(struct radeon_device *rdev, int ring)
{
	struct radeon_fence_driver *drv = &rdev->fence_drv[ring];
	u32 seq = 0;

	if (likely(rdev->wb.enabled || !drv->scratch_reg)) {
		if (drv->cpu_addr) {
			seq = le32_to_cpu(*drv->cpu_addr);
		} else {
			seq = lower_32_bits(atomic64_read(&drv->last_seq));
		}
	} else {
		seq = RREG32(drv->scratch_reg);
	}
	return seq;
}

/**
 * radeon_fence_schedule_check - schedule lockup check
 *
 * @rdev: radeon_device pointer
 * @ring: ring index we should work with
 *
 * Queues a delayed work item to check for lockups.
 */
static void radeon_fence_schedule_check(struct radeon_device *rdev, int ring)
{
	/*
	 * Do not reset the timer here with mod_delayed_work,
	 * this can livelock in an interaction with TTM delayed destroy.
	 */
	queue_delayed_work(system_power_efficient_wq,
			   &rdev->fence_drv[ring].lockup_work,
			   RADEON_FENCE_JIFFIES_TIMEOUT);
}

/**
 * radeon_fence_emit - emit a fence on the requested ring
 *
 * @rdev: radeon_device pointer
 * @fence: radeon fence object
 * @ring: ring index the fence is associated with
 *
 * Emits a fence command on the requested ring (all asics).
 * Returns 0 on success, -ENOMEM on failure.
 */
int radeon_fence_emit(struct radeon_device *rdev,
		      struct radeon_fence **fence,
		      int ring)
{
	/* we are protected by the ring emission mutex */
	*fence = kmalloc(sizeof(struct radeon_fence), GFP_KERNEL);
	if ((*fence) == NULL) {
		return -ENOMEM;
	}
	kref_init(&((*fence)->kref));
	(*fence)->rdev = rdev;
	(*fence)->seq = ++rdev->fence_drv[ring].sync_seq[ring];
	(*fence)->ring = ring;
	radeon_fence_ring_emit(rdev, ring, *fence);
	trace_radeon_fence_emit(rdev->ddev, ring, (*fence)->seq);
	radeon_fence_schedule_check(rdev, ring);
	return 0;
}

/**
 * radeon_fence_activity - check for fence activity
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and calculates the last
 * signalled fence value. Returns true if activity occurred
 * on the ring and the fence_queue should be woken up.
 */
static bool radeon_fence_activity(struct radeon_device *rdev, int ring)
{
	uint64_t seq, last_seq, last_emitted;
	unsigned count_loop = 0;
	bool wake = false;

	/* Note there is a scenario here for an infinite loop but it's
	 * very unlikely to happen. For it to happen, the current polling
	 * process needs to be interrupted by another process, and that
	 * other process needs to update last_seq between the atomic
	 * read and xchg of the current process.
	 *
	 * Moreover, for this to turn into an infinite loop there need to
	 * be continuously new fences signaled, i.e. radeon_fence_read needs
	 * to return a different value each time for both the currently
	 * polling process and the other process that xchgs last_seq
	 * between the atomic read and xchg of the current process. And the
	 * value the other process sets as last_seq must be higher than
	 * the seq value we just read, which means the current process
	 * needs to be interrupted after radeon_fence_read and before
	 * the atomic xchg.
	 *
	 * To be even safer, we count the number of times we loop and
	 * bail out after 10 iterations, accepting the fact that we might
	 * have temporarily set last_seq not to the true last signaled
	 * seq but to an older one.
	 */
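	/* Illustrative example with hypothetical numbers: if last_seq is
	 * 0x00000001fffffff0 and the 32-bit value read back from the fence
	 * location is 0x00000010, the extended value 0x0000000100000010 is
	 * smaller than last_seq, so its upper 32 bits are taken from
	 * last_emitted instead (e.g. giving 0x0000000200000010), which is
	 * how the loop below handles the 32-bit wraparound.
	 */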
	last_seq = atomic64_read(&rdev->fence_drv[ring].last_seq);
	do {
		last_emitted = rdev->fence_drv[ring].sync_seq[ring];
		seq = radeon_fence_read(rdev, ring);
		seq |= last_seq & 0xffffffff00000000LL;
		if (seq < last_seq) {
			seq &= 0xffffffff;
			seq |= last_emitted & 0xffffffff00000000LL;
		}

		if (seq <= last_seq || seq > last_emitted) {
			break;
		}
		/* If we loop over we don't want to return without
		 * checking if a fence is signaled as it means that the
		 * seq we just read is different from the previous one.
		 */
		wake = true;
		last_seq = seq;
		if ((count_loop++) > 10) {
			/* We looped too many times; bail out with the
			 * fact that we might have set an older fence
			 * seq than the current real last seq as signaled
			 * by the hw.
			 */
			break;
		}
	} while (atomic64_xchg(&rdev->fence_drv[ring].last_seq, seq) > seq);

	if (seq < last_emitted)
		radeon_fence_schedule_check(rdev, ring);

	return wake;
}

/**
 * radeon_fence_check_lockup - check for hardware lockup
 *
 * @work: delayed work item
 *
 * Checks for fence activity and, if there is none, probes
 * the hardware to see if a lockup occurred.
 */
static void radeon_fence_check_lockup(struct work_struct *work)
{
	struct radeon_fence_driver *fence_drv;
	struct radeon_device *rdev;
	int ring;

	fence_drv = container_of(work, struct radeon_fence_driver,
				 lockup_work.work);
	rdev = fence_drv->rdev;
	ring = fence_drv - &rdev->fence_drv[0];

	if (!down_read_trylock(&rdev->exclusive_lock)) {
		/* just reschedule the check if a reset is going on */
		radeon_fence_schedule_check(rdev, ring);
		return;
	}

	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);
	else if (radeon_ring_is_lockup(rdev, ring, &rdev->ring[ring])) {
		/* good news we believe it's a lockup */
		dev_warn(rdev->dev, "GPU lockup (current fence id "
			 "0x%016llx last fence id 0x%016llx on ring %d)\n",
			 (uint64_t)atomic64_read(&fence_drv->last_seq),
			 fence_drv->sync_seq[ring], ring);

		/* remember that we need a reset */
		rdev->needs_reset = true;
		wake_up_all(&rdev->fence_queue);
	}
	up_read(&rdev->exclusive_lock);
}

/**
 * radeon_fence_process - process a fence
 *
 * @rdev: radeon_device pointer
 * @ring: ring index the fence is associated with
 *
 * Checks the current fence value and wakes the fence queue
 * if the sequence number has increased (all asics).
 */
void radeon_fence_process(struct radeon_device *rdev, int ring)
{
	if (radeon_fence_activity(rdev, ring))
		wake_up_all(&rdev->fence_queue);
}

/**
 * radeon_fence_destroy - destroy a fence
 *
 * @kref: fence kref
 *
 * Frees the fence object (all asics).
 */
static void radeon_fence_destroy(struct kref *kref)
{
	struct radeon_fence *fence;

	fence = container_of(kref, struct radeon_fence, kref);
	kfree(fence);
}

/**
 * radeon_fence_seq_signaled - check if a fence sequence number has signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence number
 * @ring: ring index the fence is associated with
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number (all asics).
 * Returns true if the fence has signaled (current fence value
 * is >= requested value) or false if it has not (current fence
 * value is < the requested value).  Helper function for
 * radeon_fence_signaled().
 */
static bool radeon_fence_seq_signaled(struct radeon_device *rdev,
				      u64 seq, unsigned ring)
{
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	/* poll new last sequence at least once */
	radeon_fence_process(rdev, ring);
	if (atomic64_read(&rdev->fence_drv[ring].last_seq) >= seq) {
		return true;
	}
	return false;
}

/**
 * radeon_fence_signaled - check if a fence has signaled
 *
 * @fence: radeon fence object
 *
 * Check if the requested fence has signaled (all asics).
 * Returns true if the fence has signaled or false if it has not.
 */
bool radeon_fence_signaled(struct radeon_fence *fence)
{
	if (!fence)
		return true;
	if (radeon_fence_seq_signaled(fence->rdev, fence->seq, fence->ring))
		return true;
	return false;
}

/**
 * radeon_fence_any_seq_signaled - check if any sequence number is signaled
 *
 * @rdev: radeon device pointer
 * @seq: sequence numbers
 *
 * Check if the last signaled fence sequence number is >= the requested
 * sequence number on any of the rings (all asics).
 * Returns true if any has signaled (current value is >= requested value)
 * or false if it has not. Helper function for radeon_fence_wait_seq.
 */
static bool radeon_fence_any_seq_signaled(struct radeon_device *rdev, u64 *seq)
{
	unsigned i;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (seq[i] && radeon_fence_seq_signaled(rdev, seq[i], i))
			return true;
	}
	return false;
}

/**
 * radeon_fence_wait_seq_timeout - wait for specific sequence numbers
 *
 * @rdev: radeon device pointer
 * @target_seq: sequence number(s) we want to wait for
 * @intr: use interruptible sleep
 * @timeout: maximum time to wait, or MAX_SCHEDULE_TIMEOUT for infinite wait
 *
 * Wait for the requested sequence number(s) to be written by any ring
 * (all asics).  The sequence number array is indexed by ring id.
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the sequence number.  Helper function
 * for radeon_fence_wait_*().
 * Returns the remaining time if the sequence number has passed, 0 when
 * the wait times out, or an error for all other cases.
 * -EDEADLK is returned when a GPU lockup has been detected.
 */
static long radeon_fence_wait_seq_timeout(struct radeon_device *rdev,
					  u64 *target_seq, bool intr,
					  long timeout)
{
	long r;
	int i;

	if (radeon_fence_any_seq_signaled(rdev, target_seq))
		return timeout;

	/* enable IRQs and tracing */
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		trace_radeon_fence_wait_begin(rdev->ddev, i, target_seq[i]);
		radeon_irq_kms_sw_irq_get(rdev, i);
	}

	if (intr) {
		r = wait_event_interruptible_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	} else {
		r = wait_event_timeout(rdev->fence_queue, (
			radeon_fence_any_seq_signaled(rdev, target_seq)
			 || rdev->needs_reset), timeout);
	}

	if (rdev->needs_reset)
		r = -EDEADLK;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!target_seq[i])
			continue;

		radeon_irq_kms_sw_irq_put(rdev, i);
		trace_radeon_fence_wait_end(rdev->ddev, i, target_seq[i]);
	}

	return r;
}

/**
 * radeon_fence_wait - wait for a fence to signal
 *
 * @fence: radeon fence object
 * @intr: use interruptible sleep
 *
 * Wait for the requested fence to signal (all asics).
 * @intr selects whether to use interruptible (true) or non-interruptible
 * (false) sleep when waiting for the fence.
 * Returns 0 if the fence has passed, error for all other cases.
 */
int radeon_fence_wait(struct radeon_fence *fence, bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	if (fence == NULL) {
		WARN(1, "Querying an invalid fence : %p !\n", fence);
		return -EINVAL;
	}

	seq[fence->ring] = fence->seq;
	r = radeon_fence_wait_seq_timeout(fence->rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}

	return 0;
}

/**
 * radeon_fence_wait_any - wait for a fence to signal on any ring
 *
 * @rdev: radeon device pointer
 * @fences: radeon fence object(s)
 * @intr: use interruptible sleep
 *
 * Wait for any requested fence to signal (all asics).  The fence
 * array is indexed by ring id.  @intr selects whether to use
 * interruptible (true) or non-interruptible (false) sleep when
 * waiting for the fences. Used by the suballocator.
 * Returns 0 if any fence has passed, error for all other cases.
 */
int radeon_fence_wait_any(struct radeon_device *rdev,
			  struct radeon_fence **fences,
			  bool intr)
{
	uint64_t seq[RADEON_NUM_RINGS];
	unsigned i, num_rings = 0;
	long r;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		seq[i] = 0;

		if (!fences[i]) {
			continue;
		}

		seq[i] = fences[i]->seq;
		++num_rings;
	}

	/* nothing to wait for ? */
	if (num_rings == 0)
		return -ENOENT;

	r = radeon_fence_wait_seq_timeout(rdev, seq, intr, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		return r;
	}
	return 0;
}
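
/*
 * Illustrative sketch (hypothetical caller and fence variables): waiting
 * for the first of two fences, one per ring, to signal could look like:
 *
 *	struct radeon_fence *fences[RADEON_NUM_RINGS] = {};
 *
 *	fences[RADEON_RING_TYPE_GFX_INDEX] = gfx_fence;
 *	fences[R600_RING_TYPE_DMA_INDEX] = dma_fence;
 *	r = radeon_fence_wait_any(rdev, fences, false);
 */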

/**
 * radeon_fence_wait_next - wait for the next fence to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for the next fence on the requested ring to signal (all asics).
 * Returns 0 if the next fence has passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int radeon_fence_wait_next(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = atomic64_read(&rdev->fence_drv[ring].last_seq) + 1ULL;
	if (seq[ring] >= rdev->fence_drv[ring].sync_seq[ring]) {
		/* nothing to wait for, last_seq is
		   already the last emitted fence */
		return -ENOENT;
	}
	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0)
		return r;
	return 0;
}

/**
 * radeon_fence_wait_empty - wait for all fences to signal
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Wait for all fences on the requested ring to signal (all asics).
 * Returns 0 if the fences have passed, error for all other cases.
 * Caller must hold the ring lock.
 */
int radeon_fence_wait_empty(struct radeon_device *rdev, int ring)
{
	uint64_t seq[RADEON_NUM_RINGS] = {};
	long r;

	seq[ring] = rdev->fence_drv[ring].sync_seq[ring];
	if (!seq[ring])
		return 0;

	r = radeon_fence_wait_seq_timeout(rdev, seq, false, MAX_SCHEDULE_TIMEOUT);
	if (r < 0) {
		if (r == -EDEADLK)
			return -EDEADLK;

		dev_err(rdev->dev, "error waiting for ring[%d] to become idle (%ld)\n",
			ring, r);
	}
	return 0;
}

/**
 * radeon_fence_ref - take a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Take a reference on a fence (all asics).
 * Returns the fence.
 */
struct radeon_fence *radeon_fence_ref(struct radeon_fence *fence)
{
	kref_get(&fence->kref);
	return fence;
}

/**
 * radeon_fence_unref - remove a ref on a fence
 *
 * @fence: radeon fence object
 *
 * Remove a reference on a fence (all asics).
 */
void radeon_fence_unref(struct radeon_fence **fence)
{
	struct radeon_fence *tmp = *fence;

	*fence = NULL;
	if (tmp) {
		kref_put(&tmp->kref, radeon_fence_destroy);
	}
}

/**
 * radeon_fence_count_emitted - get the count of emitted fences
 *
 * @rdev: radeon device pointer
 * @ring: ring index the fence is associated with
 *
 * Get the number of fences emitted on the requested ring (all asics).
 * Returns the number of emitted fences on the ring.  Used by the
 * dynpm code to track ring activity.
 */
unsigned radeon_fence_count_emitted(struct radeon_device *rdev, int ring)
{
	uint64_t emitted;

	/* We are not protected by ring lock when reading the last sequence
	 * but it's ok to report slightly wrong fence count here.
	 */
	radeon_fence_process(rdev, ring);
	emitted = rdev->fence_drv[ring].sync_seq[ring]
		- atomic64_read(&rdev->fence_drv[ring].last_seq);
	/* to avoid 32-bit wrap around */
	if (emitted > 0x10000000) {
		emitted = 0x10000000;
	}
	return (unsigned)emitted;
}

/**
 * radeon_fence_need_sync - do we need a semaphore
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Check if the fence needs to be synced against another ring
 * (all asics).  If so, we need to emit a semaphore.
 * Returns true if we need to sync with another ring, false if
 * not.
 */
bool radeon_fence_need_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *fdrv;

	if (!fence) {
		return false;
	}

	if (fence->ring == dst_ring) {
		return false;
	}

	/* we are protected by the ring mutex */
	fdrv = &fence->rdev->fence_drv[dst_ring];
	if (fence->seq <= fdrv->sync_seq[fence->ring]) {
		return false;
	}

	return true;
}

/**
 * radeon_fence_note_sync - record the sync point
 *
 * @fence: radeon fence object
 * @dst_ring: which ring to check against
 *
 * Note the sequence number at which point the fence will
 * be synced with the requested ring (all asics).
 */
void radeon_fence_note_sync(struct radeon_fence *fence, int dst_ring)
{
	struct radeon_fence_driver *dst, *src;
	unsigned i;

	if (!fence) {
		return;
	}

	if (fence->ring == dst_ring) {
		return;
	}

	/* we are protected by the ring mutex */
	src = &fence->rdev->fence_drv[fence->ring];
	dst = &fence->rdev->fence_drv[dst_ring];
	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (i == dst_ring) {
			continue;
		}
		dst->sync_seq[i] = max(dst->sync_seq[i], src->sync_seq[i]);
	}
}
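
/*
 * Illustrative sketch (not part of this file): callers such as the
 * semaphore code typically pair the two helpers above roughly as follows:
 *
 *	if (radeon_fence_need_sync(fence, dst_ring)) {
 *		... emit a semaphore wait on dst_ring (asic specific) ...
 *		radeon_fence_note_sync(fence, dst_ring);
 *	}
 */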

/**
 * radeon_fence_driver_start_ring - make the fence driver
 * ready for use on the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Make the fence driver ready for processing (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has.
 * Returns 0 for success, errors for failure.
 */
int radeon_fence_driver_start_ring(struct radeon_device *rdev, int ring)
{
	uint64_t index;
	int r;

	radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
	if (rdev->wb.use_event || !radeon_ring_supports_scratch_reg(rdev, &rdev->ring[ring])) {
		rdev->fence_drv[ring].scratch_reg = 0;
		if (ring != R600_RING_TYPE_UVD_INDEX) {
			index = R600_WB_EVENT_OFFSET + ring * 4;
			rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
			rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr +
							 index;

		} else {
			/* put fence directly behind firmware */
			index = ALIGN(rdev->uvd_fw->size, 8);
			rdev->fence_drv[ring].cpu_addr = rdev->uvd.cpu_addr + index;
			rdev->fence_drv[ring].gpu_addr = rdev->uvd.gpu_addr + index;
		}

	} else {
		r = radeon_scratch_get(rdev, &rdev->fence_drv[ring].scratch_reg);
		if (r) {
			dev_err(rdev->dev, "fence failed to get scratch register\n");
			return r;
		}
		index = RADEON_WB_SCRATCH_OFFSET +
			rdev->fence_drv[ring].scratch_reg -
			rdev->scratch.reg_base;
		rdev->fence_drv[ring].cpu_addr = &rdev->wb.wb[index/4];
		rdev->fence_drv[ring].gpu_addr = rdev->wb.gpu_addr + index;
	}
	radeon_fence_write(rdev, atomic64_read(&rdev->fence_drv[ring].last_seq), ring);
	rdev->fence_drv[ring].initialized = true;
	dev_info(rdev->dev, "fence driver on ring %d use gpu addr 0x%016llx and cpu addr 0x%p\n",
		 ring, rdev->fence_drv[ring].gpu_addr, rdev->fence_drv[ring].cpu_addr);
	return 0;
}

/**
 * radeon_fence_driver_init_ring - init the fence driver
 * for the requested ring.
 *
 * @rdev: radeon device pointer
 * @ring: ring index to start the fence driver on
 *
 * Init the fence driver for the requested ring (all asics).
 * Helper function for radeon_fence_driver_init().
 */
static void radeon_fence_driver_init_ring(struct radeon_device *rdev, int ring)
{
	int i;

	rdev->fence_drv[ring].scratch_reg = -1;
	rdev->fence_drv[ring].cpu_addr = NULL;
	rdev->fence_drv[ring].gpu_addr = 0;
	for (i = 0; i < RADEON_NUM_RINGS; ++i)
		rdev->fence_drv[ring].sync_seq[i] = 0;
	atomic64_set(&rdev->fence_drv[ring].last_seq, 0);
	rdev->fence_drv[ring].initialized = false;
	INIT_DELAYED_WORK(&rdev->fence_drv[ring].lockup_work,
			  radeon_fence_check_lockup);
	rdev->fence_drv[ring].rdev = rdev;
}

/**
 * radeon_fence_driver_init - init the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Init the fence driver for all possible rings (all asics).
 * Not all asics have all rings, so each asic will only
 * start the fence driver on the rings it has using
 * radeon_fence_driver_start_ring().
 * Returns 0 for success.
 */
int radeon_fence_driver_init(struct radeon_device *rdev)
{
	int ring;

	init_waitqueue_head(&rdev->fence_queue);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		radeon_fence_driver_init_ring(rdev, ring);
	}
	if (radeon_debugfs_fence_init(rdev)) {
		dev_err(rdev->dev, "fence debugfs file creation failed\n");
	}
	return 0;
}

/**
 * radeon_fence_driver_fini - tear down the fence driver
 * for all possible rings.
 *
 * @rdev: radeon device pointer
 *
 * Tear down the fence driver for all possible rings (all asics).
 */
void radeon_fence_driver_fini(struct radeon_device *rdev)
{
	int ring, r;

	mutex_lock(&rdev->ring_lock);
	for (ring = 0; ring < RADEON_NUM_RINGS; ring++) {
		if (!rdev->fence_drv[ring].initialized)
			continue;
		r = radeon_fence_wait_empty(rdev, ring);
		if (r) {
			/* no need to trigger GPU reset as we are unloading */
			radeon_fence_driver_force_completion(rdev, ring);
		}
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
		wake_up_all(&rdev->fence_queue);
		radeon_scratch_free(rdev, rdev->fence_drv[ring].scratch_reg);
		rdev->fence_drv[ring].initialized = false;
	}
	mutex_unlock(&rdev->ring_lock);
}

/**
 * radeon_fence_driver_force_completion - force all fence waiters to complete
 *
 * @rdev: radeon device pointer
 * @ring: the ring to complete
 *
 * In case of GPU reset failure make sure no process keeps waiting on a fence
 * that will never complete.
 */
void radeon_fence_driver_force_completion(struct radeon_device *rdev, int ring)
{
	if (rdev->fence_drv[ring].initialized) {
		radeon_fence_write(rdev, rdev->fence_drv[ring].sync_seq[ring], ring);
		cancel_delayed_work_sync(&rdev->fence_drv[ring].lockup_work);
	}
}


/*
 * Fence debugfs
 */
#if defined(CONFIG_DEBUG_FS)
static int radeon_debugfs_fence_info(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *)m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;
	int i, j;

	for (i = 0; i < RADEON_NUM_RINGS; ++i) {
		if (!rdev->fence_drv[i].initialized)
			continue;

		radeon_fence_process(rdev, i);

		seq_printf(m, "--- ring %d ---\n", i);
		seq_printf(m, "Last signaled fence 0x%016llx\n",
			   (unsigned long long)atomic64_read(&rdev->fence_drv[i].last_seq));
		seq_printf(m, "Last emitted        0x%016llx\n",
			   rdev->fence_drv[i].sync_seq[i]);

		for (j = 0; j < RADEON_NUM_RINGS; ++j) {
			if (i != j && rdev->fence_drv[j].initialized)
				seq_printf(m, "Last sync to ring %d 0x%016llx\n",
					   j, rdev->fence_drv[i].sync_seq[j]);
		}
	}
	return 0;
}

/**
 * radeon_debugfs_gpu_reset - manually trigger a gpu reset
 *
 * Manually trigger a gpu reset at the next fence wait.
 */
static int radeon_debugfs_gpu_reset(struct seq_file *m, void *data)
{
	struct drm_info_node *node = (struct drm_info_node *) m->private;
	struct drm_device *dev = node->minor->dev;
	struct radeon_device *rdev = dev->dev_private;

	down_read(&rdev->exclusive_lock);
	seq_printf(m, "%d\n", rdev->needs_reset);
	rdev->needs_reset = true;
	wake_up_all(&rdev->fence_queue);
	up_read(&rdev->exclusive_lock);

	return 0;
}

static struct drm_info_list radeon_debugfs_fence_list[] = {
	{"radeon_fence_info", &radeon_debugfs_fence_info, 0, NULL},
	{"radeon_gpu_reset", &radeon_debugfs_gpu_reset, 0, NULL}
};
#endif

int radeon_debugfs_fence_init(struct radeon_device *rdev)
{
#if defined(CONFIG_DEBUG_FS)
	return radeon_debugfs_add_files(rdev, radeon_debugfs_fence_list, 2);
#else
	return 0;
#endif
}