/*
 * SPDX-License-Identifier: MIT
 *
 * Copyright © 2019 Intel Corporation
 */

#include <linux/debugobjects.h>

#include "gt/intel_engine_pm.h"
#include "gt/intel_ring.h"

#include "i915_drv.h"
#include "i915_active.h"
#include "i915_globals.h"

/*
 * Active refs memory management
 *
 * To be more economical with memory, we reap all the i915_active trees as
 * they idle (when we know the active requests are inactive) and allocate the
 * nodes from a local slab cache to hopefully reduce the fragmentation.
 */
static struct i915_global_active {
	struct i915_global base;
	struct kmem_cache *slab_cache;
} global;

struct active_node {
	struct i915_active_fence base;
	struct i915_active *ref;
	struct rb_node node;
	u64 timeline;
};

static inline struct active_node *
node_from_active(struct i915_active_fence *active)
{
	return container_of(active, struct active_node, base);
}

#define take_preallocated_barriers(x) llist_del_all(&(x)->preallocated_barriers)

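/*
 * Idle barriers are tracked as "proto-nodes": until they are hooked up to a
 * request, active_node.base.fence holds ERR_PTR(-EAGAIN) and the otherwise
 * idle callback list entry doubles as an llist node, with the owning engine
 * stashed in cb.node.prev. The helpers below encode and decode that packing.
 */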
static inline bool is_barrier(const struct i915_active_fence *active)
{
	return IS_ERR(rcu_access_pointer(active->fence));
}

static inline struct llist_node *barrier_to_ll(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return (struct llist_node *)&node->base.cb.node;
}

static inline struct intel_engine_cs *
__barrier_to_engine(struct active_node *node)
{
	return (struct intel_engine_cs *)READ_ONCE(node->base.cb.node.prev);
}

static inline struct intel_engine_cs *
barrier_to_engine(struct active_node *node)
{
	GEM_BUG_ON(!is_barrier(&node->base));
	return __barrier_to_engine(node);
}

static inline struct active_node *barrier_from_ll(struct llist_node *x)
{
	return container_of((struct list_head *)x,
			    struct active_node, base.cb.node);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM) && IS_ENABLED(CONFIG_DEBUG_OBJECTS)

static void *active_debug_hint(void *addr)
{
	struct i915_active *ref = addr;

	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
}

static struct debug_obj_descr active_debug_desc = {
	.name = "i915_active",
	.debug_hint = active_debug_hint,
};

static void debug_active_init(struct i915_active *ref)
{
	debug_object_init(ref, &active_debug_desc);
}

static void debug_active_activate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->mutex);
	if (!atomic_read(&ref->count)) /* before the first inc */
		debug_object_activate(ref, &active_debug_desc);
}

static void debug_active_deactivate(struct i915_active *ref)
{
	lockdep_assert_held(&ref->mutex);
	if (!atomic_read(&ref->count)) /* after the last dec */
		debug_object_deactivate(ref, &active_debug_desc);
}

static void debug_active_fini(struct i915_active *ref)
{
	debug_object_free(ref, &active_debug_desc);
}

static void debug_active_assert(struct i915_active *ref)
{
	debug_object_assert_init(ref, &active_debug_desc);
}

#else

static inline void debug_active_init(struct i915_active *ref) { }
static inline void debug_active_activate(struct i915_active *ref) { }
static inline void debug_active_deactivate(struct i915_active *ref) { }
static inline void debug_active_fini(struct i915_active *ref) { }
static inline void debug_active_assert(struct i915_active *ref) { }

#endif

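/*
 * Drop a reference; if it was the last, reap the rbtree nodes back into the
 * slab cache, invoke the owner's retire callback and wake anyone sleeping in
 * i915_active_wait(). Called with ref->mutex held; the mutex is released
 * before the callbacks run.
 */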
static void
__active_retire(struct i915_active *ref)
{
	struct active_node *it, *n;
	struct rb_root root;
	bool retire = false;

	lockdep_assert_held(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/* return the unused nodes to our slabcache -- flushing the allocator */
	if (atomic_dec_and_test(&ref->count)) {
		debug_active_deactivate(ref);
		root = ref->tree;
		ref->tree = RB_ROOT;
		ref->cache = NULL;
		retire = true;
	}

	mutex_unlock(&ref->mutex);
	if (!retire)
		return;

	GEM_BUG_ON(rcu_access_pointer(ref->excl.fence));
	rbtree_postorder_for_each_entry_safe(it, n, &root, node) {
		GEM_BUG_ON(i915_active_fence_isset(&it->base));
		kmem_cache_free(global.slab_cache, it);
	}

	/* After the final retire, the entire struct may be freed */
	if (ref->retire)
		ref->retire(ref);

	/* ... except if you wait on it, you must manage your own references! */
	wake_up_var(ref);
}

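/*
 * Process-context worker for retirement, used when the final unreference
 * happens somewhere that cannot sleep or take ref->mutex directly (see
 * active_retire() below).
 */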
static void
active_work(struct work_struct *wrk)
{
	struct i915_active *ref = container_of(wrk, typeof(*ref), work);

	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	mutex_lock(&ref->mutex);
	__active_retire(ref);
}

static void
active_retire(struct i915_active *ref)
{
	GEM_BUG_ON(!atomic_read(&ref->count));
	if (atomic_add_unless(&ref->count, -1, 1))
		return;

	/* If we are inside interrupt context (fence signaling), defer */
	if (ref->flags & I915_ACTIVE_RETIRE_SLEEPS ||
	    !mutex_trylock(&ref->mutex)) {
		queue_work(system_unbound_wq, &ref->work);
		return;
	}

	__active_retire(ref);
}

static void
node_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
	active_retire(container_of(cb, struct active_node, base.cb)->ref);
}

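/* Retirement callback for the exclusive slot, see i915_active_set_exclusive() */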
static void
excl_retire(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
	active_retire(container_of(cb, struct i915_active, excl.cb));
}

static struct i915_active_fence *
active_instance(struct i915_active *ref, struct intel_timeline *tl)
{
	struct active_node *node, *prealloc;
	struct rb_node **p, *parent;
	u64 idx = tl->fence_context;

	/*
	 * We track the most recently used timeline to skip a rbtree search
	 * for the common case, under typical loads we never need the rbtree
	 * at all. We can reuse the last slot if it is empty, that is
	 * after the previous activity has been retired, or if it matches the
	 * current timeline.
	 */
	node = READ_ONCE(ref->cache);
	if (node && node->timeline == idx)
		return &node->base;

	/* Preallocate a replacement, just in case */
	prealloc = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
	if (!prealloc)
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	parent = NULL;
	p = &ref->tree.rb_node;
	while (*p) {
		parent = *p;

		node = rb_entry(parent, struct active_node, node);
		if (node->timeline == idx) {
			kmem_cache_free(global.slab_cache, prealloc);
			goto out;
		}

		if (node->timeline < idx)
			p = &parent->rb_right;
		else
			p = &parent->rb_left;
	}

	node = prealloc;
	__i915_active_fence_init(&node->base, &tl->mutex, NULL, node_retire);
	node->ref = ref;
	node->timeline = idx;

	rb_link_node(&node->node, parent, p);
	rb_insert_color(&node->node, &ref->tree);

out:
	ref->cache = node;
	mutex_unlock(&ref->mutex);

	BUILD_BUG_ON(offsetof(typeof(*node), base));
	return &node->base;
}

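/*
 * Illustrative usage sketch (not part of this file): a caller embeds a
 * struct i915_active and typically initialises it via the i915_active_init()
 * wrapper in i915_active.h, which supplies the lock class for ref->mutex.
 * With a hypothetical @obj and an optional @my_retire callback, and holding
 * tl->mutex around the ref:
 *
 *	i915_active_init(&obj->active, NULL, my_retire);
 *	...
 *	err = i915_active_ref(&obj->active, tl, &rq->fence);
 *	...
 *	err = i915_active_wait(&obj->active);
 *	i915_active_fini(&obj->active);
 */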
void __i915_active_init(struct i915_active *ref,
			int (*active)(struct i915_active *ref),
			void (*retire)(struct i915_active *ref),
			struct lock_class_key *key)
{
	unsigned long bits;

	debug_active_init(ref);

	ref->flags = 0;
	ref->active = active;
	ref->retire = ptr_unpack_bits(retire, &bits, 2);
	if (bits & I915_ACTIVE_MAY_SLEEP)
		ref->flags |= I915_ACTIVE_RETIRE_SLEEPS;

	ref->tree = RB_ROOT;
	ref->cache = NULL;
	init_llist_head(&ref->preallocated_barriers);
	atomic_set(&ref->count, 0);
	__mutex_init(&ref->mutex, "i915_active", key);
	__i915_active_fence_init(&ref->excl, &ref->mutex, NULL, excl_retire);
	INIT_WORK(&ref->work, active_work);
}

static bool ____active_del_barrier(struct i915_active *ref,
				   struct active_node *node,
				   struct intel_engine_cs *engine)

{
	struct llist_node *head = NULL, *tail = NULL;
	struct llist_node *pos, *next;

	GEM_BUG_ON(node->timeline != engine->kernel_context->timeline->fence_context);

	/*
	 * Rebuild the llist excluding our node. We may perform this
	 * outside of the kernel_context timeline mutex and so someone
	 * else may be manipulating the engine->barrier_tasks, in
	 * which case either we or they will be upset :)
	 *
	 * A second __active_del_barrier() will report failure to claim
	 * the active_node and the caller will just shrug and know not to
	 * claim ownership of its node.
	 *
	 * A concurrent i915_request_add_active_barriers() will miss adding
	 * any of the tasks, but we will try again on the next -- and since
	 * we are actively using the barrier, we know that there will be
	 * at least another opportunity when we idle.
	 */
	llist_for_each_safe(pos, next, llist_del_all(&engine->barrier_tasks)) {
		if (node == barrier_from_ll(pos)) {
			node = NULL;
			continue;
		}

		pos->next = head;
		head = pos;
		if (!tail)
			tail = pos;
	}
	if (head)
		llist_add_batch(head, tail, &engine->barrier_tasks);

	return !node;
}

static bool
__active_del_barrier(struct i915_active *ref, struct active_node *node)
{
	return ____active_del_barrier(ref, node, barrier_to_engine(node));
}

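/*
 * Track @fence as the most recent activity on timeline @tl for this tracker.
 * The caller must hold tl->mutex. If the slot for this timeline currently
 * holds a preallocated idle barrier, that barrier is cancelled and replaced
 * by @fence.
 */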
int i915_active_ref(struct i915_active *ref,
		    struct intel_timeline *tl,
		    struct dma_fence *fence)
{
	struct i915_active_fence *active;
	int err;

	lockdep_assert_held(&tl->mutex);

	/* Prevent reaping in case we malloc/wait while building the tree */
	err = i915_active_acquire(ref);
	if (err)
		return err;

	active = active_instance(ref, tl);
	if (!active) {
		err = -ENOMEM;
		goto out;
	}

	if (is_barrier(active)) { /* proto-node used by our idle barrier */
		/*
		 * This request is on the kernel_context timeline, and so
		 * we can use it to substitute for the pending idle-barrier
		 * request that we want to emit on the kernel_context.
		 */
		__active_del_barrier(ref, node_from_active(active));
		RCU_INIT_POINTER(active->fence, NULL);
		atomic_dec(&ref->count);
	}
	if (!__i915_active_fence_set(active, fence))
		atomic_inc(&ref->count);

out:
	i915_active_release(ref);
	return err;
}

void i915_active_set_exclusive(struct i915_active *ref, struct dma_fence *f)
{
	/* We expect the caller to manage the exclusive timeline ordering */
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * As we don't know which mutex the caller is using, we told a small
	 * lie to the debug code that it is using the i915_active.mutex;
	 * and now we must stick to that lie.
	 */
	mutex_acquire(&ref->mutex.dep_map, 0, 0, _THIS_IP_);
	if (!__i915_active_fence_set(&ref->excl, f))
		atomic_inc(&ref->count);
	mutex_release(&ref->mutex.dep_map, 0, _THIS_IP_);
}

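/*
 * Take a reference only if the tracker is already active; returns false once
 * idle, so a busy-check never transitions an idle tracker back to active.
 */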
bool i915_active_acquire_if_busy(struct i915_active *ref)
{
	debug_active_assert(ref);
	return atomic_add_unless(&ref->count, 1, 0);
}

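/*
 * The first acquire (count 0 -> 1) runs the optional ref->active() hook
 * under ref->mutex before marking the tracker active; later acquires merely
 * bump the reference count.
 */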
int i915_active_acquire(struct i915_active *ref)
{
	int err;

	if (i915_active_acquire_if_busy(ref))
		return 0;

	err = mutex_lock_interruptible(&ref->mutex);
	if (err)
		return err;

	if (!atomic_read(&ref->count) && ref->active)
		err = ref->active(ref);
	if (!err) {
		debug_active_activate(ref);
		atomic_inc(&ref->count);
	}

	mutex_unlock(&ref->mutex);

	return err;
}

void i915_active_release(struct i915_active *ref)
{
	debug_active_assert(ref);
	active_retire(ref);
}

static void enable_signaling(struct i915_active_fence *active)
{
	struct dma_fence *fence;

	fence = i915_active_fence_get(active);
	if (!fence)
		return;

	dma_fence_enable_sw_signaling(fence);
	dma_fence_put(fence);
}

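/*
 * Flush all outstanding fences by enabling their signaling (we cannot rely
 * on background retirement while we sleep) and then wait for the tracker to
 * become idle. Returns -EINTR if interrupted by a signal.
 */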
int i915_active_wait(struct i915_active *ref)
{
	struct active_node *it, *n;
	int err = 0;

	might_sleep();

	if (!i915_active_acquire_if_busy(ref))
		return 0;

	/* Flush lazy signals */
	enable_signaling(&ref->excl);
	rbtree_postorder_for_each_entry_safe(it, n, &ref->tree, node) {
		if (is_barrier(&it->base)) /* unconnected idle barrier */
			continue;

		enable_signaling(&it->base);
	}
	/* Any fence added after the wait begins will not be auto-signaled */

	i915_active_release(ref);
	if (err)
		return err;

	if (wait_var_event_interruptible(ref, i915_active_is_idle(ref)))
		return -EINTR;

	return 0;
}

int i915_request_await_active(struct i915_request *rq, struct i915_active *ref)
{
	int err = 0;

	if (rcu_access_pointer(ref->excl.fence)) {
		struct dma_fence *fence;

		rcu_read_lock();
		fence = dma_fence_get_rcu_safe(&ref->excl.fence);
		rcu_read_unlock();
		if (fence) {
			err = i915_request_await_dma_fence(rq, fence);
			dma_fence_put(fence);
		}
	}

	/* In the future we may choose to await on all fences */

	return err;
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
void i915_active_fini(struct i915_active *ref)
{
	debug_active_fini(ref);
	GEM_BUG_ON(atomic_read(&ref->count));
	GEM_BUG_ON(work_pending(&ref->work));
	GEM_BUG_ON(!RB_EMPTY_ROOT(&ref->tree));
	mutex_destroy(&ref->mutex);
}
#endif

static inline bool is_idle_barrier(struct active_node *node, u64 idx)
{
	return node->timeline == idx && !i915_active_fence_isset(&node->base);
}

static struct active_node *reuse_idle_barrier(struct i915_active *ref, u64 idx)
{
	struct rb_node *prev, *p;

	if (RB_EMPTY_ROOT(&ref->tree))
		return NULL;

	mutex_lock(&ref->mutex);
	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Try to reuse any existing barrier nodes already allocated for this
	 * i915_active, due to overlapping active phases there is likely a
	 * node kept alive (as we reuse before parking). We prefer to reuse
	 * completely idle barriers (less hassle in manipulating the llists),
	 * but otherwise any will do.
	 */
	if (ref->cache && is_idle_barrier(ref->cache, idx)) {
		p = &ref->cache->node;
		goto match;
	}

	prev = NULL;
	p = ref->tree.rb_node;
	while (p) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);

		if (is_idle_barrier(node, idx))
			goto match;

		prev = p;
		if (node->timeline < idx)
			p = p->rb_right;
		else
			p = p->rb_left;
	}

	/*
	 * No quick match, but we did find the leftmost rb_node for the
	 * kernel_context. Walk the rb_tree in-order to see if there were
	 * any idle-barriers on this timeline that we missed, or just use
	 * the first pending barrier.
	 */
	for (p = prev; p; p = rb_next(p)) {
		struct active_node *node =
			rb_entry(p, struct active_node, node);
		struct intel_engine_cs *engine;

		if (node->timeline > idx)
			break;

		if (node->timeline < idx)
			continue;

		if (is_idle_barrier(node, idx))
			goto match;

		/*
		 * The list of pending barriers is protected by the
		 * kernel_context timeline, which notably we do not hold
		 * here. i915_request_add_active_barriers() may consume
		 * the barrier before we claim it, so we have to check
		 * for success.
		 */
		engine = __barrier_to_engine(node);
		smp_rmb(); /* serialise with add_active_barriers */
		if (is_barrier(&node->base) &&
		    ____active_del_barrier(ref, node, engine))
			goto match;
	}

	mutex_unlock(&ref->mutex);

	return NULL;

match:
	rb_erase(p, &ref->tree); /* Hide from waits and sibling allocations */
	if (p == &ref->cache->node)
		ref->cache = NULL;
	mutex_unlock(&ref->mutex);

	return rb_entry(p, struct active_node, node);
}

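/*
 * First step of the barrier dance: allocate (or steal, via
 * reuse_idle_barrier()) a proto-node for each physical engine backing
 * @engine and stash them on ref->preallocated_barriers. They are moved into
 * the rbtree by i915_active_acquire_barrier() and finally coupled to a
 * kernel_context request by i915_request_add_active_barriers().
 */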
int i915_active_acquire_preallocate_barrier(struct i915_active *ref,
					    struct intel_engine_cs *engine)
{
	intel_engine_mask_t tmp, mask = engine->mask;
	struct intel_gt *gt = engine->gt;
	struct llist_node *pos, *next;
	int err;

	GEM_BUG_ON(i915_active_is_idle(ref));
	GEM_BUG_ON(!llist_empty(&ref->preallocated_barriers));

	/*
	 * Preallocate a node for each physical engine supporting the target
	 * engine (remember virtual engines have more than one sibling).
	 * We can then use the preallocated nodes in
	 * i915_active_acquire_barrier()
	 */
	for_each_engine_masked(engine, gt, mask, tmp) {
		u64 idx = engine->kernel_context->timeline->fence_context;
		struct active_node *node;

		node = reuse_idle_barrier(ref, idx);
		if (!node) {
			node = kmem_cache_alloc(global.slab_cache, GFP_KERNEL);
			if (!node) {
				err = -ENOMEM;
				goto unwind;
			}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
			node->base.lock =
				&engine->kernel_context->timeline->mutex;
#endif
			RCU_INIT_POINTER(node->base.fence, NULL);
			node->base.cb.func = node_retire;
			node->timeline = idx;
			node->ref = ref;
		}

		if (!i915_active_fence_isset(&node->base)) {
			/*
			 * Mark this as being *our* unconnected proto-node.
			 *
			 * Since this node is not in any list, and we have
			 * decoupled it from the rbtree, we can reuse the
			 * request to indicate this is an idle-barrier node
			 * and then we can use the rb_node and list pointers
			 * for our tracking of the pending barrier.
			 */
			RCU_INIT_POINTER(node->base.fence, ERR_PTR(-EAGAIN));
			node->base.cb.node.prev = (void *)engine;
			atomic_inc(&ref->count);
		}

		GEM_BUG_ON(barrier_to_engine(node) != engine);
		llist_add(barrier_to_ll(node), &ref->preallocated_barriers);
		intel_engine_pm_get(engine);
	}

	return 0;

unwind:
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);

		atomic_dec(&ref->count);
		intel_engine_pm_put(barrier_to_engine(node));

		kmem_cache_free(global.slab_cache, node);
	}
	return err;
}

void i915_active_acquire_barrier(struct i915_active *ref)
{
	struct llist_node *pos, *next;

	GEM_BUG_ON(i915_active_is_idle(ref));

	/*
	 * Transfer the list of preallocated barriers into the
	 * i915_active rbtree, but only as proto-nodes. They will be
	 * populated by i915_request_add_active_barriers() to point to the
	 * request that will eventually release them.
	 */
	mutex_lock_nested(&ref->mutex, SINGLE_DEPTH_NESTING);
	llist_for_each_safe(pos, next, take_preallocated_barriers(ref)) {
		struct active_node *node = barrier_from_ll(pos);
		struct intel_engine_cs *engine = barrier_to_engine(node);
		struct rb_node **p, *parent;

		parent = NULL;
		p = &ref->tree.rb_node;
		while (*p) {
			struct active_node *it;

			parent = *p;

			it = rb_entry(parent, struct active_node, node);
			if (it->timeline < node->timeline)
				p = &parent->rb_right;
			else
				p = &parent->rb_left;
		}
		rb_link_node(&node->node, parent, p);
		rb_insert_color(&node->node, &ref->tree);

		GEM_BUG_ON(!intel_engine_pm_is_awake(engine));
		llist_add(barrier_to_ll(node), &engine->barrier_tasks);
		intel_engine_pm_put(engine);
	}
	mutex_unlock(&ref->mutex);
}

void i915_request_add_active_barriers(struct i915_request *rq)
{
	struct intel_engine_cs *engine = rq->engine;
	struct llist_node *node, *next;
	unsigned long flags;

	GEM_BUG_ON(intel_engine_is_virtual(engine));
	GEM_BUG_ON(i915_request_timeline(rq) != engine->kernel_context->timeline);

	node = llist_del_all(&engine->barrier_tasks);
	if (!node)
		return;
	/*
	 * Attach the list of proto-fences to the in-flight request such
	 * that the parent i915_active will be released when this request
	 * is retired.
	 */
	spin_lock_irqsave(&rq->lock, flags);
	llist_for_each_safe(node, next, node) {
		RCU_INIT_POINTER(barrier_from_ll(node)->base.fence, &rq->fence);
		smp_wmb(); /* serialise with reuse_idle_barrier */
		list_add_tail((struct list_head *)node, &rq->fence.cb_list);
	}
	spin_unlock_irqrestore(&rq->lock, flags);
}

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
#define active_is_held(active) lockdep_is_held((active)->lock)
#else
#define active_is_held(active) true
#endif

/*
 * __i915_active_fence_set: Update the last active fence along its timeline
 * @active: the active tracker
 * @fence: the new fence (under construction)
 *
 * Records the new @fence as the last active fence along its timeline in
 * this active tracker, moving the tracking callbacks from the previous
 * fence onto this one. Returns the previous fence (if not already completed),
 * which the caller must ensure is executed before the new fence. To ensure
 * that the order of fences within the timeline of the i915_active_fence is
 * maintained, it must be locked by the caller.
 */
struct dma_fence *
__i915_active_fence_set(struct i915_active_fence *active,
			struct dma_fence *fence)
{
	struct dma_fence *prev;
	unsigned long flags;

	/* NB: must be serialised by an outer timeline mutex (active->lock) */
	spin_lock_irqsave(fence->lock, flags);
	GEM_BUG_ON(test_bit(DMA_FENCE_FLAG_SIGNALED_BIT, &fence->flags));

	prev = rcu_dereference_protected(active->fence, active_is_held(active));
	if (prev) {
		GEM_BUG_ON(prev == fence);
		spin_lock_nested(prev->lock, SINGLE_DEPTH_NESTING);
		__list_del_entry(&active->cb.node);
		spin_unlock(prev->lock); /* serialise with prev->cb_list */

		/*
		 * active->fence is reset by the callback from inside
		 * interrupt context. We need to serialise our list
		 * manipulation with the fence->lock to prevent the prev
		 * being lost inside an interrupt (it can't be replaced as
		 * no other caller is allowed to enter __i915_active_fence_set
		 * as we hold the timeline lock). After serialising with
		 * the callback, we need to double check which ran first,
		 * our list_del() [decoupling prev from the callback] or
		 * the callback...
		 */
		prev = rcu_access_pointer(active->fence);
	}

	rcu_assign_pointer(active->fence, fence);
	list_add_tail(&active->cb.node, &fence->cb_list);

	spin_unlock_irqrestore(fence->lock, flags);

	return prev;
}

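/*
 * Set @rq as the last fence for this slot and, if another fence previously
 * occupied it, also order @rq to execute after that fence (which may belong
 * to a different timeline).
 */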
int i915_active_fence_set(struct i915_active_fence *active,
			  struct i915_request *rq)
{
	struct dma_fence *fence;
	int err = 0;

#if IS_ENABLED(CONFIG_DRM_I915_DEBUG_GEM)
	lockdep_assert_held(active->lock);
#endif

	/* Must maintain timeline ordering wrt previous active requests */
	rcu_read_lock();
	fence = __i915_active_fence_set(active, &rq->fence);
	if (fence) /* but the previous fence may not belong to that timeline! */
		fence = dma_fence_get_rcu(fence);
	rcu_read_unlock();
	if (fence) {
		err = i915_request_await_dma_fence(rq, fence);
		dma_fence_put(fence);
	}

	return err;
}

void i915_active_noop(struct dma_fence *fence, struct dma_fence_cb *cb)
{
	i915_active_fence_cb(fence, cb);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/i915_active.c"
#endif

static void i915_global_active_shrink(void)
{
	kmem_cache_shrink(global.slab_cache);
}

static void i915_global_active_exit(void)
{
	kmem_cache_destroy(global.slab_cache);
}

static struct i915_global_active global = { {
	.shrink = i915_global_active_shrink,
	.exit = i915_global_active_exit,
} };

int __init i915_global_active_init(void)
{
	global.slab_cache = KMEM_CACHE(active_node, SLAB_HWCACHE_ALIGN);
	if (!global.slab_cache)
		return -ENOMEM;

	i915_global_register(&global.base);
	return 0;
}