intel_context.c 11.7 KB
Newer Older
C
Chris Wilson 已提交
1
// SPDX-License-Identifier: MIT
2 3 4 5
/*
 * Copyright © 2019 Intel Corporation
 */

6 7 8
#include "gem/i915_gem_context.h"
#include "gem/i915_gem_pm.h"

9
#include "i915_drv.h"
10
#include "i915_trace.h"
11

12
#include "intel_context.h"
13
#include "intel_engine.h"
14
#include "intel_engine_pm.h"
15
#include "intel_ring.h"
16

17
/*
 * Slab cache from which every struct intel_context in this file is
 * allocated. It is created by i915_context_module_init() and destroyed by
 * i915_context_module_exit().
 */
static struct kmem_cache *slab_ce;
18

19
static struct intel_context *intel_context_alloc(void)
20
{
21
	return kmem_cache_zalloc(slab_ce, GFP_KERNEL);
22 23
}

24
static void rcu_context_free(struct rcu_head *rcu)
25
{
26 27
	struct intel_context *ce = container_of(rcu, typeof(*ce), rcu);

28
	trace_intel_context_free(ce);
29
	kmem_cache_free(slab_ce, ce);
30 31
}

32 33 34 35 36
/*
 * Free @ce through call_rcu(): the memory is not returned here but later,
 * when rcu_context_free() is invoked as the RCU callback.
 */
void intel_context_free(struct intel_context *ce)
{
	call_rcu(&ce->rcu, rcu_context_free);
}

37
struct intel_context *
38
intel_context_create(struct intel_engine_cs *engine)
39
{
40
	struct intel_context *ce;
41 42 43 44 45

	ce = intel_context_alloc();
	if (!ce)
		return ERR_PTR(-ENOMEM);

46
	intel_context_init(ce, engine);
47
	trace_intel_context_create(ce);
48
	return ce;
49 50
}

51 52 53 54 55 56 57 58
int intel_context_alloc_state(struct intel_context *ce)
{
	int err = 0;

	if (mutex_lock_interruptible(&ce->pin_mutex))
		return -EINTR;

	if (!test_bit(CONTEXT_ALLOC_BIT, &ce->flags)) {
59 60 61 62 63
		if (intel_context_is_banned(ce)) {
			err = -EIO;
			goto unlock;
		}

64 65 66 67 68 69 70 71 72 73 74 75
		err = ce->ops->alloc(ce);
		if (unlikely(err))
			goto unlock;

		set_bit(CONTEXT_ALLOC_BIT, &ce->flags);
	}

unlock:
	mutex_unlock(&ce->pin_mutex);
	return err;
}

76 77 78 79
/*
 * Acquire ce->active and, unless @ce is a barrier context or its engine
 * uses the GuC, preallocate the barrier tracking nodes for ce->engine.
 *
 * Returns 0 on success. If the preallocation fails, ce->active is released
 * again and the error is returned.
 */
static int intel_context_active_acquire(struct intel_context *ce)
{
	int ret;

	__i915_active_acquire(&ce->active);

	/* Neither of these cases preallocates barrier nodes */
	if (intel_context_is_barrier(ce))
		return 0;
	if (intel_engine_uses_guc(ce->engine))
		return 0;

	/* Preallocate tracking nodes */
	ret = i915_active_acquire_preallocate_barrier(&ce->active, ce->engine);
	if (ret)
		i915_active_release(&ce->active);

	return ret;
}

/*
 * Counterpart of intel_context_active_acquire(): acquire the barrier on
 * ce->active, then release ce->active.
 */
static void intel_context_active_release(struct intel_context *ce)
{
	/* Nodes preallocated in intel_context_active_acquire() */
	i915_active_acquire_barrier(&ce->active);
	i915_active_release(&ce->active);
}

101
static int __context_pin_state(struct i915_vma *vma, struct i915_gem_ww_ctx *ww)
102
{
103
	unsigned int bias = i915_ggtt_pin_bias(vma) | PIN_OFFSET_BIAS;
104
	int err;
105

106
	err = i915_ggtt_pin(vma, ww, 0, bias | PIN_HIGH);
107 108 109
	if (err)
		return err;

110 111 112 113
	err = i915_active_acquire(&vma->active);
	if (err)
		goto err_unpin;

114 115 116 117
	/*
	 * And mark it as a globally pinned object to let the shrinker know
	 * it cannot reclaim the object until we release it.
	 */
118
	i915_vma_make_unshrinkable(vma);
119 120 121
	vma->obj->mm.dirty = true;

	return 0;
122 123 124 125

err_unpin:
	i915_vma_unpin(vma);
	return err;
126 127 128 129
}

static void __context_unpin_state(struct i915_vma *vma)
{
130
	i915_vma_make_shrinkable(vma);
131
	i915_active_release(&vma->active);
132
	__i915_vma_unpin(vma);
133 134
}

135 136
static int __ring_active(struct intel_ring *ring,
			 struct i915_gem_ww_ctx *ww)
137 138 139
{
	int err;

140
	err = intel_ring_pin(ring, ww);
141 142 143
	if (err)
		return err;

144
	err = i915_active_acquire(&ring->vma->active);
145
	if (err)
146
		goto err_pin;
147 148 149

	return 0;

150 151
err_pin:
	intel_ring_unpin(ring);
152 153 154 155 156 157
	return err;
}

static void __ring_retire(struct intel_ring *ring)
{
	i915_active_release(&ring->vma->active);
158
	intel_ring_unpin(ring);
159 160
}

161 162
static int intel_context_pre_pin(struct intel_context *ce,
				 struct i915_gem_ww_ctx *ww)
163 164 165 166 167
{
	int err;

	CE_TRACE(ce, "active\n");

168
	err = __ring_active(ce->ring, ww);
169 170 171
	if (err)
		return err;

172
	err = intel_timeline_pin(ce->timeline, ww);
173 174 175 176 177 178
	if (err)
		goto err_ring;

	if (!ce->state)
		return 0;

179
	err = __context_pin_state(ce->state, ww);
180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201
	if (err)
		goto err_timeline;


	return 0;

err_timeline:
	intel_timeline_unpin(ce->timeline);
err_ring:
	__ring_retire(ce->ring);
	return err;
}

/*
 * Reverse intel_context_pre_pin(): unpin the context state (if any), then
 * the timeline, then retire the ring.
 */
static void intel_context_post_unpin(struct intel_context *ce)
{
	struct i915_vma *state = ce->state;

	if (state)
		__context_unpin_state(state);

	intel_timeline_unpin(ce->timeline);
	__ring_retire(ce->ring);
}

202 203
int __intel_context_do_pin_ww(struct intel_context *ce,
			      struct i915_gem_ww_ctx *ww)
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219
{
	bool handoff = false;
	void *vaddr;
	int err = 0;

	if (unlikely(!test_bit(CONTEXT_ALLOC_BIT, &ce->flags))) {
		err = intel_context_alloc_state(ce);
		if (err)
			return err;
	}

	/*
	 * We always pin the context/ring/timeline here, to ensure a pin
	 * refcount for __intel_context_active(), which prevent a lock
	 * inversion of ce->pin_mutex vs dma_resv_lock().
	 */
220 221 222 223 224 225 226 227

	err = i915_gem_object_lock(ce->timeline->hwsp_ggtt->obj, ww);
	if (!err && ce->ring->vma->obj)
		err = i915_gem_object_lock(ce->ring->vma->obj, ww);
	if (!err && ce->state)
		err = i915_gem_object_lock(ce->state->obj, ww);
	if (!err)
		err = intel_context_pre_pin(ce, ww);
228 229 230 231 232 233 234
	if (err)
		return err;

	err = i915_active_acquire(&ce->active);
	if (err)
		goto err_ctx_unpin;

235
	err = ce->ops->pre_pin(ce, ww, &vaddr);
236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269
	if (err)
		goto err_release;

	err = mutex_lock_interruptible(&ce->pin_mutex);
	if (err)
		goto err_post_unpin;

	if (unlikely(intel_context_is_closed(ce))) {
		err = -ENOENT;
		goto err_unlock;
	}

	if (likely(!atomic_add_unless(&ce->pin_count, 1, 0))) {
		err = intel_context_active_acquire(ce);
		if (unlikely(err))
			goto err_unlock;

		err = ce->ops->pin(ce, vaddr);
		if (err) {
			intel_context_active_release(ce);
			goto err_unlock;
		}

		CE_TRACE(ce, "pin ring:{start:%08x, head:%04x, tail:%04x}\n",
			 i915_ggtt_offset(ce->ring->vma),
			 ce->ring->head, ce->ring->tail);

		handoff = true;
		smp_mb__before_atomic(); /* flush pin before it is visible */
		atomic_inc(&ce->pin_count);
	}

	GEM_BUG_ON(!intel_context_is_pinned(ce)); /* no overflow! */

270 271
	trace_intel_context_do_pin(ce);

272 273 274 275 276 277 278 279 280
err_unlock:
	mutex_unlock(&ce->pin_mutex);
err_post_unpin:
	if (!handoff)
		ce->ops->post_unpin(ce);
err_release:
	i915_active_release(&ce->active);
err_ctx_unpin:
	intel_context_post_unpin(ce);
281 282 283 284 285 286 287 288 289

	/*
	 * Unlock the hwsp_ggtt object since it's shared.
	 * In principle we can unlock all the global state locked above
	 * since it's pinned and doesn't need fencing, and will
	 * thus remain resident until it is explicitly unpinned.
	 */
	i915_gem_ww_unlock_single(ce->timeline->hwsp_ggtt->obj);

290 291 292
	return err;
}

293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
/*
 * Pin @ce using a ww context owned by this function.
 *
 * Whenever __intel_context_do_pin_ww() reports -EDEADLK the ww context is
 * backed off and, if the backoff succeeds, the pin is attempted again.
 *
 * Returns 0 on success, otherwise the error from the pin or the backoff.
 */
int __intel_context_do_pin(struct intel_context *ce)
{
	struct i915_gem_ww_ctx ww;
	int ret;

	i915_gem_ww_ctx_init(&ww, true);
	do {
		ret = __intel_context_do_pin_ww(ce, &ww);
		if (ret != -EDEADLK)
			break;

		ret = i915_gem_ww_ctx_backoff(&ww);
	} while (!ret);
	i915_gem_ww_ctx_fini(&ww);

	return ret;
}

310
void __intel_context_do_unpin(struct intel_context *ce, int sub)
311
{
312
	if (!atomic_sub_and_test(sub, &ce->pin_count))
313 314 315 316 317 318 319 320 321 322 323 324 325 326
		return;

	CE_TRACE(ce, "unpin\n");
	ce->ops->unpin(ce);
	ce->ops->post_unpin(ce);

	/*
	 * Once released, we may asynchronously drop the active reference.
	 * As that may be the only reference keeping the context alive,
	 * take an extra now so that it is not freed before we finish
	 * dereferencing it.
	 */
	intel_context_get(ce);
	intel_context_active_release(ce);
327
	trace_intel_context_do_unpin(ce);
328 329 330
	intel_context_put(ce);
}

331
static void __intel_context_retire(struct i915_active *active)
332 333 334
{
	struct intel_context *ce = container_of(active, typeof(*ce), active);

335 336 337
	CE_TRACE(ce, "retire runtime: { total:%lluns, avg:%lluns }\n",
		 intel_context_get_total_runtime_ns(ce),
		 intel_context_get_avg_runtime_ns(ce));
338

339
	set_bit(CONTEXT_VALID_BIT, &ce->flags);
340
	intel_context_post_unpin(ce);
341
	intel_context_put(ce);
342 343
}

344
static int __intel_context_active(struct i915_active *active)
345
{
346
	struct intel_context *ce = container_of(active, typeof(*ce), active);
347 348 349

	intel_context_get(ce);

350
	/* everything should already be activated by intel_context_pre_pin() */
351 352
	GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->ring->vma->active));
	__intel_ring_pin(ce->ring);
353

354
	__intel_timeline_pin(ce->timeline);
355

356 357 358 359 360
	if (ce->state) {
		GEM_WARN_ON(!i915_active_acquire_if_busy(&ce->state->active));
		__i915_vma_pin(ce->state);
		i915_vma_make_unshrinkable(ce->state);
	}
361

362 363 364
	return 0;
}

365 366 367 368 369 370
/*
 * Notify callback that does nothing: both arguments are ignored and
 * NOTIFY_DONE is returned for every state. intel_context_init() passes it
 * to i915_sw_fence_init() for ce->guc_state.blocked.
 */
static int sw_fence_dummy_notify(struct i915_sw_fence *sf,
				 enum i915_sw_fence_notify state)
{
	return NOTIFY_DONE;
}

371
void
372
intel_context_init(struct intel_context *ce, struct intel_engine_cs *engine)
373
{
374
	GEM_BUG_ON(!engine->cops);
375
	GEM_BUG_ON(!engine->gt->vm);
376 377 378 379 380 381

	kref_init(&ce->ref);

	ce->engine = engine;
	ce->ops = engine->cops;
	ce->sseu = engine->sseu;
382 383
	ce->ring = NULL;
	ce->ring_size = SZ_4K;
384

385 386
	ewma_runtime_init(&ce->runtime.avg);

387
	ce->vm = i915_vm_get(engine->gt->vm);
388

389 390
	/* NB ce->signal_link/lock is used under RCU */
	spin_lock_init(&ce->signal_lock);
391 392 393 394
	INIT_LIST_HEAD(&ce->signals);

	mutex_init(&ce->pin_mutex);

395
	spin_lock_init(&ce->guc_state.lock);
396
	INIT_LIST_HEAD(&ce->guc_state.fences);
397

398 399 400
	spin_lock_init(&ce->guc_active.lock);
	INIT_LIST_HEAD(&ce->guc_active.requests);

401 402
	ce->guc_id.id = GUC_INVALID_LRC_ID;
	INIT_LIST_HEAD(&ce->guc_id.link);
403

404 405 406 407
	/*
	 * Initialize fence to be complete as this is expected to be complete
	 * unless there is a pending schedule disable outstanding.
	 */
408 409 410
	i915_sw_fence_init(&ce->guc_state.blocked,
			   sw_fence_dummy_notify);
	i915_sw_fence_commit(&ce->guc_state.blocked);
411

412
	i915_active_init(&ce->active,
413
			 __intel_context_active, __intel_context_retire, 0);
414 415
}

416 417
/*
 * Tear down what intel_context_init() set up: put the timeline if one is
 * attached, put the vm reference, then destroy the pin mutex and finalise
 * the activity tracker. The memory of @ce itself is not freed here.
 */
void intel_context_fini(struct intel_context *ce)
{
	struct intel_timeline *tl = ce->timeline;

	if (tl)
		intel_timeline_put(tl);
	i915_vm_put(ce->vm);

	mutex_destroy(&ce->pin_mutex);
	i915_active_fini(&ce->active);
}

426
void i915_context_module_exit(void)
427
{
428
	kmem_cache_destroy(slab_ce);
429 430
}

431
/*
 * Create the slab cache used for struct intel_context allocations.
 *
 * Returns 0 on success or -ENOMEM if the cache could not be created.
 */
int __init i915_context_module_init(void)
{
	slab_ce = KMEM_CACHE(intel_context, SLAB_HWCACHE_ALIGN);

	return slab_ce ? 0 : -ENOMEM;
}
439 440 441

void intel_context_enter_engine(struct intel_context *ce)
{
442
	intel_engine_pm_get(ce->engine);
443
	intel_timeline_enter(ce->timeline);
444 445 446 447
}

void intel_context_exit_engine(struct intel_context *ce)
{
448
	intel_timeline_exit(ce->timeline);
449
	intel_engine_pm_put(ce->engine);
450
}
451

452 453 454
int intel_context_prepare_remote_request(struct intel_context *ce,
					 struct i915_request *rq)
{
455
	struct intel_timeline *tl = ce->timeline;
456 457 458
	int err;

	/* Only suitable for use in remotely modifying this context */
459
	GEM_BUG_ON(rq->context == ce);
460

461
	if (rcu_access_pointer(rq->timeline) != tl) { /* timeline sharing! */
462
		/* Queue this switch after current activity by this context. */
463
		err = i915_active_fence_set(&tl->last_request, rq);
464
		if (err)
465
			return err;
466
	}
467 468 469 470 471 472 473 474 475

	/*
	 * Guarantee context image and the timeline remains pinned until the
	 * modifying request is retired by setting the ce activity tracker.
	 *
	 * But we only need to take one pin on the account of it. Or in other
	 * words transfer the pinned ce object to tracked active request.
	 */
	GEM_BUG_ON(i915_active_is_idle(&ce->active));
476
	return i915_active_add_request(&ce->active, rq);
477 478
}

479 480
struct i915_request *intel_context_create_request(struct intel_context *ce)
{
481
	struct i915_gem_ww_ctx ww;
482 483 484
	struct i915_request *rq;
	int err;

485 486 487 488 489 490 491 492 493 494
	i915_gem_ww_ctx_init(&ww, true);
retry:
	err = intel_context_pin_ww(ce, &ww);
	if (!err) {
		rq = i915_request_create(ce);
		intel_context_unpin(ce);
	} else if (err == -EDEADLK) {
		err = i915_gem_ww_ctx_backoff(&ww);
		if (!err)
			goto retry;
495
		rq = ERR_PTR(err);
496 497 498
	} else {
		rq = ERR_PTR(err);
	}
499

500
	i915_gem_ww_ctx_fini(&ww);
501

502 503 504 505 506 507 508 509 510 511 512 513
	if (IS_ERR(rq))
		return rq;

	/*
	 * timeline->mutex should be the inner lock, but is used as outer lock.
	 * Hack around this to shut up lockdep in selftests..
	 */
	lockdep_unpin_lock(&ce->timeline->mutex, rq->cookie);
	mutex_release(&ce->timeline->mutex.dep_map, _RET_IP_);
	mutex_acquire(&ce->timeline->mutex.dep_map, SINGLE_DEPTH_NESTING, 0, _RET_IP_);
	rq->cookie = lockdep_pin_lock(&ce->timeline->mutex);

514 515
	return rq;
}
516

517 518 519 520 521 522 523 524 525 526 527 528 529 530 531 532 533 534 535 536
/*
 * Look up a not-yet-completed request of @ce; only valid when the engine
 * uses the GuC (asserted).
 *
 * ce->guc_active.requests is walked in reverse under ce->guc_active.lock.
 * The walk stops at the first completed request, and the result is the last
 * incomplete request seen before that point.
 *
 * Returns that request, or NULL if the walk found no incomplete request.
 * No reference is taken on the returned request by this function.
 */
struct i915_request *intel_context_find_active_request(struct intel_context *ce)
{
	struct i915_request *found = NULL;
	struct i915_request *pos;
	unsigned long irqflags;

	GEM_BUG_ON(!intel_engine_uses_guc(ce->engine));

	spin_lock_irqsave(&ce->guc_active.lock, irqflags);
	list_for_each_entry_reverse(pos, &ce->guc_active.requests,
				    sched.link) {
		if (i915_request_completed(pos))
			break;

		found = pos;
	}
	spin_unlock_irqrestore(&ce->guc_active.lock, irqflags);

	return found;
}

537 538 539
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftest_context.c"
#endif