i915_gem_context.c 31.0 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/*
 * Copyright © 2011-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * This file implements HW context support. On gen5+ a HW context consists of an
 * opaque GPU object which is referenced at times of context saves and restores.
 * With RC6 enabled, the context is also referenced as the GPU enters and exits
 * from RC6 (GPU has its own internal power context, except on gen5). Though
 * something like a context does exist for the media ring, the code only
 * supports contexts for the render ring.
 *
 * In software, there is a distinction between contexts created by the user,
 * and the default HW context. The default HW context is used by GPU clients
 * that do not request setup of their own hardware context. The default
 * context's state is never restored to help prevent programming errors. This
 * would happen if a client ran and piggy-backed off another client's GPU state.
 * The default context only exists to give the GPU some offset to load as the
 * current to invoke a save of the context we actually care about. In fact, the
 * code could likely be constructed, albeit in a more complicated fashion, to
 * never use the default context, though that limits the driver's ability to
 * swap out, and/or destroy other contexts.
 *
 * All other contexts are created as a request by the GPU client. These contexts
 * store GPU state, and thus allow GPU clients to not re-emit state (and
 * potentially query certain state) at any time. The kernel driver makes
 * certain that the appropriate commands are inserted.
 *
 * The context life cycle is semi-complicated in that context BOs may live
 * longer than the context itself because of the way the hardware, and object
 * tracking works. Below is a very crude representation of the state machine
 * describing the context life.
 *                                         refcount     pincount     active
 * S0: initial state                          0            0           0
 * S1: context created                        1            0           0
 * S2: context is currently running           2            1           X
 * S3: GPU referenced, but not current        2            0           1
 * S4: context is current, but destroyed      1            1           0
 * S5: like S3, but destroyed                 1            0           1
 *
 * The most common (but not all) transitions:
 * S0->S1: client creates a context
 * S1->S2: client submits execbuf with context
 * S2->S3: another client submits execbuf with context
 * S3->S1: context object was retired
 * S3->S2: client submits another execbuf
 * S2->S4: context destroy called with current context
 * S3->S5->S0: destroy path
 * S4->S5->S0: destroy path on current context
 *
 * There are two confusing terms used above:
 *  The "current context" means the context which is currently running on the
 *  GPU. The GPU has loaded its state already and has stored away the gtt
 *  offset of the BO. The GPU is not actively referencing the data at this
 *  offset, but it will on the next context switch. The only way to avoid this
 *  is to do a GPU reset.
 *
 *  An "active context" is one which was previously the "current context" and is
 *  on the active list waiting for the next context switch to occur. Until this
 *  happens, the object must remain at the same gtt offset. It is therefore
 *  possible to destroy a context, but it is still active.
 *
 */

88
#include <linux/log2.h>
89 90
#include <drm/drmP.h>
#include <drm/i915_drm.h>
91
#include "i915_drv.h"
92
#include "i915_trace.h"
93

94 95
/*
 * Bitmask with one bit set for each L3 slice reported by NUM_L3_SLICES(dev).
 * The whole expansion is parenthesised so that it remains a single operand
 * when the macro is used inside a larger expression.
 */
#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)

96
static void lut_close(struct i915_gem_context *ctx)
97
{
98 99 100 101 102 103 104
	struct i915_lut_handle *lut, *ln;
	struct radix_tree_iter iter;
	void __rcu **slot;

	list_for_each_entry_safe(lut, ln, &ctx->handles_list, ctx_link) {
		list_del(&lut->obj_link);
		kmem_cache_free(ctx->i915->luts, lut);
105 106
	}

107 108 109
	radix_tree_for_each_slot(slot, &ctx->handles_vma, &iter, 0) {
		struct i915_vma *vma = rcu_dereference_raw(*slot);
		struct drm_i915_gem_object *obj = vma->obj;
110

111
		radix_tree_iter_delete(&ctx->handles_vma, &iter, slot);
112

113 114
		if (!i915_vma_is_ggtt(vma))
			i915_vma_close(vma);
115

116
		__i915_gem_object_release_unless_active(obj);
117 118 119
	}
}

120
static void i915_gem_context_free(struct i915_gem_context *ctx)
121
{
122
	int i;
123

124
	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
125
	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
126

127 128
	i915_ppgtt_put(ctx->ppgtt);

129 130 131 132 133 134 135
	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_context *ce = &ctx->engine[i];

		if (!ce->state)
			continue;

		WARN_ON(ce->pin_count);
136
		if (ce->ring)
137
			intel_ring_free(ce->ring);
138

139
		__i915_gem_object_release_unless_active(ce->state->obj);
140 141
	}

142
	kfree(ctx->name);
143
	put_pid(ctx->pid);
144

B
Ben Widawsky 已提交
145
	list_del(&ctx->link);
146

147
	ida_simple_remove(&ctx->i915->contexts.hw_ida, ctx->hw_id);
148
	kfree_rcu(ctx, rcu);
149 150
}

151 152 153
static void contexts_free(struct drm_i915_private *i915)
{
	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
154
	struct i915_gem_context *ctx, *cn;
155 156 157

	lockdep_assert_held(&i915->drm.struct_mutex);

158
	llist_for_each_entry_safe(ctx, cn, freed, free_link)
159 160 161
		i915_gem_context_free(ctx);
}

162 163 164 165 166 167 168 169 170 171 172 173 174 175 176
static void contexts_free_first(struct drm_i915_private *i915)
{
	struct i915_gem_context *ctx;
	struct llist_node *freed;

	lockdep_assert_held(&i915->drm.struct_mutex);

	freed = llist_del_first(&i915->contexts.free_list);
	if (!freed)
		return;

	ctx = container_of(freed, typeof(*ctx), free_link);
	i915_gem_context_free(ctx);
}

177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196
static void contexts_free_worker(struct work_struct *work)
{
	struct drm_i915_private *i915 =
		container_of(work, typeof(*i915), contexts.free_work);

	mutex_lock(&i915->drm.struct_mutex);
	contexts_free(i915);
	mutex_unlock(&i915->drm.struct_mutex);
}

void i915_gem_context_release(struct kref *ref)
{
	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
	struct drm_i915_private *i915 = ctx->i915;

	trace_i915_context_free(ctx);
	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
		queue_work(i915->wq, &i915->contexts.free_work);
}

197 198
static void context_close(struct i915_gem_context *ctx)
{
199
	i915_gem_context_set_closed(ctx);
200 201

	lut_close(ctx);
202 203
	if (ctx->ppgtt)
		i915_ppgtt_close(&ctx->ppgtt->base);
204

205 206 207 208
	ctx->file_priv = ERR_PTR(-EBADF);
	i915_gem_context_put(ctx);
}

209 210 211 212
static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{
	int ret;

213
	ret = ida_simple_get(&dev_priv->contexts.hw_ida,
214 215 216 217 218 219
			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
	if (ret < 0) {
		/* Contexts are only released when no longer active.
		 * Flush any pending retires to hopefully release some
		 * stale contexts and try again.
		 */
220
		i915_gem_retire_requests(dev_priv);
221
		ret = ida_simple_get(&dev_priv->contexts.hw_ida,
222 223 224 225 226 227 228 229 230
				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	*out = ret;
	return 0;
}

231 232
static u32 default_desc_template(const struct drm_i915_private *i915,
				 const struct i915_hw_ppgtt *ppgtt)
233
{
234
	u32 address_mode;
235 236
	u32 desc;

237
	desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
238

239 240 241 242 243 244
	address_mode = INTEL_LEGACY_32B_CONTEXT;
	if (ppgtt && i915_vm_is_48bit(&ppgtt->base))
		address_mode = INTEL_LEGACY_64B_CONTEXT;
	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;

	if (IS_GEN8(i915))
245 246 247 248 249 250 251 252 253 254
		desc |= GEN8_CTX_L3LLC_COHERENT;

	/* TODO: WaDisableLiteRestore when we start using semaphore
	 * signalling between Command Streamers
	 * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
	 */

	return desc;
}

255
static struct i915_gem_context *
256
__create_hw_context(struct drm_i915_private *dev_priv,
257
		    struct drm_i915_file_private *file_priv)
258
{
259
	struct i915_gem_context *ctx;
T
Tejun Heo 已提交
260
	int ret;
261

262
	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
263 264
	if (ctx == NULL)
		return ERR_PTR(-ENOMEM);
265

266 267 268 269 270 271
	ret = assign_hw_id(dev_priv, &ctx->hw_id);
	if (ret) {
		kfree(ctx);
		return ERR_PTR(ret);
	}

272
	kref_init(&ctx->ref);
273
	list_add_tail(&ctx->link, &dev_priv->contexts.list);
274
	ctx->i915 = dev_priv;
275
	ctx->priority = I915_PRIORITY_NORMAL;
276

277 278
	INIT_RADIX_TREE(&ctx->handles_vma, GFP_KERNEL);
	INIT_LIST_HEAD(&ctx->handles_list);
279

280
	/* Default context will never have a file_priv */
281 282
	ret = DEFAULT_CONTEXT_HANDLE;
	if (file_priv) {
283
		ret = idr_alloc(&file_priv->context_idr, ctx,
284
				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
285
		if (ret < 0)
286
			goto err_lut;
287 288
	}
	ctx->user_handle = ret;
289 290

	ctx->file_priv = file_priv;
291
	if (file_priv) {
292
		ctx->pid = get_task_pid(current, PIDTYPE_PID);
293 294 295 296 297 298 299 300 301
		ctx->name = kasprintf(GFP_KERNEL, "%s[%d]/%x",
				      current->comm,
				      pid_nr(ctx->pid),
				      ctx->user_handle);
		if (!ctx->name) {
			ret = -ENOMEM;
			goto err_pid;
		}
	}
302

303 304 305
	/* NB: Mark all slices as needing a remap so that when the context first
	 * loads it will restore whatever remap state already exists. If there
	 * is no remap info, it will be a NOP. */
306
	ctx->remap_slice = ALL_L3_SLICES(dev_priv);
307

308
	i915_gem_context_set_bannable(ctx);
309
	ctx->ring_size = 4 * PAGE_SIZE;
310 311
	ctx->desc_template =
		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
312

313 314 315 316
	/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
	 * present or not in use we still need a small bias as ring wraparound
	 * at offset 0 sometimes hangs. No idea why.
	 */
317
	if (HAS_GUC(dev_priv) && i915_modparams.enable_guc_loading)
318 319
		ctx->ggtt_offset_bias = GUC_WOPCM_TOP;
	else
320
		ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
321

322
	return ctx;
323

324 325 326
err_pid:
	put_pid(ctx->pid);
	idr_remove(&file_priv->context_idr, ctx->user_handle);
327
err_lut:
328
	context_close(ctx);
329
	return ERR_PTR(ret);
330 331
}

332 333 334 335 336 337 338
/*
 * Undo __create_hw_context(): remove the context's user handle from the
 * owning file's context_idr and close the context.
 *
 * @file_priv may be NULL. i915_gem_create_context() forwards its own
 * file_priv here when PPGTT creation fails, and that is NULL for contexts
 * created without a file; such a context was never inserted into an idr, so
 * there is nothing to remove.
 */
static void __destroy_hw_context(struct i915_gem_context *ctx,
				 struct drm_i915_file_private *file_priv)
{
	if (file_priv)
		idr_remove(&file_priv->context_idr, ctx->user_handle);
	context_close(ctx);
}

339 340 341 342 343
/**
 * The default context needs to exist per ring that uses contexts. It stores the
 * context state of the GPU for applications that don't utilize HW contexts, as
 * well as an idle case.
 */
344
static struct i915_gem_context *
345
i915_gem_create_context(struct drm_i915_private *dev_priv,
346
			struct drm_i915_file_private *file_priv)
347
{
348
	struct i915_gem_context *ctx;
349

350
	lockdep_assert_held(&dev_priv->drm.struct_mutex);
351

352 353
	/* Reap the most stale context */
	contexts_free_first(dev_priv);
354

355
	ctx = __create_hw_context(dev_priv, file_priv);
356
	if (IS_ERR(ctx))
357
		return ctx;
358

359
	if (USES_FULL_PPGTT(dev_priv)) {
C
Chris Wilson 已提交
360
		struct i915_hw_ppgtt *ppgtt;
361

362
		ppgtt = i915_ppgtt_create(dev_priv, file_priv, ctx->name);
363
		if (IS_ERR(ppgtt)) {
364 365
			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
					 PTR_ERR(ppgtt));
366
			__destroy_hw_context(ctx, file_priv);
367
			return ERR_CAST(ppgtt);
368 369 370
		}

		ctx->ppgtt = ppgtt;
371
		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
372
	}
373

374 375
	trace_i915_context_create(ctx);

376
	return ctx;
377 378
}

379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401
/**
 * i915_gem_context_create_gvt - create a GVT GEM context
 * @dev: drm device *
 *
 * This function is used to create a GVT specific GEM context.
 *
 * Returns:
 * pointer to i915_gem_context on success, error pointer if failed
 *
 */
struct i915_gem_context *
i915_gem_context_create_gvt(struct drm_device *dev)
{
	struct i915_gem_context *ctx;
	int ret;

	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return ERR_PTR(-ENODEV);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ERR_PTR(ret);

402
	ctx = __create_hw_context(to_i915(dev), NULL);
403 404 405
	if (IS_ERR(ctx))
		goto out;

406
	ctx->file_priv = ERR_PTR(-EBADF);
407 408 409
	i915_gem_context_set_closed(ctx); /* not user accessible */
	i915_gem_context_clear_bannable(ctx);
	i915_gem_context_set_force_single_submission(ctx);
410
	if (!i915_modparams.enable_guc_submission)
411
		ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
412 413

	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
414 415 416 417 418
out:
	mutex_unlock(&dev->struct_mutex);
	return ctx;
}

419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 441 442 443 444 445 446 447 448 449
/*
 * Create a context with no owning file, make it non-bannable and give it the
 * requested priority and a single-page ring.
 *
 * Returns the context or the ERR_PTR() from i915_gem_create_context().
 */
static struct i915_gem_context *
create_kernel_context(struct drm_i915_private *i915, int prio)
{
	struct i915_gem_context *kctx = i915_gem_create_context(i915, NULL);

	if (IS_ERR(kctx))
		return kctx;

	i915_gem_context_clear_bannable(kctx);
	kctx->priority = prio;
	kctx->ring_size = PAGE_SIZE;

	GEM_BUG_ON(!i915_gem_context_is_kernel(kctx));

	return kctx;
}

/*
 * Close and immediately free the kernel context stored in *@ctxp, leaving
 * *@ctxp zeroed.
 */
static void
destroy_kernel_context(struct i915_gem_context **ctxp)
{
	struct i915_gem_context *stolen = fetch_and_zero(ctxp);
	struct i915_gem_context *ctx;

	/*
	 * Take an extra reference so that the put inside context_close()
	 * does not release the context; it is freed directly below instead.
	 */
	ctx = i915_gem_context_get(stolen);
	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));

	context_close(ctx);
	i915_gem_context_free(ctx);
}

450
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
451
{
452
	struct i915_gem_context *ctx;
453
	int err;
454

455
	GEM_BUG_ON(dev_priv->kernel_context);
456

457
	INIT_LIST_HEAD(&dev_priv->contexts.list);
458 459
	INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
	init_llist_head(&dev_priv->contexts.free_list);
460

461 462
	if (intel_vgpu_active(dev_priv) &&
	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
463
		if (!i915_modparams.enable_execlists) {
464 465 466 467 468
			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
			return -EINVAL;
		}
	}

469 470
	/* Using the simple ida interface, the max is limited by sizeof(int) */
	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
471
	ida_init(&dev_priv->contexts.hw_ida);
472

473 474
	/* lowest priority; idle task */
	ctx = create_kernel_context(dev_priv, I915_PRIORITY_MIN);
475
	if (IS_ERR(ctx)) {
476 477 478
		DRM_ERROR("Failed to create default global context\n");
		err = PTR_ERR(ctx);
		goto err;
479
	}
480 481
	/*
	 * For easy recognisablity, we want the kernel context to be 0 and then
482 483 484
	 * all user contexts will have non-zero hw_id.
	 */
	GEM_BUG_ON(ctx->hw_id);
485
	dev_priv->kernel_context = ctx;
486

487 488 489 490 491 492 493 494
	/* highest priority; preempting task */
	ctx = create_kernel_context(dev_priv, INT_MAX);
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default preempt context\n");
		err = PTR_ERR(ctx);
		goto err_kernel_context;
	}
	dev_priv->preempt_context = ctx;
495

496
	DRM_DEBUG_DRIVER("%s context support initialized\n",
497 498
			 dev_priv->engine[RCS]->context_size ? "logical" :
			 "fake");
499
	return 0;
500 501 502 503 504

err_kernel_context:
	destroy_kernel_context(&dev_priv->kernel_context);
err:
	return err;
505 506
}

507
void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
508 509
{
	struct intel_engine_cs *engine;
510
	enum intel_engine_id id;
511

512
	lockdep_assert_held(&dev_priv->drm.struct_mutex);
513

514
	for_each_engine(engine, dev_priv, id) {
515 516 517 518 519 520 521
		engine->legacy_active_context = NULL;

		if (!engine->last_retired_context)
			continue;

		engine->context_unpin(engine, engine->last_retired_context);
		engine->last_retired_context = NULL;
522 523
	}

524
	/* Force the GPU state to be restored on enabling */
525
	if (!i915_modparams.enable_execlists) {
526 527
		struct i915_gem_context *ctx;

528
		list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
529 530 531
			if (!i915_gem_context_is_default(ctx))
				continue;

532
			for_each_engine(engine, dev_priv, id)
533 534 535 536 537
				ctx->engine[engine->id].initialised = false;

			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
		}

538
		for_each_engine(engine, dev_priv, id) {
539 540 541 542 543 544
			struct intel_context *kce =
				&dev_priv->kernel_context->engine[engine->id];

			kce->initialised = true;
		}
	}
545 546
}

547
void i915_gem_contexts_fini(struct drm_i915_private *i915)
548
{
549
	lockdep_assert_held(&i915->drm.struct_mutex);
550

551 552
	destroy_kernel_context(&i915->preempt_context);
	destroy_kernel_context(&i915->kernel_context);
553

554 555
	/* Must free all deferred contexts (via flush_workqueue) first */
	ida_destroy(&i915->contexts.hw_ida);
556 557
}

558 559
/*
 * idr_for_each() callback: closes the context stored at this idr slot.
 * @id and @data are unused. Always returns 0.
 */
static int context_idr_cleanup(int id, void *p, void *data)
{
	struct i915_gem_context *ctx = p;

	context_close(ctx);

	return 0;
}

566 567
int i915_gem_context_open(struct drm_i915_private *i915,
			  struct drm_file *file)
568 569
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
570
	struct i915_gem_context *ctx;
571 572 573

	idr_init(&file_priv->context_idr);

574 575 576
	mutex_lock(&i915->drm.struct_mutex);
	ctx = i915_gem_create_context(i915, file_priv);
	mutex_unlock(&i915->drm.struct_mutex);
577
	if (IS_ERR(ctx)) {
578
		idr_destroy(&file_priv->context_idr);
579
		return PTR_ERR(ctx);
580 581
	}

582 583
	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));

584 585 586
	return 0;
}

587
void i915_gem_context_close(struct drm_file *file)
588
{
589
	struct drm_i915_file_private *file_priv = file->driver_priv;
590

591
	lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex);
592

593
	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
594 595 596
	idr_destroy(&file_priv->context_idr);
}

597
static inline int
598
mi_set_context(struct drm_i915_gem_request *req, u32 flags)
599
{
600
	struct drm_i915_private *dev_priv = req->i915;
601
	struct intel_engine_cs *engine = req->engine;
602
	enum intel_engine_id id;
603
	const int num_rings =
604
		/* Use an extended w/a on gen7 if signalling from other rings */
605
		(i915_modparams.semaphores && INTEL_GEN(dev_priv) == 7) ?
606
		INTEL_INFO(dev_priv)->num_rings - 1 :
607
		0;
608
	int len;
609
	u32 *cs;
610

611
	flags |= MI_MM_SPACE_GTT;
612
	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
613 614 615 616
		/* These flags are for resource streamer on HSW+ */
		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
	else
		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
617 618

	len = 4;
619
	if (INTEL_GEN(dev_priv) >= 7)
620
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
621

622 623 624
	cs = intel_ring_begin(req, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
625

626
	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
627
	if (INTEL_GEN(dev_priv) >= 7) {
628
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
629 630 631
		if (num_rings) {
			struct intel_engine_cs *signaller;

632
			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
633
			for_each_engine(signaller, dev_priv, id) {
634
				if (signaller == engine)
635 636
					continue;

637 638 639 640
				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
641 642 643
			}
		}
	}
644

645 646 647
	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
648 649 650 651
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
652
	*cs++ = MI_NOOP;
653

654
	if (INTEL_GEN(dev_priv) >= 7) {
655 656
		if (num_rings) {
			struct intel_engine_cs *signaller;
657
			i915_reg_t last_reg = {}; /* keep gcc quiet */
658

659
			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
660
			for_each_engine(signaller, dev_priv, id) {
661
				if (signaller == engine)
662 663
					continue;

664
				last_reg = RING_PSMI_CTL(signaller->mmio_base);
665 666 667
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
668
			}
669 670

			/* Insert a delay before the next switch! */
671 672 673 674
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = i915_ggtt_offset(engine->scratch);
			*cs++ = MI_NOOP;
675
		}
676
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
677
	}
678

679
	intel_ring_advance(req, cs);
680

681
	return 0;
682 683
}

C
Chris Wilson 已提交
684
static int remap_l3(struct drm_i915_gem_request *req, int slice)
685
{
686 687
	u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
	int i;
688

689
	if (!remap_info)
690 691
		return 0;

692 693 694
	cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
695 696 697 698 699 700

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
701
	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
702
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
703 704
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
705
	}
706 707
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);
708

709
	return 0;
710 711
}

712 713
static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
				   struct intel_engine_cs *engine,
714
				   struct i915_gem_context *to)
715
{
716 717 718
	if (to->remap_slice)
		return false;

719
	if (!to->engine[RCS].initialised)
720 721
		return false;

722
	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
723
		return false;
724

725
	return to == engine->legacy_active_context;
726 727 728
}

static bool
729
needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt, struct intel_engine_cs *engine)
730
{
731 732
	struct i915_gem_context *from = engine->legacy_active_context;

733
	if (!ppgtt)
734 735
		return false;

736
	/* Always load the ppgtt on first use */
737
	if (!from)
738 739 740
		return true;

	/* Same context without new entries, skip */
741
	if ((!from->ppgtt || from->ppgtt == ppgtt) &&
742
	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
743 744 745
		return false;

	if (engine->id != RCS)
746 747
		return true;

748
	if (INTEL_GEN(engine->i915) < 8)
749 750 751 752 753 754
		return true;

	return false;
}

static bool
755
needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
756
		   struct i915_gem_context *to,
757
		   u32 hw_flags)
758
{
759
	if (!ppgtt)
760 761
		return false;

762
	if (!IS_GEN8(to->i915))
763 764
		return false;

B
Ben Widawsky 已提交
765
	if (hw_flags & MI_RESTORE_INHIBIT)
766 767 768 769 770
		return true;

	return false;
}

771
static int do_rcs_switch(struct drm_i915_gem_request *req)
772
{
773
	struct i915_gem_context *to = req->ctx;
774
	struct intel_engine_cs *engine = req->engine;
775
	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
776
	struct i915_gem_context *from = engine->legacy_active_context;
777
	u32 hw_flags;
778
	int ret, i;
779

780 781
	GEM_BUG_ON(engine->id != RCS);

782
	if (skip_rcs_switch(ppgtt, engine, to))
783 784
		return 0;

785
	if (needs_pd_load_pre(ppgtt, engine)) {
786 787 788 789 790
		/* Older GENs and non render rings still want the load first,
		 * "PP_DCLV followed by PP_DIR_BASE register through Load
		 * Register Immediate commands in Ring Buffer before submitting
		 * a context."*/
		trace_switch_mm(engine, to);
791
		ret = ppgtt->switch_mm(ppgtt, req);
792
		if (ret)
793
			return ret;
794 795
	}

796
	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
B
Ben Widawsky 已提交
797 798 799 800
		/* NB: If we inhibit the restore, the context is not allowed to
		 * die because future work may end up depending on valid address
		 * space. This means we must enforce that a page table load
		 * occur when this occurs. */
801
		hw_flags = MI_RESTORE_INHIBIT;
802
	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
803 804 805
		hw_flags = MI_FORCE_RESTORE;
	else
		hw_flags = 0;
806

807 808
	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
809
		if (ret)
810
			return ret;
811

812
		engine->legacy_active_context = to;
813 814
	}

815 816 817
	/* GEN8 does *not* require an explicit reload if the PDPs have been
	 * setup, and we do not wish to move them.
	 */
818
	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
819
		trace_switch_mm(engine, to);
820
		ret = ppgtt->switch_mm(ppgtt, req);
821 822 823 824 825 826 827 828 829
		/* The hardware context switch is emitted, but we haven't
		 * actually changed the state - so it's probably safe to bail
		 * here. Still, let the user know something dangerous has
		 * happened.
		 */
		if (ret)
			return ret;
	}

830 831
	if (ppgtt)
		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
832 833 834 835 836

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(to->remap_slice & (1<<i)))
			continue;

C
Chris Wilson 已提交
837
		ret = remap_l3(req, i);
838 839 840 841 842 843
		if (ret)
			return ret;

		to->remap_slice &= ~(1<<i);
	}

844
	if (!to->engine[RCS].initialised) {
845 846
		if (engine->init_context) {
			ret = engine->init_context(req);
847
			if (ret)
848
				return ret;
849
		}
850
		to->engine[RCS].initialised = true;
851 852
	}

853 854 855 856 857
	return 0;
}

/**
 * i915_switch_context() - perform a GPU context switch.
858
 * @req: request for which we'll execute the context switch
859 860 861
 *
 * The context life cycle is simple. The context refcount is incremented and
 * decremented by 1 and create and destroy. If the context is in use by the GPU,
862
 * it will have a refcount > 1. This allows us to destroy the context abstract
863
 * object while letting the normal object tracking destroy the backing BO.
864 865 866 867
 *
 * This function should not be used in execlists mode.  Instead the context is
 * switched by writing to the ELSP and requests keep a reference to their
 * context.
868
 */
869
int i915_switch_context(struct drm_i915_gem_request *req)
870
{
871
	struct intel_engine_cs *engine = req->engine;
872

873
	lockdep_assert_held(&req->i915->drm.struct_mutex);
874
	if (i915_modparams.enable_execlists)
875
		return 0;
876

877
	if (!req->ctx->engine[engine->id].state) {
878
		struct i915_gem_context *to = req->ctx;
879 880
		struct i915_hw_ppgtt *ppgtt =
			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
881

882
		if (needs_pd_load_pre(ppgtt, engine)) {
883 884 885
			int ret;

			trace_switch_mm(engine, to);
886
			ret = ppgtt->switch_mm(ppgtt, req);
887 888 889
			if (ret)
				return ret;

890
			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
891 892
		}

893
		engine->legacy_active_context = to;
894
		return 0;
895
	}
896

897
	return do_rcs_switch(req);
898
}
899

900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919
/*
 * Report whether @engine needs no switch to the kernel context.
 *
 * Returns false if any timeline other than the global one still has a last
 * request on this engine; otherwise true when the engine has no last retired
 * context or that context is a kernel context.
 */
static bool engine_has_kernel_context(struct intel_engine_cs *engine)
{
	struct i915_gem_timeline *timeline;

	list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
		struct intel_timeline *tl;

		/* The global timeline is not considered here */
		if (timeline == &engine->i915->gt.global_timeline)
			continue;

		tl = &timeline->engine[engine->id];
		if (i915_gem_active_peek(&tl->last_request,
					 &engine->i915->drm.struct_mutex))
			return false;
	}

	if (!engine->last_retired_context)
		return true;

	return i915_gem_context_is_kernel(engine->last_retired_context);
}

920 921 922
int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
923
	struct i915_gem_timeline *timeline;
924
	enum intel_engine_id id;
925

926 927
	lockdep_assert_held(&dev_priv->drm.struct_mutex);

928 929
	i915_gem_retire_requests(dev_priv);

930
	for_each_engine(engine, dev_priv, id) {
931 932 933
		struct drm_i915_gem_request *req;
		int ret;

934 935 936
		if (engine_has_kernel_context(engine))
			continue;

937 938 939 940
		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
		if (IS_ERR(req))
			return PTR_ERR(req);

941 942 943 944 945 946 947 948 949 950 951 952 953 954
		/* Queue this switch after all other activity */
		list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
			struct drm_i915_gem_request *prev;
			struct intel_timeline *tl;

			tl = &timeline->engine[engine->id];
			prev = i915_gem_active_raw(&tl->last_request,
						   &dev_priv->drm.struct_mutex);
			if (prev)
				i915_sw_fence_await_sw_fence_gfp(&req->submit,
								 &prev->submit,
								 GFP_KERNEL);
		}

955
		ret = i915_switch_context(req);
956
		i915_add_request(req);
957 958 959 960 961 962 963
		if (ret)
			return ret;
	}

	return 0;
}

964 965
static bool client_is_banned(struct drm_i915_file_private *file_priv)
{
966
	return atomic_read(&file_priv->context_bans) > I915_MAX_CLIENT_CONTEXT_BANS;
967 968
}

969 970 971
int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
972
	struct drm_i915_private *dev_priv = to_i915(dev);
973 974
	struct drm_i915_gem_context_create *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
975
	struct i915_gem_context *ctx;
976 977
	int ret;

978
	if (!dev_priv->engine[RCS]->context_size)
979 980
		return -ENODEV;

981 982 983
	if (args->pad != 0)
		return -EINVAL;

984 985 986 987 988 989 990 991
	if (client_is_banned(file_priv)) {
		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
			  current->comm,
			  pid_nr(get_task_pid(current, PIDTYPE_PID)));

		return -EIO;
	}

992 993 994 995
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

996
	ctx = i915_gem_create_context(dev_priv, file_priv);
997
	mutex_unlock(&dev->struct_mutex);
998 999
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);
1000

1001 1002
	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));

1003
	args->ctx_id = ctx->user_handle;
1004
	DRM_DEBUG("HW context %d created\n", args->ctx_id);
1005

1006
	return 0;
1007 1008 1009 1010 1011 1012 1013
}

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
				   struct drm_file *file)
{
	struct drm_i915_gem_context_destroy *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
1014
	struct i915_gem_context *ctx;
1015 1016
	int ret;

1017 1018 1019
	if (args->pad != 0)
		return -EINVAL;

1020
	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
1021
		return -ENOENT;
1022

1023
	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
1024 1025 1026 1027 1028 1029
	if (!ctx)
		return -ENOENT;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		goto out;
1030

1031
	__destroy_hw_context(ctx, file_priv);
1032 1033
	mutex_unlock(&dev->struct_mutex);

1034 1035
out:
	i915_gem_context_put(ctx);
1036 1037
	return 0;
}
1038 1039 1040 1041 1042 1043

int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
1044
	struct i915_gem_context *ctx;
1045
	int ret = 0;
1046

1047
	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
1048 1049
	if (!ctx)
		return -ENOENT;
1050 1051 1052 1053

	args->size = 0;
	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
1054
		ret = -EINVAL;
1055
		break;
1056 1057 1058
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
		break;
C
Chris Wilson 已提交
1059 1060 1061 1062 1063 1064
	case I915_CONTEXT_PARAM_GTT_SIZE:
		if (ctx->ppgtt)
			args->value = ctx->ppgtt->base.total;
		else if (to_i915(dev)->mm.aliasing_ppgtt)
			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
		else
1065
			args->value = to_i915(dev)->ggtt.base.total;
C
Chris Wilson 已提交
1066
		break;
1067
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
1068
		args->value = i915_gem_context_no_error_capture(ctx);
1069
		break;
1070
	case I915_CONTEXT_PARAM_BANNABLE:
1071
		args->value = i915_gem_context_is_bannable(ctx);
1072
		break;
1073 1074 1075 1076 1077
	default:
		ret = -EINVAL;
		break;
	}

1078
	i915_gem_context_put(ctx);
1079 1080 1081 1082 1083 1084 1085 1086
	return ret;
}

int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
1087
	struct i915_gem_context *ctx;
1088 1089
	int ret;

1090 1091 1092 1093
	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (!ctx)
		return -ENOENT;

1094 1095
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
1096
		goto out;
1097 1098 1099

	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
1100
		ret = -EINVAL;
1101
		break;
1102 1103 1104 1105 1106 1107
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		if (args->size) {
			ret = -EINVAL;
		} else {
			ctx->flags &= ~CONTEXT_NO_ZEROMAP;
			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
1108 1109 1110
		}
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
1111
		if (args->size)
1112
			ret = -EINVAL;
1113 1114 1115 1116
		else if (args->value)
			i915_gem_context_set_no_error_capture(ctx);
		else
			i915_gem_context_clear_no_error_capture(ctx);
1117
		break;
1118 1119 1120 1121 1122
	case I915_CONTEXT_PARAM_BANNABLE:
		if (args->size)
			ret = -EINVAL;
		else if (!capable(CAP_SYS_ADMIN) && !args->value)
			ret = -EPERM;
1123 1124
		else if (args->value)
			i915_gem_context_set_bannable(ctx);
1125
		else
1126
			i915_gem_context_clear_bannable(ctx);
1127
		break;
1128 1129 1130 1131 1132 1133
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

1134 1135
out:
	i915_gem_context_put(ctx);
1136 1137
	return ret;
}
1138 1139 1140 1141

int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
				       void *data, struct drm_file *file)
{
1142
	struct drm_i915_private *dev_priv = to_i915(dev);
1143
	struct drm_i915_reset_stats *args = data;
1144
	struct i915_gem_context *ctx;
1145 1146 1147 1148 1149
	int ret;

	if (args->flags || args->pad)
		return -EINVAL;

1150 1151 1152 1153 1154
	ret = -ENOENT;
	rcu_read_lock();
	ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id);
	if (!ctx)
		goto out;
1155

1156 1157 1158 1159 1160 1161
	/*
	 * We opt for unserialised reads here. This may result in tearing
	 * in the extremely unlikely event of a GPU hang on this context
	 * as we are querying them. If we need that extra layer of protection,
	 * we should wrap the hangstats with a seqlock.
	 */
1162 1163 1164 1165 1166 1167

	if (capable(CAP_SYS_ADMIN))
		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
	else
		args->reset_count = 0;

1168 1169
	args->batch_active = atomic_read(&ctx->guilty_count);
	args->batch_pending = atomic_read(&ctx->active_count);
1170

1171 1172 1173 1174
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
1175
}
1176 1177 1178

/* Pull the selftest sources into this file when selftests are enabled. */
#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_context.c"
#include "selftests/i915_gem_context.c"
#endif