i915_gem_context.c 31.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75
/*
 * Copyright © 2011-2012 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Ben Widawsky <ben@bwidawsk.net>
 *
 */

/*
 * This file implements HW context support. On gen5+ a HW context consists of an
 * opaque GPU object which is referenced at times of context saves and restores.
 * With RC6 enabled, the context is also referenced as the GPU enters and exits
 * from RC6 (GPU has its own internal power context, except on gen5). Though
 * something like a context does exist for the media ring, the code only
 * supports contexts for the render ring.
 *
 * In software, there is a distinction between contexts created by the user,
 * and the default HW context. The default HW context is used by GPU clients
 * that do not request setup of their own hardware context. The default
 * context's state is never restored to help prevent programming errors. This
 * would happen if a client ran and piggy-backed off another client's GPU state.
 * The default context only exists to give the GPU some offset to load as the
 * current to invoke a save of the context we actually care about. In fact, the
 * code could likely be constructed, albeit in a more complicated fashion, to
 * never use the default context, though that limits the driver's ability to
 * swap out, and/or destroy other contexts.
 *
 * All other contexts are created as a request by the GPU client. These contexts
 * store GPU state, and thus allow GPU clients to not re-emit state (and
 * potentially query certain state) at any time. The kernel driver makes
 * certain that the appropriate commands are inserted.
 *
 * The context life cycle is semi-complicated in that context BOs may live
 * longer than the context itself because of the way the hardware, and object
 * tracking works. Below is a very crude representation of the state machine
 * describing the context life.
 *                                         refcount     pincount     active
 * S0: initial state                          0            0           0
 * S1: context created                        1            0           0
 * S2: context is currently running           2            1           X
 * S3: GPU referenced, but not current        2            0           1
 * S4: context is current, but destroyed      1            1           0
 * S5: like S3, but destroyed                 1            0           1
 *
 * The most common (but not all) transitions:
 * S0->S1: client creates a context
 * S1->S2: client submits execbuf with context
 * S2->S3: another client submits execbuf with context
 * S3->S1: context object was retired
 * S3->S2: client submits another execbuf
 * S2->S4: context destroy called with current context
 * S3->S5->S0: destroy path
 * S4->S5->S0: destroy path on current context
 *
 * There are two confusing terms used above:
 *  The "current context" means the context which is currently running on the
 *  GPU. The GPU has loaded its state already and has stored away the gtt
 *  offset of the BO. The GPU is not actively referencing the data at this
 *  offset, but it will on the next context switch. The only way to avoid this
 *  is to do a GPU reset.
 *
 *  An "active context" is one which was previously the "current context" and is
 *  on the active list waiting for the next context switch to occur. Until this
 *  happens, the object must remain at the same gtt offset. It is therefore
 *  possible to destroy a context, but it is still active.
 *
 */

88
#include <linux/log2.h>
89 90
#include <drm/drmP.h>
#include <drm/i915_drm.h>
91
#include "i915_drv.h"
92
#include "i915_trace.h"
93

94 95
/*
 * Mask with the low NUM_L3_SLICES(dev) bits set. The whole expansion is
 * parenthesised so the macro can be combined with other operators
 * (e.g. `~ALL_L3_SLICES(dev)` or `x & ALL_L3_SLICES(dev)`) without the
 * trailing "- 1" binding to a neighbouring operand.
 */
#define ALL_L3_SLICES(dev) ((1 << NUM_L3_SLICES(dev)) - 1)

96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154
/* Initial size (as log2) to preallocate the handle->object hashtable */
#define VMA_HT_BITS 2u /* 4 x 2 pointers, 64 bytes minimum */

static void resize_vma_ht(struct work_struct *work)
{
	struct i915_gem_context_vma_lut *lut =
		container_of(work, typeof(*lut), resize);
	unsigned int bits, new_bits, size, i;
	struct hlist_head *new_ht;

	GEM_BUG_ON(!(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS));

	bits = 1 + ilog2(4*lut->ht_count/3 + 1);
	new_bits = min_t(unsigned int,
			 max(bits, VMA_HT_BITS),
			 sizeof(unsigned int) * BITS_PER_BYTE - 1);
	if (new_bits == lut->ht_bits)
		goto out;

	new_ht = kzalloc(sizeof(*new_ht)<<new_bits, GFP_KERNEL | __GFP_NOWARN);
	if (!new_ht)
		new_ht = vzalloc(sizeof(*new_ht)<<new_bits);
	if (!new_ht)
		/* Pretend resize succeeded and stop calling us for a bit! */
		goto out;

	size = BIT(lut->ht_bits);
	for (i = 0; i < size; i++) {
		struct i915_vma *vma;
		struct hlist_node *tmp;

		hlist_for_each_entry_safe(vma, tmp, &lut->ht[i], ctx_node)
			hlist_add_head(&vma->ctx_node,
				       &new_ht[hash_32(vma->ctx_handle,
						       new_bits)]);
	}
	kvfree(lut->ht);
	lut->ht = new_ht;
	lut->ht_bits = new_bits;
out:
	smp_store_release(&lut->ht_size, BIT(bits));
	GEM_BUG_ON(lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS);
}

/*
 * Tear down the handle->vma lookup table of @ctx: detach every hashed vma
 * from the context, drop the reference taken on it, and free the table.
 */
static void vma_lut_free(struct i915_gem_context *ctx)
{
	struct i915_gem_context_vma_lut *lut = &ctx->vma_lut;
	unsigned int bucket, nbuckets;

	/* Wait out a queued/running resize before touching the table */
	if (lut->ht_size & I915_CTX_RESIZE_IN_PROGRESS)
		cancel_work_sync(&lut->resize);

	nbuckets = BIT(lut->ht_bits);
	for (bucket = 0; bucket < nbuckets; bucket++) {
		struct hlist_head *head = &lut->ht[bucket];
		struct i915_vma *vma;

		hlist_for_each_entry(vma, head, ctx_node) {
			vma->obj->vma_hashed = NULL;
			vma->ctx = NULL;
			i915_vma_put(vma);
		}
	}

	kvfree(lut->ht);
}

161
static void i915_gem_context_free(struct i915_gem_context *ctx)
162
{
163
	int i;
164

165
	lockdep_assert_held(&ctx->i915->drm.struct_mutex);
166
	GEM_BUG_ON(!i915_gem_context_is_closed(ctx));
167

168
	vma_lut_free(ctx);
169 170
	i915_ppgtt_put(ctx->ppgtt);

171 172 173 174 175 176 177
	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_context *ce = &ctx->engine[i];

		if (!ce->state)
			continue;

		WARN_ON(ce->pin_count);
178
		if (ce->ring)
179
			intel_ring_free(ce->ring);
180

181
		__i915_gem_object_release_unless_active(ce->state->obj);
182 183
	}

184
	kfree(ctx->name);
185
	put_pid(ctx->pid);
186

B
Ben Widawsky 已提交
187
	list_del(&ctx->link);
188

189
	ida_simple_remove(&ctx->i915->contexts.hw_ida, ctx->hw_id);
190
	kfree_rcu(ctx, rcu);
191 192
}

193 194 195
static void contexts_free(struct drm_i915_private *i915)
{
	struct llist_node *freed = llist_del_all(&i915->contexts.free_list);
196
	struct i915_gem_context *ctx, *cn;
197 198 199

	lockdep_assert_held(&i915->drm.struct_mutex);

200
	llist_for_each_entry_safe(ctx, cn, freed, free_link)
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223
		i915_gem_context_free(ctx);
}

/*
 * Workqueue entry point for deferred context destruction: takes struct_mutex
 * on behalf of contexts_free().
 */
static void contexts_free_worker(struct work_struct *work)
{
	struct drm_i915_private *i915;

	i915 = container_of(work, typeof(*i915), contexts.free_work);

	mutex_lock(&i915->drm.struct_mutex);
	contexts_free(i915);
	mutex_unlock(&i915->drm.struct_mutex);
}

void i915_gem_context_release(struct kref *ref)
{
	struct i915_gem_context *ctx = container_of(ref, typeof(*ctx), ref);
	struct drm_i915_private *i915 = ctx->i915;

	trace_i915_context_free(ctx);
	if (llist_add(&ctx->free_link, &i915->contexts.free_list))
		queue_work(i915->wq, &i915->contexts.free_work);
}

224 225
static void context_close(struct i915_gem_context *ctx)
{
226
	i915_gem_context_set_closed(ctx);
227 228 229 230 231 232
	if (ctx->ppgtt)
		i915_ppgtt_close(&ctx->ppgtt->base);
	ctx->file_priv = ERR_PTR(-EBADF);
	i915_gem_context_put(ctx);
}

233 234 235 236
static int assign_hw_id(struct drm_i915_private *dev_priv, unsigned *out)
{
	int ret;

237
	ret = ida_simple_get(&dev_priv->contexts.hw_ida,
238 239 240 241 242 243
			     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
	if (ret < 0) {
		/* Contexts are only released when no longer active.
		 * Flush any pending retires to hopefully release some
		 * stale contexts and try again.
		 */
244
		i915_gem_retire_requests(dev_priv);
245
		ret = ida_simple_get(&dev_priv->contexts.hw_ida,
246 247 248 249 250 251 252 253 254
				     0, MAX_CONTEXT_HW_ID, GFP_KERNEL);
		if (ret < 0)
			return ret;
	}

	*out = ret;
	return 0;
}

255 256
static u32 default_desc_template(const struct drm_i915_private *i915,
				 const struct i915_hw_ppgtt *ppgtt)
257
{
258
	u32 address_mode;
259 260
	u32 desc;

261
	desc = GEN8_CTX_VALID | GEN8_CTX_PRIVILEGE;
262

263 264 265 266 267 268
	address_mode = INTEL_LEGACY_32B_CONTEXT;
	if (ppgtt && i915_vm_is_48bit(&ppgtt->base))
		address_mode = INTEL_LEGACY_64B_CONTEXT;
	desc |= address_mode << GEN8_CTX_ADDRESSING_MODE_SHIFT;

	if (IS_GEN8(i915))
269 270 271 272 273 274 275 276 277 278
		desc |= GEN8_CTX_L3LLC_COHERENT;

	/* TODO: WaDisableLiteRestore when we start using semaphore
	 * signalling between Command Streamers
	 * ring->ctx_desc_template |= GEN8_CTX_FORCE_RESTORE;
	 */

	return desc;
}

279
static struct i915_gem_context *
280
__create_hw_context(struct drm_i915_private *dev_priv,
281
		    struct drm_i915_file_private *file_priv)
282
{
283
	struct i915_gem_context *ctx;
T
Tejun Heo 已提交
284
	int ret;
285

286
	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
287 288
	if (ctx == NULL)
		return ERR_PTR(-ENOMEM);
289

290 291 292 293 294 295
	ret = assign_hw_id(dev_priv, &ctx->hw_id);
	if (ret) {
		kfree(ctx);
		return ERR_PTR(ret);
	}

296
	kref_init(&ctx->ref);
297
	list_add_tail(&ctx->link, &dev_priv->contexts.list);
298
	ctx->i915 = dev_priv;
299
	ctx->priority = I915_PRIORITY_NORMAL;
300

301 302 303 304 305 306 307 308 309 310 311
	ctx->vma_lut.ht_bits = VMA_HT_BITS;
	ctx->vma_lut.ht_size = BIT(VMA_HT_BITS);
	BUILD_BUG_ON(BIT(VMA_HT_BITS) == I915_CTX_RESIZE_IN_PROGRESS);
	ctx->vma_lut.ht = kcalloc(ctx->vma_lut.ht_size,
				  sizeof(*ctx->vma_lut.ht),
				  GFP_KERNEL);
	if (!ctx->vma_lut.ht)
		goto err_out;

	INIT_WORK(&ctx->vma_lut.resize, resize_vma_ht);

312
	/* Default context will never have a file_priv */
313 314
	ret = DEFAULT_CONTEXT_HANDLE;
	if (file_priv) {
315
		ret = idr_alloc(&file_priv->context_idr, ctx,
316
				DEFAULT_CONTEXT_HANDLE, 0, GFP_KERNEL);
317
		if (ret < 0)
318
			goto err_lut;
319 320
	}
	ctx->user_handle = ret;
321 322

	ctx->file_priv = file_priv;
323
	if (file_priv) {
324
		ctx->pid = get_task_pid(current, PIDTYPE_PID);
325 326 327 328 329 330 331 332 333
		ctx->name = kasprintf(GFP_KERNEL, "%s[%d]/%x",
				      current->comm,
				      pid_nr(ctx->pid),
				      ctx->user_handle);
		if (!ctx->name) {
			ret = -ENOMEM;
			goto err_pid;
		}
	}
334

335 336 337
	/* NB: Mark all slices as needing a remap so that when the context first
	 * loads it will restore whatever remap state already exists. If there
	 * is no remap info, it will be a NOP. */
338
	ctx->remap_slice = ALL_L3_SLICES(dev_priv);
339

340
	i915_gem_context_set_bannable(ctx);
341
	ctx->ring_size = 4 * PAGE_SIZE;
342 343
	ctx->desc_template =
		default_desc_template(dev_priv, dev_priv->mm.aliasing_ppgtt);
344

345 346 347 348 349 350 351
	/* GuC requires the ring to be placed above GUC_WOPCM_TOP. If GuC is not
	 * present or not in use we still need a small bias as ring wraparound
	 * at offset 0 sometimes hangs. No idea why.
	 */
	if (HAS_GUC(dev_priv) && i915.enable_guc_loading)
		ctx->ggtt_offset_bias = GUC_WOPCM_TOP;
	else
352
		ctx->ggtt_offset_bias = I915_GTT_PAGE_SIZE;
353

354
	return ctx;
355

356 357 358
err_pid:
	put_pid(ctx->pid);
	idr_remove(&file_priv->context_idr, ctx->user_handle);
359 360
err_lut:
	kvfree(ctx->vma_lut.ht);
361
err_out:
362
	context_close(ctx);
363
	return ERR_PTR(ret);
364 365
}

366 367 368 369 370 371 372
/*
 * Remove @ctx from the owning client's handle table and close it.
 *
 * @file_priv may be NULL: i915_gem_create_context() forwards its own
 * file_priv here when ppgtt creation fails, and that is NULL for the kernel
 * context. Such a context was never inserted into any idr, so only the close
 * is performed.
 */
static void __destroy_hw_context(struct i915_gem_context *ctx,
				 struct drm_i915_file_private *file_priv)
{
	if (file_priv)
		idr_remove(&file_priv->context_idr, ctx->user_handle);
	context_close(ctx);
}

373 374 375 376 377
/**
 * The default context needs to exist per ring that uses contexts. It stores the
 * context state of the GPU for applications that don't utilize HW contexts, as
 * well as an idle case.
 */
378
static struct i915_gem_context *
379
i915_gem_create_context(struct drm_i915_private *dev_priv,
380
			struct drm_i915_file_private *file_priv)
381
{
382
	struct i915_gem_context *ctx;
383

384
	lockdep_assert_held(&dev_priv->drm.struct_mutex);
385

386
	ctx = __create_hw_context(dev_priv, file_priv);
387
	if (IS_ERR(ctx))
388
		return ctx;
389

390
	if (USES_FULL_PPGTT(dev_priv)) {
C
Chris Wilson 已提交
391
		struct i915_hw_ppgtt *ppgtt;
392

393
		ppgtt = i915_ppgtt_create(dev_priv, file_priv, ctx->name);
394
		if (IS_ERR(ppgtt)) {
395 396
			DRM_DEBUG_DRIVER("PPGTT setup failed (%ld)\n",
					 PTR_ERR(ppgtt));
397
			__destroy_hw_context(ctx, file_priv);
398
			return ERR_CAST(ppgtt);
399 400 401
		}

		ctx->ppgtt = ppgtt;
402
		ctx->desc_template = default_desc_template(dev_priv, ppgtt);
403
	}
404

405 406
	trace_i915_context_create(ctx);

407
	return ctx;
408 409
}

410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432
/**
 * i915_gem_context_create_gvt - create a GVT GEM context
 * @dev: drm device *
 *
 * This function is used to create a GVT specific GEM context.
 *
 * Returns:
 * pointer to i915_gem_context on success, error pointer if failed
 *
 */
struct i915_gem_context *
i915_gem_context_create_gvt(struct drm_device *dev)
{
	struct i915_gem_context *ctx;
	int ret;

	if (!IS_ENABLED(CONFIG_DRM_I915_GVT))
		return ERR_PTR(-ENODEV);

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ERR_PTR(ret);

433
	ctx = __create_hw_context(to_i915(dev), NULL);
434 435 436
	if (IS_ERR(ctx))
		goto out;

437
	ctx->file_priv = ERR_PTR(-EBADF);
438 439 440
	i915_gem_context_set_closed(ctx); /* not user accessible */
	i915_gem_context_clear_bannable(ctx);
	i915_gem_context_set_force_single_submission(ctx);
441 442
	if (!i915.enable_guc_submission)
		ctx->ring_size = 512 * PAGE_SIZE; /* Max ring buffer size */
443 444

	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));
445 446 447 448 449
out:
	mutex_unlock(&dev->struct_mutex);
	return ctx;
}

450
int i915_gem_contexts_init(struct drm_i915_private *dev_priv)
451
{
452
	struct i915_gem_context *ctx;
453

454 455
	/* Init should only be called once per module load. Eventually the
	 * restriction on the context_disabled check can be loosened. */
456
	if (WARN_ON(dev_priv->kernel_context))
457
		return 0;
458

459
	INIT_LIST_HEAD(&dev_priv->contexts.list);
460 461
	INIT_WORK(&dev_priv->contexts.free_work, contexts_free_worker);
	init_llist_head(&dev_priv->contexts.free_list);
462

463 464
	if (intel_vgpu_active(dev_priv) &&
	    HAS_LOGICAL_RING_CONTEXTS(dev_priv)) {
465 466 467 468 469 470
		if (!i915.enable_execlists) {
			DRM_INFO("Only EXECLIST mode is supported in vgpu.\n");
			return -EINVAL;
		}
	}

471 472
	/* Using the simple ida interface, the max is limited by sizeof(int) */
	BUILD_BUG_ON(MAX_CONTEXT_HW_ID > INT_MAX);
473
	ida_init(&dev_priv->contexts.hw_ida);
474

475
	ctx = i915_gem_create_context(dev_priv, NULL);
476 477 478 479
	if (IS_ERR(ctx)) {
		DRM_ERROR("Failed to create default global context (error %ld)\n",
			  PTR_ERR(ctx));
		return PTR_ERR(ctx);
480 481
	}

482 483 484 485 486
	/* For easy recognisablity, we want the kernel context to be 0 and then
	 * all user contexts will have non-zero hw_id.
	 */
	GEM_BUG_ON(ctx->hw_id);

487
	i915_gem_context_clear_bannable(ctx);
488
	ctx->priority = I915_PRIORITY_MIN; /* lowest priority; idle task */
489
	dev_priv->kernel_context = ctx;
490

491 492
	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));

493
	DRM_DEBUG_DRIVER("%s context support initialized\n",
494 495
			 dev_priv->engine[RCS]->context_size ? "logical" :
			 "fake");
496
	return 0;
497 498
}

499
void i915_gem_contexts_lost(struct drm_i915_private *dev_priv)
500 501
{
	struct intel_engine_cs *engine;
502
	enum intel_engine_id id;
503

504
	lockdep_assert_held(&dev_priv->drm.struct_mutex);
505

506
	for_each_engine(engine, dev_priv, id) {
507 508 509 510 511 512 513
		engine->legacy_active_context = NULL;

		if (!engine->last_retired_context)
			continue;

		engine->context_unpin(engine, engine->last_retired_context);
		engine->last_retired_context = NULL;
514 515
	}

516 517
	/* Force the GPU state to be restored on enabling */
	if (!i915.enable_execlists) {
518 519
		struct i915_gem_context *ctx;

520
		list_for_each_entry(ctx, &dev_priv->contexts.list, link) {
521 522 523
			if (!i915_gem_context_is_default(ctx))
				continue;

524
			for_each_engine(engine, dev_priv, id)
525 526 527 528 529
				ctx->engine[engine->id].initialised = false;

			ctx->remap_slice = ALL_L3_SLICES(dev_priv);
		}

530
		for_each_engine(engine, dev_priv, id) {
531 532 533 534 535 536
			struct intel_context *kce =
				&dev_priv->kernel_context->engine[engine->id];

			kce->initialised = true;
		}
	}
537 538
}

539
void i915_gem_contexts_fini(struct drm_i915_private *i915)
540
{
541
	struct i915_gem_context *ctx;
542

543
	lockdep_assert_held(&i915->drm.struct_mutex);
544

545 546 547 548 549
	/* Keep the context so that we can free it immediately ourselves */
	ctx = i915_gem_context_get(fetch_and_zero(&i915->kernel_context));
	GEM_BUG_ON(!i915_gem_context_is_kernel(ctx));
	context_close(ctx);
	i915_gem_context_free(ctx);
550

551 552
	/* Must free all deferred contexts (via flush_workqueue) first */
	ida_destroy(&i915->contexts.hw_ida);
553 554
}

555 556
/*
 * idr_for_each() callback: close the context stored at @p. @id and @data are
 * unused. Always returns 0 so the iteration visits every entry.
 */
static int context_idr_cleanup(int id, void *p, void *data)
{
	context_close(p);

	return 0;
}

563 564
int i915_gem_context_open(struct drm_i915_private *i915,
			  struct drm_file *file)
565 566
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
567
	struct i915_gem_context *ctx;
568 569 570

	idr_init(&file_priv->context_idr);

571 572 573
	mutex_lock(&i915->drm.struct_mutex);
	ctx = i915_gem_create_context(i915, file_priv);
	mutex_unlock(&i915->drm.struct_mutex);
574

575 576
	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));

577
	if (IS_ERR(ctx)) {
578
		idr_destroy(&file_priv->context_idr);
579
		return PTR_ERR(ctx);
580 581
	}

582 583 584
	return 0;
}

585
void i915_gem_context_close(struct drm_file *file)
586
{
587
	struct drm_i915_file_private *file_priv = file->driver_priv;
588

589
	lockdep_assert_held(&file_priv->dev_priv->drm.struct_mutex);
590

591
	idr_for_each(&file_priv->context_idr, context_idr_cleanup, NULL);
592 593 594
	idr_destroy(&file_priv->context_idr);
}

595
static inline int
596
mi_set_context(struct drm_i915_gem_request *req, u32 flags)
597
{
598
	struct drm_i915_private *dev_priv = req->i915;
599
	struct intel_engine_cs *engine = req->engine;
600
	enum intel_engine_id id;
601
	const int num_rings =
602 603
		/* Use an extended w/a on gen7 if signalling from other rings */
		(i915.semaphores && INTEL_GEN(dev_priv) == 7) ?
604
		INTEL_INFO(dev_priv)->num_rings - 1 :
605
		0;
606
	int len;
607
	u32 *cs;
608

609
	flags |= MI_MM_SPACE_GTT;
610
	if (IS_HASWELL(dev_priv) || INTEL_GEN(dev_priv) >= 8)
611 612 613 614
		/* These flags are for resource streamer on HSW+ */
		flags |= HSW_MI_RS_SAVE_STATE_EN | HSW_MI_RS_RESTORE_STATE_EN;
	else
		flags |= MI_SAVE_EXT_STATE_EN | MI_RESTORE_EXT_STATE_EN;
615 616

	len = 4;
617
	if (INTEL_GEN(dev_priv) >= 7)
618
		len += 2 + (num_rings ? 4*num_rings + 6 : 0);
619

620 621 622
	cs = intel_ring_begin(req, len);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
623

624
	/* WaProgramMiArbOnOffAroundMiSetContext:ivb,vlv,hsw,bdw,chv */
625
	if (INTEL_GEN(dev_priv) >= 7) {
626
		*cs++ = MI_ARB_ON_OFF | MI_ARB_DISABLE;
627 628 629
		if (num_rings) {
			struct intel_engine_cs *signaller;

630
			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
631
			for_each_engine(signaller, dev_priv, id) {
632
				if (signaller == engine)
633 634
					continue;

635 636 637 638
				*cs++ = i915_mmio_reg_offset(
					   RING_PSMI_CTL(signaller->mmio_base));
				*cs++ = _MASKED_BIT_ENABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
639 640 641
			}
		}
	}
642

643 644 645
	*cs++ = MI_NOOP;
	*cs++ = MI_SET_CONTEXT;
	*cs++ = i915_ggtt_offset(req->ctx->engine[RCS].state) | flags;
646 647 648 649
	/*
	 * w/a: MI_SET_CONTEXT must always be followed by MI_NOOP
	 * WaMiSetContext_Hang:snb,ivb,vlv
	 */
650
	*cs++ = MI_NOOP;
651

652
	if (INTEL_GEN(dev_priv) >= 7) {
653 654
		if (num_rings) {
			struct intel_engine_cs *signaller;
655
			i915_reg_t last_reg = {}; /* keep gcc quiet */
656

657
			*cs++ = MI_LOAD_REGISTER_IMM(num_rings);
658
			for_each_engine(signaller, dev_priv, id) {
659
				if (signaller == engine)
660 661
					continue;

662
				last_reg = RING_PSMI_CTL(signaller->mmio_base);
663 664 665
				*cs++ = i915_mmio_reg_offset(last_reg);
				*cs++ = _MASKED_BIT_DISABLE(
						GEN6_PSMI_SLEEP_MSG_DISABLE);
666
			}
667 668

			/* Insert a delay before the next switch! */
669 670 671 672
			*cs++ = MI_STORE_REGISTER_MEM | MI_SRM_LRM_GLOBAL_GTT;
			*cs++ = i915_mmio_reg_offset(last_reg);
			*cs++ = i915_ggtt_offset(engine->scratch);
			*cs++ = MI_NOOP;
673
		}
674
		*cs++ = MI_ARB_ON_OFF | MI_ARB_ENABLE;
675
	}
676

677
	intel_ring_advance(req, cs);
678

679
	return 0;
680 681
}

C
Chris Wilson 已提交
682
static int remap_l3(struct drm_i915_gem_request *req, int slice)
683
{
684 685
	u32 *cs, *remap_info = req->i915->l3_parity.remap_info[slice];
	int i;
686

687
	if (!remap_info)
688 689
		return 0;

690 691 692
	cs = intel_ring_begin(req, GEN7_L3LOG_SIZE/4 * 2 + 2);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
693 694 695 696 697 698

	/*
	 * Note: We do not worry about the concurrent register cacheline hang
	 * here because no other code should access these registers other than
	 * at initialization time.
	 */
699
	*cs++ = MI_LOAD_REGISTER_IMM(GEN7_L3LOG_SIZE/4);
700
	for (i = 0; i < GEN7_L3LOG_SIZE/4; i++) {
701 702
		*cs++ = i915_mmio_reg_offset(GEN7_L3LOG(slice, i));
		*cs++ = remap_info[i];
703
	}
704 705
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);
706

707
	return 0;
708 709
}

710 711
static inline bool skip_rcs_switch(struct i915_hw_ppgtt *ppgtt,
				   struct intel_engine_cs *engine,
712
				   struct i915_gem_context *to)
713
{
714 715 716
	if (to->remap_slice)
		return false;

717
	if (!to->engine[RCS].initialised)
718 719
		return false;

720
	if (ppgtt && (intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
721
		return false;
722

723
	return to == engine->legacy_active_context;
724 725 726
}

static bool
727 728
needs_pd_load_pre(struct i915_hw_ppgtt *ppgtt,
		  struct intel_engine_cs *engine,
729
		  struct i915_gem_context *to)
730
{
731
	if (!ppgtt)
732 733
		return false;

734
	/* Always load the ppgtt on first use */
735
	if (!engine->legacy_active_context)
736 737 738
		return true;

	/* Same context without new entries, skip */
739
	if (engine->legacy_active_context == to &&
740
	    !(intel_engine_flag(engine) & ppgtt->pd_dirty_rings))
741 742 743
		return false;

	if (engine->id != RCS)
744 745
		return true;

746
	if (INTEL_GEN(engine->i915) < 8)
747 748 749 750 751 752
		return true;

	return false;
}

static bool
753
needs_pd_load_post(struct i915_hw_ppgtt *ppgtt,
754
		   struct i915_gem_context *to,
755
		   u32 hw_flags)
756
{
757
	if (!ppgtt)
758 759
		return false;

760
	if (!IS_GEN8(to->i915))
761 762
		return false;

B
Ben Widawsky 已提交
763
	if (hw_flags & MI_RESTORE_INHIBIT)
764 765 766 767 768
		return true;

	return false;
}

769
static int do_rcs_switch(struct drm_i915_gem_request *req)
770
{
771
	struct i915_gem_context *to = req->ctx;
772
	struct intel_engine_cs *engine = req->engine;
773
	struct i915_hw_ppgtt *ppgtt = to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
774
	struct i915_gem_context *from = engine->legacy_active_context;
775
	u32 hw_flags;
776
	int ret, i;
777

778 779
	GEM_BUG_ON(engine->id != RCS);

780
	if (skip_rcs_switch(ppgtt, engine, to))
781 782
		return 0;

783
	if (needs_pd_load_pre(ppgtt, engine, to)) {
784 785 786 787 788
		/* Older GENs and non render rings still want the load first,
		 * "PP_DCLV followed by PP_DIR_BASE register through Load
		 * Register Immediate commands in Ring Buffer before submitting
		 * a context."*/
		trace_switch_mm(engine, to);
789
		ret = ppgtt->switch_mm(ppgtt, req);
790
		if (ret)
791
			return ret;
792 793
	}

794
	if (!to->engine[RCS].initialised || i915_gem_context_is_default(to))
B
Ben Widawsky 已提交
795 796 797 798
		/* NB: If we inhibit the restore, the context is not allowed to
		 * die because future work may end up depending on valid address
		 * space. This means we must enforce that a page table load
		 * occur when this occurs. */
799
		hw_flags = MI_RESTORE_INHIBIT;
800
	else if (ppgtt && intel_engine_flag(engine) & ppgtt->pd_dirty_rings)
801 802 803
		hw_flags = MI_FORCE_RESTORE;
	else
		hw_flags = 0;
804

805 806
	if (to != from || (hw_flags & MI_FORCE_RESTORE)) {
		ret = mi_set_context(req, hw_flags);
807
		if (ret)
808
			return ret;
809

810
		engine->legacy_active_context = to;
811 812
	}

813 814 815
	/* GEN8 does *not* require an explicit reload if the PDPs have been
	 * setup, and we do not wish to move them.
	 */
816
	if (needs_pd_load_post(ppgtt, to, hw_flags)) {
817
		trace_switch_mm(engine, to);
818
		ret = ppgtt->switch_mm(ppgtt, req);
819 820 821 822 823 824 825 826 827
		/* The hardware context switch is emitted, but we haven't
		 * actually changed the state - so it's probably safe to bail
		 * here. Still, let the user know something dangerous has
		 * happened.
		 */
		if (ret)
			return ret;
	}

828 829
	if (ppgtt)
		ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
830 831 832 833 834

	for (i = 0; i < MAX_L3_SLICES; i++) {
		if (!(to->remap_slice & (1<<i)))
			continue;

C
Chris Wilson 已提交
835
		ret = remap_l3(req, i);
836 837 838 839 840 841
		if (ret)
			return ret;

		to->remap_slice &= ~(1<<i);
	}

842
	if (!to->engine[RCS].initialised) {
843 844
		if (engine->init_context) {
			ret = engine->init_context(req);
845
			if (ret)
846
				return ret;
847
		}
848
		to->engine[RCS].initialised = true;
849 850
	}

851 852 853 854 855
	return 0;
}

/**
 * i915_switch_context() - perform a GPU context switch.
856
 * @req: request for which we'll execute the context switch
857 858 859
 *
 * The context life cycle is simple. The context refcount is incremented and
 * decremented by 1 and create and destroy. If the context is in use by the GPU,
860
 * it will have a refcount > 1. This allows us to destroy the context abstract
861
 * object while letting the normal object tracking destroy the backing BO.
862 863 864 865
 *
 * This function should not be used in execlists mode.  Instead the context is
 * switched by writing to the ELSP and requests keep a reference to their
 * context.
866
 */
867
int i915_switch_context(struct drm_i915_gem_request *req)
868
{
869
	struct intel_engine_cs *engine = req->engine;
870

871
	lockdep_assert_held(&req->i915->drm.struct_mutex);
872 873
	if (i915.enable_execlists)
		return 0;
874

875
	if (!req->ctx->engine[engine->id].state) {
876
		struct i915_gem_context *to = req->ctx;
877 878
		struct i915_hw_ppgtt *ppgtt =
			to->ppgtt ?: req->i915->mm.aliasing_ppgtt;
879

880
		if (needs_pd_load_pre(ppgtt, engine, to)) {
881 882 883
			int ret;

			trace_switch_mm(engine, to);
884
			ret = ppgtt->switch_mm(ppgtt, req);
885 886 887
			if (ret)
				return ret;

888
			ppgtt->pd_dirty_rings &= ~intel_engine_flag(engine);
889 890
		}

891
		return 0;
892
	}
893

894
	return do_rcs_switch(req);
895
}
896

897 898 899 900 901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916
static bool engine_has_kernel_context(struct intel_engine_cs *engine)
{
	struct i915_gem_timeline *timeline;

	list_for_each_entry(timeline, &engine->i915->gt.timelines, link) {
		struct intel_timeline *tl;

		if (timeline == &engine->i915->gt.global_timeline)
			continue;

		tl = &timeline->engine[engine->id];
		if (i915_gem_active_peek(&tl->last_request,
					 &engine->i915->drm.struct_mutex))
			return false;
	}

	return (!engine->last_retired_context ||
		i915_gem_context_is_kernel(engine->last_retired_context));
}

917 918 919
int i915_gem_switch_to_kernel_context(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
920
	struct i915_gem_timeline *timeline;
921
	enum intel_engine_id id;
922

923 924
	lockdep_assert_held(&dev_priv->drm.struct_mutex);

925 926
	i915_gem_retire_requests(dev_priv);

927
	for_each_engine(engine, dev_priv, id) {
928 929 930
		struct drm_i915_gem_request *req;
		int ret;

931 932 933
		if (engine_has_kernel_context(engine))
			continue;

934 935 936 937
		req = i915_gem_request_alloc(engine, dev_priv->kernel_context);
		if (IS_ERR(req))
			return PTR_ERR(req);

938 939 940 941 942 943 944 945 946 947 948 949 950 951
		/* Queue this switch after all other activity */
		list_for_each_entry(timeline, &dev_priv->gt.timelines, link) {
			struct drm_i915_gem_request *prev;
			struct intel_timeline *tl;

			tl = &timeline->engine[engine->id];
			prev = i915_gem_active_raw(&tl->last_request,
						   &dev_priv->drm.struct_mutex);
			if (prev)
				i915_sw_fence_await_sw_fence_gfp(&req->submit,
								 &prev->submit,
								 GFP_KERNEL);
		}

952
		ret = i915_switch_context(req);
953
		i915_add_request(req);
954 955 956 957 958 959 960
		if (ret)
			return ret;
	}

	return 0;
}

961 962 963 964 965
/* True once the client has exceeded I915_MAX_CLIENT_CONTEXT_BANS bans. */
static bool client_is_banned(struct drm_i915_file_private *file_priv)
{
	const bool over_limit =
		file_priv->context_bans > I915_MAX_CLIENT_CONTEXT_BANS;

	return over_limit;
}

966 967 968
int i915_gem_context_create_ioctl(struct drm_device *dev, void *data,
				  struct drm_file *file)
{
969
	struct drm_i915_private *dev_priv = to_i915(dev);
970 971
	struct drm_i915_gem_context_create *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
972
	struct i915_gem_context *ctx;
973 974
	int ret;

975
	if (!dev_priv->engine[RCS]->context_size)
976 977
		return -ENODEV;

978 979 980
	if (args->pad != 0)
		return -EINVAL;

981 982 983 984 985 986 987 988
	if (client_is_banned(file_priv)) {
		DRM_DEBUG("client %s[%d] banned from creating ctx\n",
			  current->comm,
			  pid_nr(get_task_pid(current, PIDTYPE_PID)));

		return -EIO;
	}

989 990 991 992
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		return ret;

993 994 995 996
	/* Reap stale contexts */
	i915_gem_retire_requests(dev_priv);
	contexts_free(dev_priv);

997
	ctx = i915_gem_create_context(dev_priv, file_priv);
998
	mutex_unlock(&dev->struct_mutex);
999 1000
	if (IS_ERR(ctx))
		return PTR_ERR(ctx);
1001

1002 1003
	GEM_BUG_ON(i915_gem_context_is_kernel(ctx));

1004
	args->ctx_id = ctx->user_handle;
1005
	DRM_DEBUG("HW context %d created\n", args->ctx_id);
1006

1007
	return 0;
1008 1009 1010 1011 1012 1013 1014
}

int i915_gem_context_destroy_ioctl(struct drm_device *dev, void *data,
				   struct drm_file *file)
{
	struct drm_i915_gem_context_destroy *args = data;
	struct drm_i915_file_private *file_priv = file->driver_priv;
1015
	struct i915_gem_context *ctx;
1016 1017
	int ret;

1018 1019 1020
	if (args->pad != 0)
		return -EINVAL;

1021
	if (args->ctx_id == DEFAULT_CONTEXT_HANDLE)
1022
		return -ENOENT;
1023

1024
	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
1025 1026 1027 1028 1029 1030
	if (!ctx)
		return -ENOENT;

	ret = mutex_lock_interruptible(&dev->struct_mutex);
	if (ret)
		goto out;
1031

1032
	__destroy_hw_context(ctx, file_priv);
1033 1034
	mutex_unlock(&dev->struct_mutex);

1035 1036
out:
	i915_gem_context_put(ctx);
1037 1038
	return 0;
}
1039 1040 1041 1042 1043 1044

int i915_gem_context_getparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
1045
	struct i915_gem_context *ctx;
1046
	int ret = 0;
1047

1048
	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
1049 1050
	if (!ctx)
		return -ENOENT;
1051 1052 1053 1054

	args->size = 0;
	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
1055
		ret = -EINVAL;
1056
		break;
1057 1058 1059
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		args->value = ctx->flags & CONTEXT_NO_ZEROMAP;
		break;
C
Chris Wilson 已提交
1060 1061 1062 1063 1064 1065
	case I915_CONTEXT_PARAM_GTT_SIZE:
		if (ctx->ppgtt)
			args->value = ctx->ppgtt->base.total;
		else if (to_i915(dev)->mm.aliasing_ppgtt)
			args->value = to_i915(dev)->mm.aliasing_ppgtt->base.total;
		else
1066
			args->value = to_i915(dev)->ggtt.base.total;
C
Chris Wilson 已提交
1067
		break;
1068
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
1069
		args->value = i915_gem_context_no_error_capture(ctx);
1070
		break;
1071
	case I915_CONTEXT_PARAM_BANNABLE:
1072
		args->value = i915_gem_context_is_bannable(ctx);
1073
		break;
1074 1075 1076 1077 1078
	default:
		ret = -EINVAL;
		break;
	}

1079
	i915_gem_context_put(ctx);
1080 1081 1082 1083 1084 1085 1086 1087
	return ret;
}

int i915_gem_context_setparam_ioctl(struct drm_device *dev, void *data,
				    struct drm_file *file)
{
	struct drm_i915_file_private *file_priv = file->driver_priv;
	struct drm_i915_gem_context_param *args = data;
1088
	struct i915_gem_context *ctx;
1089 1090
	int ret;

1091 1092 1093 1094
	ctx = i915_gem_context_lookup(file_priv, args->ctx_id);
	if (!ctx)
		return -ENOENT;

1095 1096
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
1097
		goto out;
1098 1099 1100

	switch (args->param) {
	case I915_CONTEXT_PARAM_BAN_PERIOD:
1101
		ret = -EINVAL;
1102
		break;
1103 1104 1105 1106 1107 1108
	case I915_CONTEXT_PARAM_NO_ZEROMAP:
		if (args->size) {
			ret = -EINVAL;
		} else {
			ctx->flags &= ~CONTEXT_NO_ZEROMAP;
			ctx->flags |= args->value ? CONTEXT_NO_ZEROMAP : 0;
1109 1110 1111
		}
		break;
	case I915_CONTEXT_PARAM_NO_ERROR_CAPTURE:
1112
		if (args->size)
1113
			ret = -EINVAL;
1114 1115 1116 1117
		else if (args->value)
			i915_gem_context_set_no_error_capture(ctx);
		else
			i915_gem_context_clear_no_error_capture(ctx);
1118
		break;
1119 1120 1121 1122 1123
	case I915_CONTEXT_PARAM_BANNABLE:
		if (args->size)
			ret = -EINVAL;
		else if (!capable(CAP_SYS_ADMIN) && !args->value)
			ret = -EPERM;
1124 1125
		else if (args->value)
			i915_gem_context_set_bannable(ctx);
1126
		else
1127
			i915_gem_context_clear_bannable(ctx);
1128
		break;
1129 1130 1131 1132 1133 1134
	default:
		ret = -EINVAL;
		break;
	}
	mutex_unlock(&dev->struct_mutex);

1135 1136
out:
	i915_gem_context_put(ctx);
1137 1138
	return ret;
}
1139 1140 1141 1142

int i915_gem_context_reset_stats_ioctl(struct drm_device *dev,
				       void *data, struct drm_file *file)
{
1143
	struct drm_i915_private *dev_priv = to_i915(dev);
1144
	struct drm_i915_reset_stats *args = data;
1145
	struct i915_gem_context *ctx;
1146 1147 1148 1149 1150
	int ret;

	if (args->flags || args->pad)
		return -EINVAL;

1151 1152 1153 1154 1155
	ret = -ENOENT;
	rcu_read_lock();
	ctx = __i915_gem_context_lookup_rcu(file->driver_priv, args->ctx_id);
	if (!ctx)
		goto out;
1156

1157 1158 1159 1160 1161 1162
	/*
	 * We opt for unserialised reads here. This may result in tearing
	 * in the extremely unlikely event of a GPU hang on this context
	 * as we are querying them. If we need that extra layer of protection,
	 * we should wrap the hangstats with a seqlock.
	 */
1163 1164 1165 1166 1167 1168

	if (capable(CAP_SYS_ADMIN))
		args->reset_count = i915_reset_count(&dev_priv->gpu_error);
	else
		args->reset_count = 0;

1169 1170
	args->batch_active = READ_ONCE(ctx->guilty_count);
	args->batch_pending = READ_ONCE(ctx->active_count);
1171

1172 1173 1174 1175
	ret = 0;
out:
	rcu_read_unlock();
	return ret;
1176
}
1177 1178 1179

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_context.c"
1180
#include "selftests/i915_gem_context.c"
1181
#endif