intel_ringbuffer.c
/*
 * Copyright © 2008-2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Zou Nan hai <nanhai.zou@intel.com>
 *    Xiang Hai hao<haihao.xiang@intel.com>
 *
 */

#include <drm/drmP.h>
#include "i915_drv.h"
#include <drm/i915_drm.h>
#include "i915_trace.h"
#include "intel_drv.h"

bool
intel_ring_initialized(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;

	if (!dev)
		return false;

	if (i915.enable_execlists) {
		struct intel_context *dctx = ring->default_context;
		struct intel_ringbuffer *ringbuf = dctx->engine[ring->id].ringbuf;

		return ringbuf->obj;
	} else
		return ring->buffer && ring->buffer->obj;
}

int __intel_ring_space(int head, int tail, int size)
{
	int space = head - (tail + I915_RING_FREE_SPACE);
	if (space < 0)
		space += size;
	return space;
}

int intel_ring_space(struct intel_ringbuffer *ringbuf)
{
	return __intel_ring_space(ringbuf->head & HEAD_ADDR,
				  ringbuf->tail, ringbuf->size);
}
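
/*
 * Illustrative note (not part of the original source): __intel_ring_space()
 * returns the bytes available between the consumer (head) and the producer
 * (tail), keeping I915_RING_FREE_SPACE bytes in reserve so head and tail
 * never become equal on a completely full ring.  Assuming the reserve is
 * 64 bytes: with size = 4096, head = 512 and tail = 1024 the raw difference
 * is 512 - (1024 + 64) = -576, which wraps to -576 + 4096 = 3520 free bytes.
 */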

bool intel_ring_stopped(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	return dev_priv->gpu_error.stop_rings & intel_ring_flag(ring);
}

void __intel_ring_advance(struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	ringbuf->tail &= ringbuf->size - 1;
	if (intel_ring_stopped(ring))
		return;
	ring->write_tail(ring, ringbuf->tail);
}

static int
gen2_render_ring_flush(struct intel_engine_cs *ring,
		       u32	invalidate_domains,
		       u32	flush_domains)
{
	u32 cmd;
	int ret;

	cmd = MI_FLUSH;
	if (((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER) == 0)
		cmd |= MI_NO_WRITE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_SAMPLER)
		cmd |= MI_READ_FLUSH;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen4_render_ring_flush(struct intel_engine_cs *ring,
		       u32	invalidate_domains,
		       u32	flush_domains)
{
	struct drm_device *dev = ring->dev;
	u32 cmd;
	int ret;

	/*
	 * read/write caches:
	 *
	 * I915_GEM_DOMAIN_RENDER is always invalidated, but is
	 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is
	 * also flushed at 2d versus 3d pipeline switches.
	 *
	 * read-only caches:
	 *
	 * I915_GEM_DOMAIN_SAMPLER is flushed on pre-965 if
	 * MI_READ_FLUSH is set, and is always flushed on 965.
	 *
	 * I915_GEM_DOMAIN_COMMAND may not exist?
	 *
	 * I915_GEM_DOMAIN_INSTRUCTION, which exists on 965, is
	 * invalidated when MI_EXE_FLUSH is set.
	 *
	 * I915_GEM_DOMAIN_VERTEX, which exists on 965, is
	 * invalidated with every MI_FLUSH.
	 *
	 * TLBs:
	 *
	 * On 965, TLBs associated with I915_GEM_DOMAIN_COMMAND
	 * and I915_GEM_DOMAIN_CPU in are invalidated at PTE write and
	 * I915_GEM_DOMAIN_RENDER and I915_GEM_DOMAIN_SAMPLER
	 * are flushed at any MI_FLUSH.
	 */

	cmd = MI_FLUSH | MI_NO_WRITE_FLUSH;
	if ((invalidate_domains|flush_domains) & I915_GEM_DOMAIN_RENDER)
		cmd &= ~MI_NO_WRITE_FLUSH;
	if (invalidate_domains & I915_GEM_DOMAIN_INSTRUCTION)
		cmd |= MI_EXE_FLUSH;

	if (invalidate_domains & I915_GEM_DOMAIN_COMMAND &&
	    (IS_G4X(dev) || IS_GEN5(dev)))
		cmd |= MI_INVALIDATE_ISP;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

/**
 * Emits a PIPE_CONTROL with a non-zero post-sync operation, for
 * implementing two workarounds on gen6.  From section 1.4.7.1
 * "PIPE_CONTROL" of the Sandy Bridge PRM volume 2 part 1:
 *
 * [DevSNB-C+{W/A}] Before any depth stall flush (including those
 * produced by non-pipelined state commands), software needs to first
 * send a PIPE_CONTROL with no bits set except Post-Sync Operation !=
 * 0.
 *
 * [Dev-SNB{W/A}]: Before a PIPE_CONTROL with Write Cache Flush Enable
 * =1, a PIPE_CONTROL with any non-zero post-sync-op is required.
 *
 * And the workaround for these two requires this workaround first:
 *
 * [Dev-SNB{W/A}]: Pipe-control with CS-stall bit set must be sent
 * BEFORE the pipe-control with a post-sync op and no write-cache
 * flushes.
 *
 * And this last workaround is tricky because of the requirements on
 * that bit.  From section 1.4.7.2.3 "Stall" of the Sandy Bridge PRM
 * volume 2 part 1:
 *
 *     "1 of the following must also be set:
 *      - Render Target Cache Flush Enable ([12] of DW1)
 *      - Depth Cache Flush Enable ([0] of DW1)
 *      - Stall at Pixel Scoreboard ([1] of DW1)
 *      - Depth Stall ([13] of DW1)
 *      - Post-Sync Operation ([13] of DW1)
 *      - Notify Enable ([8] of DW1)"
 *
 * The cache flushes require the workaround flush that triggered this
 * one, so we can't use it.  Depth stall would trigger the same.
 * Post-sync nonzero is what triggered this second workaround, so we
 * can't use that one either.  Notify enable is IRQs, which aren't
 * really our business.  That leaves only stall at scoreboard.
 */
static int
intel_emit_post_sync_nonzero_flush(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;


	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0); /* low dword */
	intel_ring_emit(ring, 0); /* high dword */
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(5));
	intel_ring_emit(ring, PIPE_CONTROL_QW_WRITE);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT); /* address */
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_render_ring_flush(struct intel_engine_cs *ring,
                         u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* Force SNB workarounds for PIPE_CONTROL flushes */
	ret = intel_emit_post_sync_nonzero_flush(ring);
	if (ret)
		return ret;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
		/*
		 * Ensure that any following seqno writes only happen
		 * when the render cache is indeed flushed.
		 */
		flags |= PIPE_CONTROL_CS_STALL;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE | PIPE_CONTROL_CS_STALL;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen7_render_ring_cs_stall_wa(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, PIPE_CONTROL_CS_STALL |
			      PIPE_CONTROL_STALL_AT_SCOREBOARD);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int gen7_ring_fbc_flush(struct intel_engine_cs *ring, u32 value)
{
	int ret;

	if (!ring->fbc_dirty)
		return 0;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;
	/* WaFbcNukeOn3DBlt:ivb/hsw */
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
	intel_ring_emit(ring, value);
	intel_ring_emit(ring, MI_STORE_REGISTER_MEM(1) | MI_SRM_LRM_GLOBAL_GTT);
	intel_ring_emit(ring, MSG_FBC_REND_STATE);
	intel_ring_emit(ring, ring->scratch.gtt_offset + 256);
	intel_ring_advance(ring);

	ring->fbc_dirty = false;
	return 0;
}

static int
gen7_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/*
	 * Ensure that any following seqno writes only happen when the render
	 * cache is indeed flushed.
	 *
	 * Workaround: 4th PIPE_CONTROL command (except the ones with only
	 * read-cache invalidate bits set) must have the CS_STALL bit set. We
	 * don't try to be clever and just set it unconditionally.
	 */
	flags |= PIPE_CONTROL_CS_STALL;

	/* Just flush everything.  Experiments have shown that reducing the
	 * number of bits based on the write domains has little performance
	 * impact.
	 */
	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_MEDIA_STATE_CLEAR;
		/*
		 * TLB invalidate requires a post-sync write.
		 */
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		flags |= PIPE_CONTROL_STALL_AT_SCOREBOARD;

		/* Workaround: we must issue a pipe_control with CS-stall bit
		 * set before a pipe_control command that has the state cache
		 * invalidate bit set. */
		gen7_render_ring_cs_stall_wa(ring);
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	if (!invalidate_domains && flush_domains)
		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

	return 0;
}

static int
gen8_emit_pipe_control(struct intel_engine_cs *ring,
		       u32 flags, u32 scratch_addr)
{
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(6));
	intel_ring_emit(ring, flags);
	intel_ring_emit(ring, scratch_addr);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_emit(ring, 0);
	intel_ring_advance(ring);

	return 0;
}

static int
gen8_render_ring_flush(struct intel_engine_cs *ring,
		       u32 invalidate_domains, u32 flush_domains)
{
	u32 flags = 0;
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	flags |= PIPE_CONTROL_CS_STALL;

	if (flush_domains) {
		flags |= PIPE_CONTROL_RENDER_TARGET_CACHE_FLUSH;
		flags |= PIPE_CONTROL_DEPTH_CACHE_FLUSH;
	}
	if (invalidate_domains) {
		flags |= PIPE_CONTROL_TLB_INVALIDATE;
		flags |= PIPE_CONTROL_INSTRUCTION_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_VF_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_CONST_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_STATE_CACHE_INVALIDATE;
		flags |= PIPE_CONTROL_QW_WRITE;
		flags |= PIPE_CONTROL_GLOBAL_GTT_IVB;

		/* WaCsStallBeforeStateCacheInvalidate:bdw,chv */
		ret = gen8_emit_pipe_control(ring,
					     PIPE_CONTROL_CS_STALL |
					     PIPE_CONTROL_STALL_AT_SCOREBOARD,
					     0);
		if (ret)
			return ret;
	}

	ret = gen8_emit_pipe_control(ring, flags, scratch_addr);
	if (ret)
		return ret;

	if (!invalidate_domains && flush_domains)
		return gen7_ring_fbc_flush(ring, FBC_REND_NUKE);

	return 0;
}

static void ring_write_tail(struct intel_engine_cs *ring,
			    u32 value)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	I915_WRITE_TAIL(ring, value);
}

u64 intel_ring_get_active_head(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u64 acthd;

	if (INTEL_INFO(ring->dev)->gen >= 8)
		acthd = I915_READ64_2x32(RING_ACTHD(ring->mmio_base),
					 RING_ACTHD_UDW(ring->mmio_base));
	else if (INTEL_INFO(ring->dev)->gen >= 4)
		acthd = I915_READ(RING_ACTHD(ring->mmio_base));
	else
		acthd = I915_READ(ACTHD);

	return acthd;
}

static void ring_setup_phys_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 addr;

	addr = dev_priv->status_page_dmah->busaddr;
	if (INTEL_INFO(ring->dev)->gen >= 4)
		addr |= (dev_priv->status_page_dmah->busaddr >> 28) & 0xf0;
	I915_WRITE(HWS_PGA, addr);
}

static bool stop_ring(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = to_i915(ring->dev);

	if (!IS_GEN2(ring->dev)) {
		I915_WRITE_MODE(ring, _MASKED_BIT_ENABLE(STOP_RING));
		if (wait_for((I915_READ_MODE(ring) & MODE_IDLE) != 0, 1000)) {
			DRM_ERROR("%s : timed out trying to stop ring\n", ring->name);
			/* Sometimes we observe that the idle flag is not
			 * set even though the ring is empty. So double
			 * check before giving up.
			 */
			if (I915_READ_HEAD(ring) != I915_READ_TAIL(ring))
				return false;
		}
	}

	I915_WRITE_CTL(ring, 0);
	I915_WRITE_HEAD(ring, 0);
	ring->write_tail(ring, 0);

	if (!IS_GEN2(ring->dev)) {
		(void)I915_READ_CTL(ring);
		I915_WRITE_MODE(ring, _MASKED_BIT_DISABLE(STOP_RING));
	}

	return (I915_READ_HEAD(ring) & HEAD_ADDR) == 0;
}

static int init_ring_common(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_object *obj = ringbuf->obj;
	int ret = 0;

	gen6_gt_force_wake_get(dev_priv, FORCEWAKE_ALL);

	if (!stop_ring(ring)) {
		/* G45 ring initialization often fails to reset head to zero */
		DRM_DEBUG_KMS("%s head not reset to zero "
			      "ctl %08x head %08x tail %08x start %08x\n",
			      ring->name,
			      I915_READ_CTL(ring),
			      I915_READ_HEAD(ring),
			      I915_READ_TAIL(ring),
			      I915_READ_START(ring));

		if (!stop_ring(ring)) {
			DRM_ERROR("failed to set %s head to zero "
				  "ctl %08x head %08x tail %08x start %08x\n",
				  ring->name,
				  I915_READ_CTL(ring),
				  I915_READ_HEAD(ring),
				  I915_READ_TAIL(ring),
				  I915_READ_START(ring));
			ret = -EIO;
			goto out;
		}
	}

	if (I915_NEED_GFX_HWS(dev))
		intel_ring_setup_status_page(ring);
	else
		ring_setup_phys_status_page(ring);

	/* Enforce ordering by reading HEAD register back */
	I915_READ_HEAD(ring);

	/* Initialize the ring. This must happen _after_ we've cleared the ring
	 * registers with the above sequence (the readback of the HEAD registers
	 * also enforces ordering), otherwise the hw might lose the new ring
	 * register values. */
	I915_WRITE_START(ring, i915_gem_obj_ggtt_offset(obj));

	/* WaClearRingBufHeadRegAtInit:ctg,elk */
	if (I915_READ_HEAD(ring))
		DRM_DEBUG("%s initialization failed [head=%08x], fudging\n",
			  ring->name, I915_READ_HEAD(ring));
	I915_WRITE_HEAD(ring, 0);
	(void)I915_READ_HEAD(ring);

	I915_WRITE_CTL(ring,
			((ringbuf->size - PAGE_SIZE) & RING_NR_PAGES)
			| RING_VALID);

	/* If the head is still not zero, the ring is dead */
	if (wait_for((I915_READ_CTL(ring) & RING_VALID) != 0 &&
		     I915_READ_START(ring) == i915_gem_obj_ggtt_offset(obj) &&
		     (I915_READ_HEAD(ring) & HEAD_ADDR) == 0, 50)) {
		DRM_ERROR("%s initialization failed "
			  "ctl %08x (valid? %d) head %08x tail %08x start %08x [expected %08lx]\n",
			  ring->name,
			  I915_READ_CTL(ring), I915_READ_CTL(ring) & RING_VALID,
			  I915_READ_HEAD(ring), I915_READ_TAIL(ring),
			  I915_READ_START(ring), (unsigned long)i915_gem_obj_ggtt_offset(obj));
		ret = -EIO;
		goto out;
	}

	ringbuf->head = I915_READ_HEAD(ring);
	ringbuf->tail = I915_READ_TAIL(ring) & TAIL_ADDR;
	ringbuf->space = intel_ring_space(ringbuf);
	ringbuf->last_retired_head = -1;

	memset(&ring->hangcheck, 0, sizeof(ring->hangcheck));

out:
	gen6_gt_force_wake_put(dev_priv, FORCEWAKE_ALL);

	return ret;
}

void
intel_fini_pipe_control(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;

	if (ring->scratch.obj == NULL)
		return;

	if (INTEL_INFO(dev)->gen >= 5) {
		kunmap(sg_page(ring->scratch.obj->pages->sgl));
		i915_gem_object_ggtt_unpin(ring->scratch.obj);
	}

	drm_gem_object_unreference(&ring->scratch.obj->base);
	ring->scratch.obj = NULL;
}

int
intel_init_pipe_control(struct intel_engine_cs *ring)
{
	int ret;

	if (ring->scratch.obj)
		return 0;

	ring->scratch.obj = i915_gem_alloc_object(ring->dev, 4096);
	if (ring->scratch.obj == NULL) {
		DRM_ERROR("Failed to allocate seqno page\n");
		ret = -ENOMEM;
		goto err;
	}

	ret = i915_gem_object_set_cache_level(ring->scratch.obj, I915_CACHE_LLC);
	if (ret)
		goto err_unref;

	ret = i915_gem_obj_ggtt_pin(ring->scratch.obj, 4096, 0);
	if (ret)
		goto err_unref;

	ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(ring->scratch.obj);
	ring->scratch.cpu_page = kmap(sg_page(ring->scratch.obj->pages->sgl));
	if (ring->scratch.cpu_page == NULL) {
		ret = -ENOMEM;
		goto err_unpin;
	}

	DRM_DEBUG_DRIVER("%s pipe control offset: 0x%08x\n",
			 ring->name, ring->scratch.gtt_offset);
	return 0;

err_unpin:
	i915_gem_object_ggtt_unpin(ring->scratch.obj);
err_unref:
	drm_gem_object_unreference(&ring->scratch.obj->base);
err:
	return ret;
}

static int intel_ring_workarounds_emit(struct intel_engine_cs *ring,
				       struct intel_context *ctx)
{
	int ret, i;
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_workarounds *w = &dev_priv->workarounds;

	if (WARN_ON(w->count == 0))
		return 0;

	ring->gpu_caches_dirty = true;
	ret = intel_ring_flush_all_caches(ring);
	if (ret)
		return ret;

	ret = intel_ring_begin(ring, (w->count * 2 + 2));
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(w->count));
	for (i = 0; i < w->count; i++) {
		intel_ring_emit(ring, w->reg[i].addr);
		intel_ring_emit(ring, w->reg[i].value);
	}
	intel_ring_emit(ring, MI_NOOP);

	intel_ring_advance(ring);

	ring->gpu_caches_dirty = true;
	ret = intel_ring_flush_all_caches(ring);
	if (ret)
		return ret;

	DRM_DEBUG_DRIVER("Number of Workarounds emitted: %d\n", w->count);

	return 0;
}

static int wa_add(struct drm_i915_private *dev_priv,
		  const u32 addr, const u32 mask, const u32 val)
{
	const u32 idx = dev_priv->workarounds.count;

	if (WARN_ON(idx >= I915_MAX_WA_REGS))
		return -ENOSPC;

	dev_priv->workarounds.reg[idx].addr = addr;
	dev_priv->workarounds.reg[idx].value = val;
	dev_priv->workarounds.reg[idx].mask = mask;

	dev_priv->workarounds.count++;

	return 0;
}

#define WA_REG(addr, mask, val) { \
		const int r = wa_add(dev_priv, (addr), (mask), (val)); \
		if (r) \
			return r; \
	}

#define WA_SET_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_ENABLE(mask))

#define WA_CLR_BIT_MASKED(addr, mask) \
	WA_REG(addr, (mask), _MASKED_BIT_DISABLE(mask))

#define WA_SET_FIELD_MASKED(addr, mask, value) \
	WA_REG(addr, mask, _MASKED_FIELD(mask, value))

#define WA_SET_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) | (mask))
#define WA_CLR_BIT(addr, mask) WA_REG(addr, mask, I915_READ(addr) & ~(mask))

#define WA_WRITE(addr, val) WA_REG(addr, 0xffffffff, val)

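/*
 * Illustrative note (not part of the original source): each WA_* macro above
 * only records a register/mask/value triple in dev_priv->workarounds via
 * wa_add(); nothing is written to hardware at this point.  For example,
 * WA_SET_BIT_MASKED(HDC_CHICKEN0, HDC_FORCE_NON_COHERENT) queues a masked-bit
 * enable for HDC_CHICKEN0, and the queued list is later replayed on the ring
 * by intel_ring_workarounds_emit() as a single MI_LOAD_REGISTER_IMM batch.
 */
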
static int bdw_init_workarounds(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* WaDisablePartialInstShootdown:bdw */
	/* WaDisableThreadStallDopClockGating:bdw (pre-production) */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
			  STALL_DOP_GATING_DISABLE);

	/* WaDisableDopClockGating:bdw */
	WA_SET_BIT_MASKED(GEN7_ROW_CHICKEN2,
			  DOP_CLOCK_GATING_DISABLE);

	WA_SET_BIT_MASKED(HALF_SLICE_CHICKEN3,
			  GEN8_SAMPLER_POWER_BYPASS_DIS);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaDisableFenceDestinationToSLM:bdw (GT3 pre-production) */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT |
			  (IS_BDW_GT3(dev) ? HDC_FENCE_DEST_SLM_DISABLE : 0));

	/* Wa4x4STCOptimizationDisable:bdw */
	WA_SET_BIT_MASKED(CACHE_MODE_1,
			  GEN8_4x4_STC_OPTIMIZATION_DISABLE);

	/*
	 * BSpec recommends 8x4 when MSAA is used,
	 * however in practice 16x4 seems fastest.
	 *
	 * Note that PS/WM thread counts depend on the WIZ hashing
	 * disable bit, which we don't touch here, but it's good
	 * to keep in mind (see 3DSTATE_PS and 3DSTATE_WM).
	 */
	WA_SET_FIELD_MASKED(GEN7_GT_MODE,
			    GEN6_WIZ_HASHING_MASK,
			    GEN6_WIZ_HASHING_16x4);

	return 0;
}

static int chv_init_workarounds(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* WaDisablePartialInstShootdown:chv */
	/* WaDisableThreadStallDopClockGating:chv */
	WA_SET_BIT_MASKED(GEN8_ROW_CHICKEN,
			  PARTIAL_INSTRUCTION_SHOOTDOWN_DISABLE |
			  STALL_DOP_GATING_DISABLE);

	/* Use Force Non-Coherent whenever executing a 3D context. This is a
	 * workaround for a possible hang in the unlikely event a TLB
	 * invalidation occurs during a PSD flush.
	 */
	/* WaForceEnableNonCoherent:chv */
	/* WaHdcDisableFetchWhenMasked:chv */
	WA_SET_BIT_MASKED(HDC_CHICKEN0,
			  HDC_FORCE_NON_COHERENT |
			  HDC_DONOT_FETCH_MEM_WHEN_MASKED);

	return 0;
}

int init_workarounds_ring(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	WARN_ON(ring->id != RCS);

	dev_priv->workarounds.count = 0;

	if (IS_BROADWELL(dev))
		return bdw_init_workarounds(ring);

	if (IS_CHERRYVIEW(dev))
		return chv_init_workarounds(ring);

	return 0;
}

static int init_render_ring(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = init_ring_common(ring);
	if (ret)
		return ret;

	/* WaTimedSingleVertexDispatch:cl,bw,ctg,elk,ilk,snb */
	if (INTEL_INFO(dev)->gen >= 4 && INTEL_INFO(dev)->gen < 7)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(VS_TIMER_DISPATCH));

	/* We need to disable the AsyncFlip performance optimisations in order
	 * to use MI_WAIT_FOR_EVENT within the CS. It should already be
	 * programmed to '1' on all products.
	 *
	 * WaDisableAsyncFlipPerfMode:snb,ivb,hsw,vlv,bdw,chv
	 */
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 9)
		I915_WRITE(MI_MODE, _MASKED_BIT_ENABLE(ASYNC_FLIP_PERF_DISABLE));

	/* Required for the hardware to program scanline values for waiting */
	/* WaEnableFlushTlbInvalidationMode:snb */
	if (INTEL_INFO(dev)->gen == 6)
		I915_WRITE(GFX_MODE,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT));

	/* WaBCSVCSTlbInvalidationMode:ivb,vlv,hsw */
	if (IS_GEN7(dev))
		I915_WRITE(GFX_MODE_GEN7,
			   _MASKED_BIT_ENABLE(GFX_TLB_INVALIDATE_EXPLICIT) |
			   _MASKED_BIT_ENABLE(GFX_REPLAY_MODE));

	if (INTEL_INFO(dev)->gen >= 5) {
		ret = intel_init_pipe_control(ring);
		if (ret)
			return ret;
	}

	if (IS_GEN6(dev)) {
		/* From the Sandybridge PRM, volume 1 part 3, page 24:
		 * "If this bit is set, STCunit will have LRA as replacement
		 *  policy. [...] This bit must be reset.  LRA replacement
		 *  policy is not supported."
		 */
		I915_WRITE(CACHE_MODE_0,
			   _MASKED_BIT_DISABLE(CM0_STC_EVICT_DISABLE_LRA_SNB));
	}

	if (INTEL_INFO(dev)->gen >= 6)
		I915_WRITE(INSTPM, _MASKED_BIT_ENABLE(INSTPM_FORCE_ORDERING));

	if (HAS_L3_DPF(dev))
		I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));

	return init_workarounds_ring(ring);
}

static void render_ring_cleanup(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	if (dev_priv->semaphore_obj) {
		i915_gem_object_ggtt_unpin(dev_priv->semaphore_obj);
		drm_gem_object_unreference(&dev_priv->semaphore_obj->base);
		dev_priv->semaphore_obj = NULL;
	}

	intel_fini_pipe_control(ring);
}

static int gen8_rcs_signal(struct intel_engine_cs *signaller,
			   unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 8
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *waiter;
	int i, ret, num_rings;

	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(waiter, dev_priv, i) {
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		intel_ring_emit(signaller, GFX_OP_PIPE_CONTROL(6));
		intel_ring_emit(signaller, PIPE_CONTROL_GLOBAL_GTT_IVB |
					   PIPE_CONTROL_QW_WRITE |
					   PIPE_CONTROL_FLUSH_ENABLE);
		intel_ring_emit(signaller, lower_32_bits(gtt_offset));
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
		intel_ring_emit(signaller, 0);
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
					   MI_SEMAPHORE_TARGET(waiter->id));
		intel_ring_emit(signaller, 0);
	}

	return 0;
}

static int gen8_xcs_signal(struct intel_engine_cs *signaller,
			   unsigned int num_dwords)
{
#define MBOX_UPDATE_DWORDS 6
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *waiter;
	int i, ret, num_rings;

	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += (num_rings-1) * MBOX_UPDATE_DWORDS;
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(waiter, dev_priv, i) {
		u64 gtt_offset = signaller->semaphore.signal_ggtt[i];
		if (gtt_offset == MI_SEMAPHORE_SYNC_INVALID)
			continue;

		intel_ring_emit(signaller, (MI_FLUSH_DW + 1) |
					   MI_FLUSH_DW_OP_STOREDW);
		intel_ring_emit(signaller, lower_32_bits(gtt_offset) |
					   MI_FLUSH_DW_USE_GTT);
		intel_ring_emit(signaller, upper_32_bits(gtt_offset));
		intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
		intel_ring_emit(signaller, MI_SEMAPHORE_SIGNAL |
					   MI_SEMAPHORE_TARGET(waiter->id));
		intel_ring_emit(signaller, 0);
	}

	return 0;
}

static int gen6_signal(struct intel_engine_cs *signaller,
		       unsigned int num_dwords)
{
	struct drm_device *dev = signaller->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *useless;
	int i, ret, num_rings;

#define MBOX_UPDATE_DWORDS 3
	num_rings = hweight32(INTEL_INFO(dev)->ring_mask);
	num_dwords += round_up((num_rings-1) * MBOX_UPDATE_DWORDS, 2);
#undef MBOX_UPDATE_DWORDS

	ret = intel_ring_begin(signaller, num_dwords);
	if (ret)
		return ret;

	for_each_ring(useless, dev_priv, i) {
		u32 mbox_reg = signaller->semaphore.mbox.signal[i];
		if (mbox_reg != GEN6_NOSYNC) {
			intel_ring_emit(signaller, MI_LOAD_REGISTER_IMM(1));
			intel_ring_emit(signaller, mbox_reg);
			intel_ring_emit(signaller, signaller->outstanding_lazy_seqno);
		}
	}

	/* If num_dwords was rounded, make sure the tail pointer is correct */
	if (num_rings % 2 == 0)
		intel_ring_emit(signaller, MI_NOOP);

	return 0;
}
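
/*
 * Illustrative note (not part of the original source): gen6_signal() reserves
 * 3 dwords (MI_LOAD_REGISTER_IMM + mailbox register + seqno) per signalled
 * ring and rounds the total up to an even count, since ring emission must
 * stay qword aligned.  For example, with four rings the signaller updates
 * three mailboxes: 3 * 3 = 9 dwords, rounded up to 10, and the spare slot is
 * filled by the MI_NOOP emitted above.
 */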

/**
 * gen6_add_request - Update the semaphore mailbox registers
 *
 * @ring - ring that is adding a request
 * @seqno - return seqno stuck into the ring
 *
 * Update the mailbox registers in the *other* rings with the current seqno.
 * This acts like a signal in the canonical semaphore.
 */
static int
gen6_add_request(struct intel_engine_cs *ring)
{
	int ret;

	if (ring->semaphore.signal)
		ret = ring->semaphore.signal(ring, 4);
	else
		ret = intel_ring_begin(ring, 4);

	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}

static inline bool i915_gem_has_seqno_wrapped(struct drm_device *dev,
					      u32 seqno)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	return dev_priv->last_seqno < seqno;
}

/**
 * intel_ring_sync - sync the waiter to the signaller on seqno
 *
 * @waiter - ring that is waiting
 * @signaller - ring which has, or will signal
 * @seqno - seqno which the waiter will block on
 */

static int
gen8_ring_sync(struct intel_engine_cs *waiter,
	       struct intel_engine_cs *signaller,
	       u32 seqno)
{
	struct drm_i915_private *dev_priv = waiter->dev->dev_private;
	int ret;

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	intel_ring_emit(waiter, MI_SEMAPHORE_WAIT |
				MI_SEMAPHORE_GLOBAL_GTT |
				MI_SEMAPHORE_POLL |
				MI_SEMAPHORE_SAD_GTE_SDD);
	intel_ring_emit(waiter, seqno);
	intel_ring_emit(waiter,
			lower_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
	intel_ring_emit(waiter,
			upper_32_bits(GEN8_WAIT_OFFSET(waiter, signaller->id)));
	intel_ring_advance(waiter);
	return 0;
}

static int
gen6_ring_sync(struct intel_engine_cs *waiter,
	       struct intel_engine_cs *signaller,
	       u32 seqno)
{
	u32 dw1 = MI_SEMAPHORE_MBOX |
		  MI_SEMAPHORE_COMPARE |
		  MI_SEMAPHORE_REGISTER;
	u32 wait_mbox = signaller->semaphore.mbox.wait[waiter->id];
	int ret;

	/* Throughout all of the GEM code, seqno passed implies our current
	 * seqno is >= the last seqno executed. However for hardware the
	 * comparison is strictly greater than.
	 */
	seqno -= 1;

	WARN_ON(wait_mbox == MI_SEMAPHORE_SYNC_INVALID);

	ret = intel_ring_begin(waiter, 4);
	if (ret)
		return ret;

	/* If seqno wrap happened, omit the wait with no-ops */
	if (likely(!i915_gem_has_seqno_wrapped(waiter->dev, seqno))) {
		intel_ring_emit(waiter, dw1 | wait_mbox);
		intel_ring_emit(waiter, seqno);
		intel_ring_emit(waiter, 0);
		intel_ring_emit(waiter, MI_NOOP);
	} else {
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
		intel_ring_emit(waiter, MI_NOOP);
	}
	intel_ring_advance(waiter);

	return 0;
}

#define PIPE_CONTROL_FLUSH(ring__, addr__)					\
do {									\
	intel_ring_emit(ring__, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |		\
		 PIPE_CONTROL_DEPTH_STALL);				\
	intel_ring_emit(ring__, (addr__) | PIPE_CONTROL_GLOBAL_GTT);			\
	intel_ring_emit(ring__, 0);							\
	intel_ring_emit(ring__, 0);							\
} while (0)

static int
pc_render_add_request(struct intel_engine_cs *ring)
{
	u32 scratch_addr = ring->scratch.gtt_offset + 2 * CACHELINE_BYTES;
	int ret;

	/* For Ironlake, MI_USER_INTERRUPT was deprecated and apparently
	 * incoherent with writes to memory, i.e. completely fubar,
	 * so we need to use PIPE_NOTIFY instead.
	 *
	 * However, we also need to workaround the qword write
	 * incoherence by flushing the 6 PIPE_NOTIFY buffers out to
	 * memory before requesting an interrupt.
	 */
	ret = intel_ring_begin(ring, 32);
	if (ret)
		return ret;

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, 0);
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES; /* write to separate cachelines */
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);
	scratch_addr += 2 * CACHELINE_BYTES;
	PIPE_CONTROL_FLUSH(ring, scratch_addr);

	intel_ring_emit(ring, GFX_OP_PIPE_CONTROL(4) | PIPE_CONTROL_QW_WRITE |
			PIPE_CONTROL_WRITE_FLUSH |
			PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |
			PIPE_CONTROL_NOTIFY);
	intel_ring_emit(ring, ring->scratch.gtt_offset | PIPE_CONTROL_GLOBAL_GTT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, 0);
	__intel_ring_advance(ring);

	return 0;
}

static u32
gen6_ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	/* Workaround to force correct ordering between irq and seqno writes on
	 * ivb (and maybe also on snb) by reading from a CS register (like
	 * ACTHD) before reading the status page. */
	if (!lazy_coherency) {
		struct drm_i915_private *dev_priv = ring->dev->dev_private;
		POSTING_READ(RING_ACTHD(ring->mmio_base));
	}

	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static u32
ring_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return intel_read_status_page(ring, I915_GEM_HWS_INDEX);
}

static void
ring_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	intel_write_status_page(ring, I915_GEM_HWS_INDEX, seqno);
}

static u32
pc_render_get_seqno(struct intel_engine_cs *ring, bool lazy_coherency)
{
	return ring->scratch.cpu_page[0];
}

static void
pc_render_set_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	ring->scratch.cpu_page[0] = seqno;
}

static bool
gen5_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0)
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen5_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0)
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i9xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!intel_irqs_enabled(dev_priv))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i9xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE(IMR, dev_priv->irq_mask);
		POSTING_READ(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
i8xx_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (!intel_irqs_enabled(dev_priv))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		dev_priv->irq_mask &= ~ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
i8xx_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		dev_priv->irq_mask |= ring->irq_enable_mask;
		I915_WRITE16(IMR, dev_priv->irq_mask);
		POSTING_READ16(IMR);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

void intel_ring_setup_status_page(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	u32 mmio = 0;

	/* The ring status page addresses are no longer next to the rest of
	 * the ring registers as of gen7.
	 */
	if (IS_GEN7(dev)) {
		switch (ring->id) {
		case RCS:
			mmio = RENDER_HWS_PGA_GEN7;
			break;
		case BCS:
			mmio = BLT_HWS_PGA_GEN7;
			break;
		/*
		 * VCS2 actually doesn't exist on Gen7. Only shut up
		 * gcc switch check warning
		 */
		case VCS2:
		case VCS:
			mmio = BSD_HWS_PGA_GEN7;
			break;
		case VECS:
			mmio = VEBOX_HWS_PGA_GEN7;
			break;
		}
	} else if (IS_GEN6(ring->dev)) {
		mmio = RING_HWS_PGA_GEN6(ring->mmio_base);
	} else {
		/* XXX: gen8 returns to sanity */
		mmio = RING_HWS_PGA(ring->mmio_base);
	}

	I915_WRITE(mmio, (u32)ring->status_page.gfx_addr);
	POSTING_READ(mmio);

	/*
	 * Flush the TLB for this page
	 *
	 * FIXME: These two bits have disappeared on gen8, so a question
	 * arises: do we still need this and if so how should we go about
	 * invalidating the TLB?
	 */
	if (INTEL_INFO(dev)->gen >= 6 && INTEL_INFO(dev)->gen < 8) {
		u32 reg = RING_INSTPM(ring->mmio_base);

		/* ring should be idle before issuing a sync flush*/
		WARN_ON((I915_READ_MODE(ring) & MODE_IDLE) == 0);

		I915_WRITE(reg,
			   _MASKED_BIT_ENABLE(INSTPM_TLB_INVALIDATE |
					      INSTPM_SYNC_FLUSH));
		if (wait_for((I915_READ(reg) & INSTPM_SYNC_FLUSH) == 0,
			     1000))
			DRM_ERROR("%s: wait for SyncFlush to complete for TLB invalidation timed out\n",
				  ring->name);
	}
}

static int
bsd_ring_flush(struct intel_engine_cs *ring,
	       u32     invalidate_domains,
	       u32     flush_domains)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_FLUSH);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);
	return 0;
}

static int
i9xx_add_request(struct intel_engine_cs *ring)
{
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_STORE_DWORD_INDEX);
	intel_ring_emit(ring, I915_GEM_HWS_INDEX << MI_STORE_DWORD_INDEX_SHIFT);
	intel_ring_emit(ring, ring->outstanding_lazy_seqno);
	intel_ring_emit(ring, MI_USER_INTERRUPT);
	__intel_ring_advance(ring);

	return 0;
}

static bool
gen6_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring,
				       ~(ring->irq_enable_mask |
					 GT_PARITY_ERROR(dev)));
		else
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		gen5_enable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen6_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS)
			I915_WRITE_IMR(ring, ~GT_PARITY_ERROR(dev));
		else
			I915_WRITE_IMR(ring, ~0);
		gen5_disable_gt_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
hsw_vebox_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		gen6_enable_pm_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
hsw_vebox_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		I915_WRITE_IMR(ring, ~0);
		gen6_disable_pm_irq(dev_priv, ring->irq_enable_mask);
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static bool
gen8_ring_get_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	if (WARN_ON(!intel_irqs_enabled(dev_priv)))
		return false;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (ring->irq_refcount++ == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
			I915_WRITE_IMR(ring,
				       ~(ring->irq_enable_mask |
					 GT_RENDER_L3_PARITY_ERROR_INTERRUPT));
		} else {
			I915_WRITE_IMR(ring, ~ring->irq_enable_mask);
		}
		POSTING_READ(RING_IMR(ring->mmio_base));
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);

	return true;
}

static void
gen8_ring_put_irq(struct intel_engine_cs *ring)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned long flags;

	spin_lock_irqsave(&dev_priv->irq_lock, flags);
	if (--ring->irq_refcount == 0) {
		if (HAS_L3_DPF(dev) && ring->id == RCS) {
			I915_WRITE_IMR(ring,
				       ~GT_RENDER_L3_PARITY_ERROR_INTERRUPT);
		} else {
			I915_WRITE_IMR(ring, ~0);
		}
		POSTING_READ(RING_IMR(ring->mmio_base));
	}
	spin_unlock_irqrestore(&dev_priv->irq_lock, flags);
}

static int
i965_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 length,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			MI_BATCH_GTT |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Just userspace ABI convention to limit the wa batch bo to a reasonable size */
#define I830_BATCH_LIMIT (256*1024)
#define I830_TLB_ENTRIES (2)
#define I830_WA_SIZE max(I830_TLB_ENTRIES*4096, I830_BATCH_LIMIT)
static int
i830_dispatch_execbuffer(struct intel_engine_cs *ring,
				u64 offset, u32 len,
				unsigned flags)
{
	u32 cs_offset = ring->scratch.gtt_offset;
	int ret;

	ret = intel_ring_begin(ring, 6);
	if (ret)
		return ret;

	/* Evict the invalid PTE TLBs */
	intel_ring_emit(ring, COLOR_BLT_CMD | BLT_WRITE_RGBA);
	intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_COLOR_COPY | 4096);
	intel_ring_emit(ring, I830_TLB_ENTRIES << 16 | 4); /* load each page */
	intel_ring_emit(ring, cs_offset);
	intel_ring_emit(ring, 0xdeadbeef);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	if ((flags & I915_DISPATCH_PINNED) == 0) {
		if (len > I830_BATCH_LIMIT)
			return -ENOSPC;

		ret = intel_ring_begin(ring, 6 + 2);
		if (ret)
			return ret;

		/* Blit the batch (which has now all relocs applied) to the
		 * stable batch scratch bo area (so that the CS never
		 * stumbles over its tlb invalidation bug) ...
		 */
		intel_ring_emit(ring, SRC_COPY_BLT_CMD | BLT_WRITE_RGBA);
		intel_ring_emit(ring, BLT_DEPTH_32 | BLT_ROP_SRC_COPY | 4096);
		intel_ring_emit(ring, DIV_ROUND_UP(len, 4096) << 16 | 4096);
		intel_ring_emit(ring, cs_offset);
		intel_ring_emit(ring, 4096);
		intel_ring_emit(ring, offset);

		intel_ring_emit(ring, MI_FLUSH);
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_advance(ring);

		/* ... and execute it. */
		offset = cs_offset;
	}

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
	intel_ring_emit(ring, offset + len - 8);
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
i915_dispatch_execbuffer(struct intel_engine_cs *ring,
			 u64 offset, u32 len,
			 unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_BATCH_BUFFER_START | MI_BATCH_GTT);
	intel_ring_emit(ring, offset | (flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE));
	intel_ring_advance(ring);

	return 0;
}

static void cleanup_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_object *obj;

	obj = ring->status_page.obj;
	if (obj == NULL)
		return;

	kunmap(sg_page(obj->pages->sgl));
	i915_gem_object_ggtt_unpin(obj);
	drm_gem_object_unreference(&obj->base);
	ring->status_page.obj = NULL;
}

static int init_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_gem_object *obj;

	if ((obj = ring->status_page.obj) == NULL) {
		unsigned flags;
		int ret;

		obj = i915_gem_alloc_object(ring->dev, 4096);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate status page\n");
			return -ENOMEM;
		}

		ret = i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
		if (ret)
			goto err_unref;

		flags = 0;
		if (!HAS_LLC(ring->dev))
			/* On g33, we cannot place HWS above 256MiB, so
			 * restrict its pinning to the low mappable arena.
			 * Though this restriction is not documented for
			 * gen4, gen5, or byt, they also behave similarly
			 * and hang if the HWS is placed at the top of the
			 * GTT. To generalise, it appears that all !llc
			 * platforms have issues with us placing the HWS
			 * above the mappable region (even though we never
			 * actually map it).
			 */
			flags |= PIN_MAPPABLE;
		ret = i915_gem_obj_ggtt_pin(obj, 4096, flags);
		if (ret) {
err_unref:
			drm_gem_object_unreference(&obj->base);
			return ret;
		}

		ring->status_page.obj = obj;
	}

	ring->status_page.gfx_addr = i915_gem_obj_ggtt_offset(obj);
	ring->status_page.page_addr = kmap(sg_page(obj->pages->sgl));
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	DRM_DEBUG_DRIVER("%s hws offset: 0x%08x\n",
			ring->name, ring->status_page.gfx_addr);

	return 0;
}

static int init_phys_status_page(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	if (!dev_priv->status_page_dmah) {
		dev_priv->status_page_dmah =
			drm_pci_alloc(ring->dev, PAGE_SIZE, PAGE_SIZE);
		if (!dev_priv->status_page_dmah)
			return -ENOMEM;
	}

	ring->status_page.page_addr = dev_priv->status_page_dmah->vaddr;
	memset(ring->status_page.page_addr, 0, PAGE_SIZE);

	return 0;
}

void intel_unpin_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
{
	iounmap(ringbuf->virtual_start);
	ringbuf->virtual_start = NULL;
	i915_gem_object_ggtt_unpin(ringbuf->obj);
}

int intel_pin_and_map_ringbuffer_obj(struct drm_device *dev,
				     struct intel_ringbuffer *ringbuf)
{
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct drm_i915_gem_object *obj = ringbuf->obj;
	int ret;

	ret = i915_gem_obj_ggtt_pin(obj, PAGE_SIZE, PIN_MAPPABLE);
	if (ret)
		return ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret) {
		i915_gem_object_ggtt_unpin(obj);
		return ret;
	}

	ringbuf->virtual_start = ioremap_wc(dev_priv->gtt.mappable_base +
			i915_gem_obj_ggtt_offset(obj), ringbuf->size);
	if (ringbuf->virtual_start == NULL) {
		i915_gem_object_ggtt_unpin(obj);
		return -EINVAL;
	}

	return 0;
}

void intel_destroy_ringbuffer_obj(struct intel_ringbuffer *ringbuf)
{
	drm_gem_object_unreference(&ringbuf->obj->base);
	ringbuf->obj = NULL;
}

1768 1769
int intel_alloc_ringbuffer_obj(struct drm_device *dev,
			       struct intel_ringbuffer *ringbuf)
1770
{
	struct drm_i915_gem_object *obj;

	obj = NULL;
	if (!HAS_LLC(dev))
		obj = i915_gem_object_create_stolen(dev, ringbuf->size);
	if (obj == NULL)
		obj = i915_gem_alloc_object(dev, ringbuf->size);
	if (obj == NULL)
		return -ENOMEM;

	/* mark ring buffers as read-only from GPU side by default */
	obj->gt_ro = 1;

	ringbuf->obj = obj;

	return 0;
}

static int intel_init_ring_buffer(struct drm_device *dev,
				  struct intel_engine_cs *ring)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int ret;

	if (ringbuf == NULL) {
		ringbuf = kzalloc(sizeof(*ringbuf), GFP_KERNEL);
		if (!ringbuf)
			return -ENOMEM;
		ring->buffer = ringbuf;
	}

	ring->dev = dev;
	INIT_LIST_HEAD(&ring->active_list);
	INIT_LIST_HEAD(&ring->request_list);
	INIT_LIST_HEAD(&ring->execlist_queue);
	ringbuf->size = 32 * PAGE_SIZE;
	ringbuf->ring = ring;
	memset(ring->semaphore.sync_seqno, 0, sizeof(ring->semaphore.sync_seqno));

	init_waitqueue_head(&ring->irq_queue);

	if (I915_NEED_GFX_HWS(dev)) {
		ret = init_status_page(ring);
		if (ret)
			goto error;
	} else {
		BUG_ON(ring->id != RCS);
		ret = init_phys_status_page(ring);
		if (ret)
			goto error;
	}

	if (ringbuf->obj == NULL) {
		ret = intel_alloc_ringbuffer_obj(dev, ringbuf);
		if (ret) {
			DRM_ERROR("Failed to allocate ringbuffer %s: %d\n",
					ring->name, ret);
			goto error;
		}

		ret = intel_pin_and_map_ringbuffer_obj(dev, ringbuf);
		if (ret) {
			DRM_ERROR("Failed to pin and map ringbuffer %s: %d\n",
					ring->name, ret);
			intel_destroy_ringbuffer_obj(ringbuf);
			goto error;
		}
	}

	/* Workaround an erratum on the i830 which causes a hang if
	 * the TAIL pointer points to within the last 2 cachelines
	 * of the buffer.
	 */
	ringbuf->effective_size = ringbuf->size;
	if (IS_I830(dev) || IS_845G(dev))
		ringbuf->effective_size -= 2 * CACHELINE_BYTES;

	ret = i915_cmd_parser_init_ring(ring);
	if (ret)
		goto error;

	ret = ring->init(ring);
	if (ret)
		goto error;

	return 0;

error:
	kfree(ringbuf);
	ring->buffer = NULL;
	return ret;
}

void intel_cleanup_ring_buffer(struct intel_engine_cs *ring)
{
	struct drm_i915_private *dev_priv;
	struct intel_ringbuffer *ringbuf;

	if (!intel_ring_initialized(ring))
		return;

	dev_priv = to_i915(ring->dev);
	ringbuf = ring->buffer;

	intel_stop_ring_buffer(ring);
	WARN_ON(!IS_GEN2(ring->dev) && (I915_READ_MODE(ring) & MODE_IDLE) == 0);

	intel_unpin_ringbuffer_obj(ringbuf);
	intel_destroy_ringbuffer_obj(ringbuf);
	ring->preallocated_lazy_request = NULL;
	ring->outstanding_lazy_seqno = 0;

	if (ring->cleanup)
		ring->cleanup(ring);

	cleanup_status_page(ring);

	i915_cmd_parser_fini_ring(ring);

	kfree(ringbuf);
	ring->buffer = NULL;
}

static int intel_ring_wait_request(struct intel_engine_cs *ring, int n)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	struct drm_i915_gem_request *request;
	u32 seqno = 0;
	int ret;

	if (ringbuf->last_retired_head != -1) {
		ringbuf->head = ringbuf->last_retired_head;
		ringbuf->last_retired_head = -1;

		ringbuf->space = intel_ring_space(ringbuf);
		if (ringbuf->space >= n)
			return 0;
	}

	list_for_each_entry(request, &ring->request_list, list) {
		if (__intel_ring_space(request->tail, ringbuf->tail,
				       ringbuf->size) >= n) {
			seqno = request->seqno;
			break;
		}
	}

	if (seqno == 0)
		return -ENOSPC;

	ret = i915_wait_seqno(ring, seqno);
	if (ret)
		return ret;

	i915_gem_retire_requests_ring(ring);
	ringbuf->head = ringbuf->last_retired_head;
	ringbuf->last_retired_head = -1;

	ringbuf->space = intel_ring_space(ringbuf);
	return 0;
}

static int ring_wait_for_space(struct intel_engine_cs *ring, int n)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	unsigned long end;
	int ret;

	ret = intel_ring_wait_request(ring, n);
	if (ret != -ENOSPC)
		return ret;

	/* force the tail write in case we have been skipping them */
	__intel_ring_advance(ring);

	/* With GEM the hangcheck timer should kick us out of the loop,
	 * leaving it early runs the risk of corrupting GEM state (due
	 * to running on almost untested codepaths). But on resume
	 * timers don't work yet, so prevent a complete hang in that
	 * case by choosing an insanely large timeout. */
	end = jiffies + 60 * HZ;

	trace_i915_ring_wait_begin(ring);
	do {
		ringbuf->head = I915_READ_HEAD(ring);
		ringbuf->space = intel_ring_space(ringbuf);
		if (ringbuf->space >= n) {
			ret = 0;
			break;
		}

		msleep(1);

		if (dev_priv->mm.interruptible && signal_pending(current)) {
			ret = -ERESTARTSYS;
			break;
		}

		ret = i915_gem_check_wedge(&dev_priv->gpu_error,
					   dev_priv->mm.interruptible);
		if (ret)
			break;

		if (time_after(jiffies, end)) {
			ret = -EBUSY;
			break;
		}
	} while (1);
	trace_i915_ring_wait_end(ring);
	return ret;
}

static int intel_wrap_ring_buffer(struct intel_engine_cs *ring)
{
	uint32_t __iomem *virt;
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int rem = ringbuf->size - ringbuf->tail;

	if (ringbuf->space < rem) {
		int ret = ring_wait_for_space(ring, rem);
		if (ret)
			return ret;
	}

	virt = ringbuf->virtual_start + ringbuf->tail;
	rem /= 4;
	while (rem--)
		iowrite32(MI_NOOP, virt++);

	ringbuf->tail = 0;
	ringbuf->space = intel_ring_space(ringbuf);

	return 0;
}
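
/*
 * Worked example (illustrative numbers, assuming 4 KiB pages): with the
 * default 32-page ring, ringbuf->size == 131072.  If the tail sits at byte
 * 130048, rem == 1024 bytes, so 256 MI_NOOP dwords are written and the tail
 * is reset to 0; a subsequent command packet therefore never straddles the
 * end of the buffer.
 */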

int intel_ring_idle(struct intel_engine_cs *ring)
{
	u32 seqno;
	int ret;

	/* We need to add any requests required to flush the objects and ring */
	if (ring->outstanding_lazy_seqno) {
		ret = i915_add_request(ring, NULL);
		if (ret)
			return ret;
	}

	/* Wait upon the last request to be completed */
	if (list_empty(&ring->request_list))
		return 0;

	seqno = list_entry(ring->request_list.prev,
			   struct drm_i915_gem_request,
			   list)->seqno;

	return i915_wait_seqno(ring, seqno);
}

static int
intel_ring_alloc_seqno(struct intel_engine_cs *ring)
{
	if (ring->outstanding_lazy_seqno)
		return 0;

	if (ring->preallocated_lazy_request == NULL) {
		struct drm_i915_gem_request *request;

		request = kmalloc(sizeof(*request), GFP_KERNEL);
		if (request == NULL)
			return -ENOMEM;

		ring->preallocated_lazy_request = request;
	}

	return i915_gem_get_seqno(ring->dev, &ring->outstanding_lazy_seqno);
}

static int __intel_ring_prepare(struct intel_engine_cs *ring,
				int bytes)
{
	struct intel_ringbuffer *ringbuf = ring->buffer;
	int ret;

	if (unlikely(ringbuf->tail + bytes > ringbuf->effective_size)) {
		ret = intel_wrap_ring_buffer(ring);
		if (unlikely(ret))
			return ret;
	}

	if (unlikely(ringbuf->space < bytes)) {
		ret = ring_wait_for_space(ring, bytes);
		if (unlikely(ret))
			return ret;
	}

	return 0;
}
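
/*
 * Together, the wrap and wait-for-space steps above guarantee that the
 * 'bytes' requested by intel_ring_begin() are available as one contiguous
 * run before the effective end of the buffer.
 */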

int intel_ring_begin(struct intel_engine_cs *ring,
		     int num_dwords)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	int ret;

	ret = i915_gem_check_wedge(&dev_priv->gpu_error,
				   dev_priv->mm.interruptible);
	if (ret)
		return ret;

	ret = __intel_ring_prepare(ring, num_dwords * sizeof(uint32_t));
	if (ret)
		return ret;

	/* Preallocate the olr (outstanding lazy request) before touching the ring */
	ret = intel_ring_alloc_seqno(ring);
	if (ret)
		return ret;

	ring->buffer->space -= num_dwords * sizeof(uint32_t);
	return 0;
}
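
/*
 * Typical usage (minimal sketch): reserve space, emit exactly that many
 * dwords, then advance, e.g.
 *
 *	ret = intel_ring_begin(ring, 2);
 *	if (ret)
 *		return ret;
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_emit(ring, MI_NOOP);
 *	intel_ring_advance(ring);
 */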

/* Align the ring tail to a cacheline boundary */
int intel_ring_cacheline_align(struct intel_engine_cs *ring)
{
	int num_dwords = (ring->buffer->tail & (CACHELINE_BYTES - 1)) / sizeof(uint32_t);
	int ret;

	if (num_dwords == 0)
		return 0;

	num_dwords = CACHELINE_BYTES / sizeof(uint32_t) - num_dwords;
	ret = intel_ring_begin(ring, num_dwords);
	if (ret)
		return ret;

	while (num_dwords--)
		intel_ring_emit(ring, MI_NOOP);

	intel_ring_advance(ring);

	return 0;
}
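
/*
 * Worked example (illustrative, assuming CACHELINE_BYTES == 64): if the tail
 * is 40 bytes into a cacheline, num_dwords starts as 40 / 4 = 10 and is then
 * rewritten to 16 - 10 = 6, so six MI_NOOPs pad the tail up to the next
 * 64-byte boundary.
 */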

void intel_ring_init_seqno(struct intel_engine_cs *ring, u32 seqno)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;

	BUG_ON(ring->outstanding_lazy_seqno);

	if (INTEL_INFO(dev)->gen == 6 || INTEL_INFO(dev)->gen == 7) {
		I915_WRITE(RING_SYNC_0(ring->mmio_base), 0);
		I915_WRITE(RING_SYNC_1(ring->mmio_base), 0);
		if (HAS_VEBOX(dev))
			I915_WRITE(RING_SYNC_2(ring->mmio_base), 0);
	}

	ring->set_seqno(ring, seqno);
	ring->hangcheck.seqno = seqno;
}

static void gen6_bsd_ring_write_tail(struct intel_engine_cs *ring,
				     u32 value)
{
	struct drm_i915_private *dev_priv = ring->dev->dev_private;

	/* Every tail move must follow the sequence below */

	/* Disable notification that the ring is IDLE. The GT
	 * will then assume that it is busy and bring it out of rc6.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_ENABLE(GEN6_BSD_SLEEP_MSG_DISABLE));

	/* Clear the context id. Here be magic! */
	I915_WRITE64(GEN6_BSD_RNCID, 0x0);

	/* Wait for the ring not to be idle, i.e. for it to wake up. */
	if (wait_for((I915_READ(GEN6_BSD_SLEEP_PSMI_CONTROL) &
		      GEN6_BSD_SLEEP_INDICATOR) == 0,
		     50))
		DRM_ERROR("timed out waiting for the BSD ring to wake up\n");

	/* Now that the ring is fully powered up, update the tail */
	I915_WRITE_TAIL(ring, value);
	POSTING_READ(RING_TAIL(ring->mmio_base));

	/* Let the ring send IDLE messages to the GT again,
	 * and so let it sleep to conserve power when idle.
	 */
	I915_WRITE(GEN6_BSD_SLEEP_PSMI_CONTROL,
		   _MASKED_BIT_DISABLE(GEN6_BSD_SLEEP_MSG_DISABLE));
}

static int gen6_bsd_ring_flush(struct intel_engine_cs *ring,
			       u32 invalidate, u32 flush)
{
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (INTEL_INFO(ring->dev)->gen >= 8)
		cmd += 1;
	/*
	 * Bspec vol 1c.5 - video engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_GPU_DOMAINS)
		cmd |= MI_INVALIDATE_TLB | MI_INVALIDATE_BSD |
			MI_FLUSH_DW_STORE_INDEX | MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(ring->dev)->gen >= 8) {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	} else {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	}
	intel_ring_advance(ring);
	return 0;
}

static int
gen8_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			      u64 offset, u32 len,
			      unsigned flags)
{
	bool ppgtt = USES_PPGTT(ring->dev) && !(flags & I915_DISPATCH_SECURE);
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	/* FIXME(BDW): Address space and security selectors. */
	intel_ring_emit(ring, MI_BATCH_BUFFER_START_GEN8 | (ppgtt<<8));
	intel_ring_emit(ring, lower_32_bits(offset));
	intel_ring_emit(ring, upper_32_bits(offset));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int
hsw_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			      u64 offset, u32 len,
			      unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ?
			 0 : MI_BATCH_PPGTT_HSW | MI_BATCH_NON_SECURE_HSW));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

static int
gen6_ring_dispatch_execbuffer(struct intel_engine_cs *ring,
			      u64 offset, u32 len,
			      unsigned flags)
{
	int ret;

	ret = intel_ring_begin(ring, 2);
	if (ret)
		return ret;

	intel_ring_emit(ring,
			MI_BATCH_BUFFER_START |
			(flags & I915_DISPATCH_SECURE ? 0 : MI_BATCH_NON_SECURE_I965));
	/* bit0-7 is the length on GEN6+ */
	intel_ring_emit(ring, offset);
	intel_ring_advance(ring);

	return 0;
}

/* Blitter support (SandyBridge+) */

static int gen6_ring_flush(struct intel_engine_cs *ring,
			   u32 invalidate, u32 flush)
{
	struct drm_device *dev = ring->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t cmd;
	int ret;

	ret = intel_ring_begin(ring, 4);
	if (ret)
		return ret;

	cmd = MI_FLUSH_DW;
	if (INTEL_INFO(ring->dev)->gen >= 8)
		cmd += 1;
	/*
	 * Bspec vol 1c.3 - blitter engine command streamer:
	 * "If ENABLED, all TLBs will be invalidated once the flush
	 * operation is complete. This bit is only valid when the
	 * Post-Sync Operation field is a value of 1h or 3h."
	 */
	if (invalidate & I915_GEM_DOMAIN_RENDER)
		cmd |= MI_INVALIDATE_TLB | MI_FLUSH_DW_STORE_INDEX |
			MI_FLUSH_DW_OP_STOREDW;
	intel_ring_emit(ring, cmd);
	intel_ring_emit(ring, I915_GEM_HWS_SCRATCH_ADDR | MI_FLUSH_DW_USE_GTT);
	if (INTEL_INFO(ring->dev)->gen >= 8) {
		intel_ring_emit(ring, 0); /* upper addr */
		intel_ring_emit(ring, 0); /* value */
	} else {
		intel_ring_emit(ring, 0);
		intel_ring_emit(ring, MI_NOOP);
	}
	intel_ring_advance(ring);

	if (!invalidate && flush) {
		if (IS_GEN7(dev))
			return gen7_ring_fbc_flush(ring, FBC_REND_CACHE_CLEAN);
		else if (IS_BROADWELL(dev))
			dev_priv->fbc.need_sw_cache_clean = true;
	}

	return 0;
}

int intel_init_render_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[RCS];
	struct drm_i915_gem_object *obj;
	int ret;

	ring->name = "render ring";
	ring->id = RCS;
	ring->mmio_base = RENDER_RING_BASE;

	if (INTEL_INFO(dev)->gen >= 8) {
		if (i915_semaphore_is_enabled(dev)) {
			obj = i915_gem_alloc_object(dev, 4096);
			if (obj == NULL) {
				DRM_ERROR("Failed to allocate semaphore bo. Disabling semaphores\n");
				i915.semaphores = 0;
			} else {
				i915_gem_object_set_cache_level(obj, I915_CACHE_LLC);
				ret = i915_gem_obj_ggtt_pin(obj, 0, PIN_NONBLOCK);
				if (ret != 0) {
					drm_gem_object_unreference(&obj->base);
					DRM_ERROR("Failed to pin semaphore bo. Disabling semaphores\n");
					i915.semaphores = 0;
				} else
					dev_priv->semaphore_obj = obj;
			}
		}

		ring->init_context = intel_ring_workarounds_emit;
		ring->add_request = gen6_add_request;
		ring->flush = gen8_render_ring_flush;
		ring->irq_get = gen8_ring_get_irq;
		ring->irq_put = gen8_ring_put_irq;
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (i915_semaphore_is_enabled(dev)) {
			WARN_ON(!dev_priv->semaphore_obj);
			ring->semaphore.sync_to = gen8_ring_sync;
			ring->semaphore.signal = gen8_rcs_signal;
			GEN8_RING_SEMAPHORE_INIT;
		}
	} else if (INTEL_INFO(dev)->gen >= 6) {
		ring->add_request = gen6_add_request;
		ring->flush = gen7_render_ring_flush;
		if (INTEL_INFO(dev)->gen == 6)
			ring->flush = gen6_render_ring_flush;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen6_ring_sync;
			ring->semaphore.signal = gen6_signal;
			/*
			 * The current semaphore is only applied on pre-gen8
			 * platform.  And there is no VCS2 ring on the pre-gen8
			 * platform. So the semaphore between RCS and VCS2 is
			 * initialized as INVALID.  Gen8 will initialize the
			 * sema between VCS2 and RCS later.
			 */
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_RV;
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_RB;
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_RVE;
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.signal[RCS] = GEN6_NOSYNC;
			ring->semaphore.mbox.signal[VCS] = GEN6_VRSYNC;
			ring->semaphore.mbox.signal[BCS] = GEN6_BRSYNC;
			ring->semaphore.mbox.signal[VECS] = GEN6_VERSYNC;
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
		}
	} else if (IS_GEN5(dev)) {
		ring->add_request = pc_render_add_request;
		ring->flush = gen4_render_ring_flush;
		ring->get_seqno = pc_render_get_seqno;
		ring->set_seqno = pc_render_set_seqno;
		ring->irq_get = gen5_ring_get_irq;
		ring->irq_put = gen5_ring_put_irq;
		ring->irq_enable_mask = GT_RENDER_USER_INTERRUPT |
					GT_RENDER_PIPECTL_NOTIFY_INTERRUPT;
	} else {
		ring->add_request = i9xx_add_request;
		if (INTEL_INFO(dev)->gen < 4)
			ring->flush = gen2_render_ring_flush;
		else
			ring->flush = gen4_render_ring_flush;
		ring->get_seqno = ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (IS_GEN2(dev)) {
			ring->irq_get = i8xx_ring_get_irq;
			ring->irq_put = i8xx_ring_put_irq;
		} else {
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->irq_enable_mask = I915_USER_INTERRUPT;
	}
	ring->write_tail = ring_write_tail;

	if (IS_HASWELL(dev))
		ring->dispatch_execbuffer = hsw_ring_dispatch_execbuffer;
	else if (IS_GEN8(dev))
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 6)
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
	else if (INTEL_INFO(dev)->gen >= 4)
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	else if (IS_I830(dev) || IS_845G(dev))
		ring->dispatch_execbuffer = i830_dispatch_execbuffer;
	else
		ring->dispatch_execbuffer = i915_dispatch_execbuffer;
	ring->init = init_render_ring;
	ring->cleanup = render_ring_cleanup;

	/* Workaround batchbuffer to combat CS tlb bug. */
	if (HAS_BROKEN_CS_TLB(dev)) {
		obj = i915_gem_alloc_object(dev, I830_WA_SIZE);
		if (obj == NULL) {
			DRM_ERROR("Failed to allocate batch bo\n");
			return -ENOMEM;
		}

		ret = i915_gem_obj_ggtt_pin(obj, 0, 0);
		if (ret != 0) {
			drm_gem_object_unreference(&obj->base);
			DRM_ERROR("Failed to pin batch bo\n");
			return ret;
		}

		ring->scratch.obj = obj;
		ring->scratch.gtt_offset = i915_gem_obj_ggtt_offset(obj);
	}

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_bsd_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[VCS];

	ring->name = "bsd ring";
	ring->id = VCS;

	ring->write_tail = ring_write_tail;
	if (INTEL_INFO(dev)->gen >= 6) {
		ring->mmio_base = GEN6_BSD_RING_BASE;
		/* gen6 bsd needs a special wa for tail updates */
		if (IS_GEN6(dev))
			ring->write_tail = gen6_bsd_ring_write_tail;
		ring->flush = gen6_bsd_ring_flush;
		ring->add_request = gen6_add_request;
		ring->get_seqno = gen6_ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (INTEL_INFO(dev)->gen >= 8) {
			ring->irq_enable_mask =
				GT_RENDER_USER_INTERRUPT << GEN8_VCS1_IRQ_SHIFT;
			ring->irq_get = gen8_ring_get_irq;
			ring->irq_put = gen8_ring_put_irq;
			ring->dispatch_execbuffer =
				gen8_ring_dispatch_execbuffer;
			if (i915_semaphore_is_enabled(dev)) {
				ring->semaphore.sync_to = gen8_ring_sync;
				ring->semaphore.signal = gen8_xcs_signal;
				GEN8_RING_SEMAPHORE_INIT;
			}
		} else {
			ring->irq_enable_mask = GT_BSD_USER_INTERRUPT;
			ring->irq_get = gen6_ring_get_irq;
			ring->irq_put = gen6_ring_put_irq;
			ring->dispatch_execbuffer =
				gen6_ring_dispatch_execbuffer;
			if (i915_semaphore_is_enabled(dev)) {
				ring->semaphore.sync_to = gen6_ring_sync;
				ring->semaphore.signal = gen6_signal;
				ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VR;
				ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_INVALID;
				ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VB;
				ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_VVE;
				ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
				ring->semaphore.mbox.signal[RCS] = GEN6_RVSYNC;
				ring->semaphore.mbox.signal[VCS] = GEN6_NOSYNC;
				ring->semaphore.mbox.signal[BCS] = GEN6_BVSYNC;
				ring->semaphore.mbox.signal[VECS] = GEN6_VEVSYNC;
				ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
			}
		}
	} else {
		ring->mmio_base = BSD_RING_BASE;
		ring->flush = bsd_ring_flush;
		ring->add_request = i9xx_add_request;
		ring->get_seqno = ring_get_seqno;
		ring->set_seqno = ring_set_seqno;
		if (IS_GEN5(dev)) {
			ring->irq_enable_mask = ILK_BSD_USER_INTERRUPT;
			ring->irq_get = gen5_ring_get_irq;
			ring->irq_put = gen5_ring_put_irq;
		} else {
			ring->irq_enable_mask = I915_BSD_USER_INTERRUPT;
			ring->irq_get = i9xx_ring_get_irq;
			ring->irq_put = i9xx_ring_put_irq;
		}
		ring->dispatch_execbuffer = i965_dispatch_execbuffer;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

/**
 * Initialize the second BSD ring (only present on Broadwell GT3).
 */
int intel_init_bsd2_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[VCS2];

	if (INTEL_INFO(dev)->gen != 8) {
		DRM_ERROR("No dual-BSD ring on non-BDW machine\n");
		return -EINVAL;
	}

	ring->name = "bsd2 ring";
	ring->id = VCS2;

	ring->write_tail = ring_write_tail;
	ring->mmio_base = GEN8_BSD2_RING_BASE;
	ring->flush = gen6_bsd_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->set_seqno = ring_set_seqno;
	ring->irq_enable_mask =
			GT_RENDER_USER_INTERRUPT << GEN8_VCS2_IRQ_SHIFT;
	ring->irq_get = gen8_ring_get_irq;
	ring->irq_put = gen8_ring_put_irq;
	ring->dispatch_execbuffer =
			gen8_ring_dispatch_execbuffer;
	if (i915_semaphore_is_enabled(dev)) {
		ring->semaphore.sync_to = gen8_ring_sync;
		ring->semaphore.signal = gen8_xcs_signal;
		GEN8_RING_SEMAPHORE_INIT;
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_blt_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[BCS];

	ring->name = "blitter ring";
	ring->id = BCS;

	ring->mmio_base = BLT_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = gen6_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->set_seqno = ring_set_seqno;
	if (INTEL_INFO(dev)->gen >= 8) {
		ring->irq_enable_mask =
			GT_RENDER_USER_INTERRUPT << GEN8_BCS_IRQ_SHIFT;
		ring->irq_get = gen8_ring_get_irq;
		ring->irq_put = gen8_ring_put_irq;
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen8_ring_sync;
			ring->semaphore.signal = gen8_xcs_signal;
			GEN8_RING_SEMAPHORE_INIT;
		}
	} else {
		ring->irq_enable_mask = GT_BLT_USER_INTERRUPT;
		ring->irq_get = gen6_ring_get_irq;
		ring->irq_put = gen6_ring_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.signal = gen6_signal;
			ring->semaphore.sync_to = gen6_ring_sync;
			/*
			 * The current semaphore is only applied on pre-gen8
			 * platform.  And there is no VCS2 ring on the pre-gen8
			 * platform. So the semaphore between BCS and VCS2 is
			 * initialized as INVALID.  Gen8 will initialize the
			 * sema between BCS and VCS2 later.
			 */
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_BR;
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_BV;
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_BVE;
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.signal[RCS] = GEN6_RBSYNC;
			ring->semaphore.mbox.signal[VCS] = GEN6_VBSYNC;
			ring->semaphore.mbox.signal[BCS] = GEN6_NOSYNC;
			ring->semaphore.mbox.signal[VECS] = GEN6_VEBSYNC;
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
		}
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int intel_init_vebox_ring_buffer(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring = &dev_priv->ring[VECS];

	ring->name = "video enhancement ring";
	ring->id = VECS;

	ring->mmio_base = VEBOX_RING_BASE;
	ring->write_tail = ring_write_tail;
	ring->flush = gen6_ring_flush;
	ring->add_request = gen6_add_request;
	ring->get_seqno = gen6_ring_get_seqno;
	ring->set_seqno = ring_set_seqno;

	if (INTEL_INFO(dev)->gen >= 8) {
		ring->irq_enable_mask =
			GT_RENDER_USER_INTERRUPT << GEN8_VECS_IRQ_SHIFT;
		ring->irq_get = gen8_ring_get_irq;
		ring->irq_put = gen8_ring_put_irq;
		ring->dispatch_execbuffer = gen8_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen8_ring_sync;
			ring->semaphore.signal = gen8_xcs_signal;
			GEN8_RING_SEMAPHORE_INIT;
		}
	} else {
		ring->irq_enable_mask = PM_VEBOX_USER_INTERRUPT;
		ring->irq_get = hsw_vebox_get_irq;
		ring->irq_put = hsw_vebox_put_irq;
		ring->dispatch_execbuffer = gen6_ring_dispatch_execbuffer;
		if (i915_semaphore_is_enabled(dev)) {
			ring->semaphore.sync_to = gen6_ring_sync;
			ring->semaphore.signal = gen6_signal;
			ring->semaphore.mbox.wait[RCS] = MI_SEMAPHORE_SYNC_VER;
			ring->semaphore.mbox.wait[VCS] = MI_SEMAPHORE_SYNC_VEV;
			ring->semaphore.mbox.wait[BCS] = MI_SEMAPHORE_SYNC_VEB;
			ring->semaphore.mbox.wait[VECS] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.wait[VCS2] = MI_SEMAPHORE_SYNC_INVALID;
			ring->semaphore.mbox.signal[RCS] = GEN6_RVESYNC;
			ring->semaphore.mbox.signal[VCS] = GEN6_VVESYNC;
			ring->semaphore.mbox.signal[BCS] = GEN6_BVESYNC;
			ring->semaphore.mbox.signal[VECS] = GEN6_NOSYNC;
			ring->semaphore.mbox.signal[VCS2] = GEN6_NOSYNC;
		}
	}
	ring->init = init_ring_common;

	return intel_init_ring_buffer(dev, ring);
}

int
intel_ring_flush_all_caches(struct intel_engine_cs *ring)
{
	int ret;

	if (!ring->gpu_caches_dirty)
		return 0;

	ret = ring->flush(ring, 0, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	trace_i915_gem_ring_flush(ring, 0, I915_GEM_GPU_DOMAINS);

	ring->gpu_caches_dirty = false;
	return 0;
}

int
intel_ring_invalidate_all_caches(struct intel_engine_cs *ring)
{
	uint32_t flush_domains;
	int ret;

	flush_domains = 0;
	if (ring->gpu_caches_dirty)
		flush_domains = I915_GEM_GPU_DOMAINS;

	ret = ring->flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);
	if (ret)
		return ret;

	trace_i915_gem_ring_flush(ring, I915_GEM_GPU_DOMAINS, flush_domains);

	ring->gpu_caches_dirty = false;
	return 0;
}

void
intel_stop_ring_buffer(struct intel_engine_cs *ring)
{
	int ret;

	if (!intel_ring_initialized(ring))
		return;

	ret = intel_ring_idle(ring);
	if (ret && !i915_reset_in_progress(&to_i915(ring->dev)->gpu_error))
		DRM_ERROR("failed to quiesce %s whilst cleaning up: %d\n",
			  ring->name, ret);

	stop_ring(ring);
}