/*
 * Copyright (c) 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Mika Kuoppala <mika.kuoppala@intel.com>
 *
 */

#include <generated/utsrelease.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>
#include <drm/drm_print.h>
#include <linux/ascii85.h>

#include "i915_gpu_error.h"
#include "i915_drv.h"
static inline const struct intel_engine_cs *
engine_lookup(const struct drm_i915_private *i915, unsigned int id)
{
	if (id >= I915_NUM_ENGINES)
		return NULL;

	return i915->engine[id];
}

static inline const char *
__engine_name(const struct intel_engine_cs *engine)
{
	return engine ? engine->name : "";
}

static const char *
engine_name(const struct drm_i915_private *i915, unsigned int id)
{
	return __engine_name(engine_lookup(i915, id));
}

static const char *tiling_flag(int tiling)
{
	switch (tiling) {
	default:
	case I915_TILING_NONE: return "";
	case I915_TILING_X: return " X";
	case I915_TILING_Y: return " Y";
	}
}

static const char *dirty_flag(int dirty)
{
	return dirty ? " dirty" : "";
}

static const char *purgeable_flag(int purgeable)
{
	return purgeable ? " purgeable" : "";
}

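/*
 * The error state buffer is read back in windows: e->start is the file
 * offset the reader asked for, e->pos tracks how far through the virtual
 * output stream we have printed, and e->bytes counts what has actually
 * landed in e->buf. The helpers below skip output that falls entirely
 * before the window and clamp output that would overflow it.
 */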
static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
{
	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
		e->err = -ENOSPC;
		return false;
	}

	if (e->bytes == e->size - 1 || e->err)
		return false;

	return true;
}

static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
			      unsigned len)
{
	if (e->pos + len <= e->start) {
		e->pos += len;
		return false;
	}

	/* First vsnprintf needs to fit in its entirety for memmove */
	if (len >= e->size) {
		e->err = -EIO;
		return false;
	}

	return true;
}

static void __i915_error_advance(struct drm_i915_error_state_buf *e,
				 unsigned len)
{
	/* If this is the first printf in this window, adjust it so that
	 * the start position matches the start of the buffer
	 */

	if (e->pos < e->start) {
		const size_t off = e->start - e->pos;

		/* Should not happen but be paranoid */
		if (off > len || e->bytes) {
			e->err = -EIO;
			return;
		}

		memmove(e->buf, e->buf + off, len - off);
		e->bytes = len - off;
		e->pos = e->start;
		return;
	}

	e->bytes += len;
	e->pos += len;
}

__printf(2, 0)
static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
			       const char *f, va_list args)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	/* Seek to the first printf which hits the start position */
	if (e->pos < e->start) {
		va_list tmp;

		va_copy(tmp, args);
		len = vsnprintf(NULL, 0, f, tmp);
		va_end(tmp);

		if (!__i915_error_seek(e, len))
			return;
	}

	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;

	__i915_error_advance(e, len);
}

static void i915_error_puts(struct drm_i915_error_state_buf *e,
			    const char *str)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	len = strlen(str);

	/* Seek to the first printf which hits the start position */
	if (e->pos < e->start) {
		if (!__i915_error_seek(e, len))
			return;
	}

	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;
	memcpy(e->buf + e->bytes, str, len);

	__i915_error_advance(e, len);
}

#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
#define err_puts(e, s) i915_error_puts(e, s)

static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
{
	i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
}

static inline struct drm_printer
i915_error_printer(struct drm_i915_error_state_buf *e)
{
	struct drm_printer p = {
		.printfn = __i915_printfn_error,
		.arg = e,
	};
	return p;
}

#ifdef CONFIG_DRM_I915_COMPRESS_ERROR

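/*
 * With CONFIG_DRM_I915_COMPRESS_ERROR enabled, captured pages are run
 * through zlib deflate as they are copied. c->tmp, when it can be
 * allocated, is a staging page so that write-combining memory can be
 * pulled in with i915_memcpy_from_wc() before being fed to the
 * compressor.
 */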
struct compress {
	struct z_stream_s zstream;
	void *tmp;
};

static bool compress_init(struct compress *c)
{
	struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream));

	zstream->workspace =
		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
			GFP_ATOMIC | __GFP_NOWARN);
	if (!zstream->workspace)
		return false;

	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
		kfree(zstream->workspace);
		return false;
	}

	c->tmp = NULL;
	if (i915_has_memcpy_from_wc())
		c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);

	return true;
}

static void *compress_next_page(struct drm_i915_error_object *dst)
{
	unsigned long page;

	if (dst->page_count >= dst->num_pages)
		return ERR_PTR(-ENOSPC);

	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!page)
		return ERR_PTR(-ENOMEM);

	return dst->pages[dst->page_count++] = (void *)page;
}

static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	zstream->next_in = src;
	if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
		zstream->next_in = c->tmp;
	zstream->avail_in = PAGE_SIZE;

	do {
		if (zstream->avail_out == 0) {
			zstream->next_out = compress_next_page(dst);
			if (IS_ERR(zstream->next_out))
				return PTR_ERR(zstream->next_out);

			zstream->avail_out = PAGE_SIZE;
		}

		if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)
			return -EIO;
	} while (zstream->avail_in);

	/* Fallback to uncompressed if we increase size? */
	if (0 && zstream->total_out > zstream->total_in)
		return -E2BIG;

	return 0;
}

static int compress_flush(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	do {
		switch (zlib_deflate(zstream, Z_FINISH)) {
		case Z_OK: /* more space requested */
			zstream->next_out = compress_next_page(dst);
			if (IS_ERR(zstream->next_out))
				return PTR_ERR(zstream->next_out);

			zstream->avail_out = PAGE_SIZE;
			break;

		case Z_STREAM_END:
			goto end;

		default: /* any error */
			return -EIO;
		}
	} while (1);

end:
	memset(zstream->next_out, 0, zstream->avail_out);
	dst->unused = zstream->avail_out;
	return 0;
}

static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	zlib_deflateEnd(zstream);
	kfree(zstream->workspace);

	if (c->tmp)
		free_page((unsigned long)c->tmp);
}

static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, ":");
}

#else

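/*
 * Fallback for kernels built without error state compression: pages are
 * copied verbatim, and err_compression_marker() tags the dump with "~"
 * instead of ":" so the two formats can be told apart.
 */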
struct compress {
};

static bool compress_init(struct compress *c)
{
	return true;
}

static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	unsigned long page;
	void *ptr;

	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!page)
		return -ENOMEM;

	ptr = (void *)page;
	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
		memcpy(ptr, src, PAGE_SIZE);
	dst->pages[dst->page_count++] = ptr;

	return 0;
}

static int compress_flush(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	return 0;
}

static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
}

static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, "~");
}

#endif

static void print_error_buffers(struct drm_i915_error_state_buf *m,
				const char *name,
				struct drm_i915_error_buffer *err,
				int count)
{
	err_printf(m, "%s [%d]:\n", name, count);

	while (count--) {
		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
			   upper_32_bits(err->gtt_offset),
			   lower_32_bits(err->gtt_offset),
			   err->size,
			   err->read_domains,
			   err->write_domain,
			   err->wseqno);
		err_puts(m, tiling_flag(err->tiling));
		err_puts(m, dirty_flag(err->dirty));
		err_puts(m, purgeable_flag(err->purgeable));
		err_puts(m, err->userptr ? " userptr" : "");
		err_puts(m, err->engine != -1 ? " " : "");
		err_puts(m, engine_name(m->i915, err->engine));
		err_puts(m, i915_cache_level_str(m->i915, err->cache_level));

		if (err->name)
			err_printf(m, " (name: %d)", err->name);
		if (err->fence_reg != I915_FENCE_REG_NONE)
			err_printf(m, " (fence: %d)", err->fence_reg);

		err_puts(m, "\n");
		err++;
	}
}

static void error_print_instdone(struct drm_i915_error_state_buf *m,
				 const struct drm_i915_error_engine *ee)
{
	int slice;
	int subslice;

	err_printf(m, "  INSTDONE: 0x%08x\n",
		   ee->instdone.instdone);

	if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3)
		return;

	err_printf(m, "  SC_INSTDONE: 0x%08x\n",
		   ee->instdone.slice_common);

	if (INTEL_GEN(m->i915) <= 6)
		return;

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  SAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.sampler[slice][subslice]);

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  ROW_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.row[slice][subslice]);
}

static const char *bannable(const struct drm_i915_error_context *ctx)
{
	return ctx->bannable ? "" : " (unbannable)";
}

static void error_print_request(struct drm_i915_error_state_buf *m,
				const char *prefix,
				const struct drm_i915_error_request *erq,
				const unsigned long epoch)
{
	if (!erq->seqno)
		return;

	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
		   prefix, erq->pid, erq->ban_score,
		   erq->context, erq->seqno, erq->sched_attr.priority,
		   jiffies_to_msecs(erq->jiffies - epoch),
		   erq->start, erq->head, erq->tail);
}

static void error_print_context(struct drm_i915_error_state_buf *m,
				const char *header,
				const struct drm_i915_error_context *ctx)
{
	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
		   ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
		   ctx->guilty, ctx->active);
}

static void error_print_engine(struct drm_i915_error_state_buf *m,
			       const struct drm_i915_error_engine *ee,
			       const unsigned long epoch)
{
	int n;

	err_printf(m, "%s command stream:\n",
		   engine_name(m->i915, ee->engine_id));
	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
	err_printf(m, "  START: 0x%08x\n", ee->start);
	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
		   ee->tail, ee->rq_post, ee->rq_tail);
	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
	err_printf(m, "  MODE:  0x%08x\n", ee->mode);
	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
	err_printf(m, "  ACTHD: 0x%08x %08x\n",
		   (u32)(ee->acthd>>32), (u32)ee->acthd);
	err_printf(m, "  IPEIR: 0x%08x\n", ee->ipeir);
	err_printf(m, "  IPEHR: 0x%08x\n", ee->ipehr);

	error_print_instdone(m, ee);

	if (ee->batchbuffer) {
		u64 start = ee->batchbuffer->gtt_offset;
		u64 end = start + ee->batchbuffer->gtt_size;

		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
			   upper_32_bits(start), lower_32_bits(start),
			   upper_32_bits(end), lower_32_bits(end));
	}
	if (INTEL_GEN(m->i915) >= 4) {
		err_printf(m, "  BBADDR: 0x%08x_%08x\n",
			   (u32)(ee->bbaddr>>32), (u32)ee->bbaddr);
		err_printf(m, "  BB_STATE: 0x%08x\n", ee->bbstate);
		err_printf(m, "  INSTPS: 0x%08x\n", ee->instps);
	}
	err_printf(m, "  INSTPM: 0x%08x\n", ee->instpm);
	err_printf(m, "  FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr),
		   lower_32_bits(ee->faddr));
	if (INTEL_GEN(m->i915) >= 6) {
		err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
		err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
		err_printf(m, "  SYNC_0: 0x%08x\n",
			   ee->semaphore_mboxes[0]);
		err_printf(m, "  SYNC_1: 0x%08x\n",
			   ee->semaphore_mboxes[1]);
		if (HAS_VEBOX(m->i915))
			err_printf(m, "  SYNC_2: 0x%08x\n",
				   ee->semaphore_mboxes[2]);
	}
	if (HAS_PPGTT(m->i915)) {
		err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);

		if (INTEL_GEN(m->i915) >= 8) {
			int i;
			for (i = 0; i < 4; i++)
				err_printf(m, "  PDP%d: 0x%016llx\n",
					   i, ee->vm_info.pdp[i]);
		} else {
			err_printf(m, "  PP_DIR_BASE: 0x%08x\n",
				   ee->vm_info.pp_dir_base);
		}
	}
	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);
	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno);
	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
	err_printf(m, "  hangcheck action: %s\n",
		   hangcheck_action_to_str(ee->hangcheck_action));
	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n",
		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
		   ee->hangcheck_timestamp,
		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
	err_printf(m, "  engine reset count: %u\n", ee->reset_count);

	for (n = 0; n < ee->num_ports; n++) {
		err_printf(m, "  ELSP[%d]:", n);
		error_print_request(m, " ", &ee->execlist[n], epoch);
	}

	error_print_context(m, "  Active context: ", &ee->context);
}

void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
{
	va_list args;

	va_start(args, f);
	i915_error_vprintf(e, f, args);
	va_end(args);
}

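/*
 * Dump an error object as ascii85. An optional header line names the
 * owning engine and the object's GGTT offset; the payload follows the
 * compression marker, with the unused tail of the final page trimmed
 * from the encoding.
 */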
static void print_error_obj(struct drm_i915_error_state_buf *m,
			    struct intel_engine_cs *engine,
			    const char *name,
			    struct drm_i915_error_object *obj)
{
	char out[ASCII85_BUFSZ];
	int page;

	if (!obj)
		return;

	if (name) {
		err_printf(m, "%s --- %s = 0x%08x %08x\n",
			   engine ? engine->name : "global", name,
			   upper_32_bits(obj->gtt_offset),
			   lower_32_bits(obj->gtt_offset));
	}

	err_compression_marker(m);
	for (page = 0; page < obj->page_count; page++) {
		int i, len;

		len = PAGE_SIZE;
		if (page == obj->page_count - 1)
			len -= obj->unused;
		len = ascii85_encode_len(len);

		for (i = 0; i < len; i++)
			err_puts(m, ascii85_encode(obj->pages[page][i], out));
	}
	err_puts(m, "\n");
}

static void err_print_capabilities(struct drm_i915_error_state_buf *m,
				   const struct intel_device_info *info,
				   const struct intel_driver_caps *caps)
{
	struct drm_printer p = i915_error_printer(m);

	intel_device_info_dump_flags(info, &p);
	intel_driver_caps_print(caps, &p);
	intel_device_info_dump_topology(&info->sseu, &p);
}

static void err_print_params(struct drm_i915_error_state_buf *m,
			     const struct i915_params *params)
{
	struct drm_printer p = i915_error_printer(m);

	i915_params_dump(params, &p);
}

static void err_print_pciid(struct drm_i915_error_state_buf *m,
			    struct drm_i915_private *i915)
{
	struct pci_dev *pdev = i915->drm.pdev;

	err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
	err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
	err_printf(m, "PCI Subsystem: %04x:%04x\n",
		   pdev->subsystem_vendor,
		   pdev->subsystem_device);
}

static void err_print_uc(struct drm_i915_error_state_buf *m,
			 const struct i915_error_uc *error_uc)
{
	struct drm_printer p = i915_error_printer(m);
	const struct i915_gpu_state *error =
		container_of(error_uc, typeof(*error), uc);

	if (!error->device_info.has_guc)
		return;

	intel_uc_fw_dump(&error_uc->guc_fw, &p);
	intel_uc_fw_dump(&error_uc->huc_fw, &p);
	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
}

int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
			    const struct i915_gpu_state *error)
{
	struct drm_i915_private *dev_priv = m->i915;
	struct drm_i915_error_object *obj;
	struct timespec64 ts;
	int i, j;

	if (!error) {
		err_printf(m, "No error state collected\n");
		return 0;
	}

	if (*error->error_msg)
		err_printf(m, "%s\n", error->error_msg);
	err_printf(m, "Kernel: " UTS_RELEASE "\n");
	ts = ktime_to_timespec64(error->time);
	err_printf(m, "Time: %lld s %ld us\n",
		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
	ts = ktime_to_timespec64(error->boottime);
	err_printf(m, "Boottime: %lld s %ld us\n",
		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
	ts = ktime_to_timespec64(error->uptime);
	err_printf(m, "Uptime: %lld s %ld us\n",
		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
		   error->capture,
		   jiffies_to_msecs(jiffies - error->capture),
		   jiffies_to_msecs(error->capture - error->epoch));

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].hangcheck_stalled &&
		    error->engine[i].context.pid) {
			err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
				   engine_name(m->i915, i),
				   error->engine[i].context.comm,
				   error->engine[i].context.pid,
				   error->engine[i].context.ban_score,
				   bannable(&error->engine[i].context));
		}
	}
	err_printf(m, "Reset count: %u\n", error->reset_count);
	err_printf(m, "Suspend count: %u\n", error->suspend_count);
	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
	err_print_pciid(m, error->i915);

	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);

	if (HAS_CSR(dev_priv)) {
		struct intel_csr *csr = &dev_priv->csr;

		err_printf(m, "DMC loaded: %s\n",
			   yesno(csr->dmc_payload != NULL));
		err_printf(m, "DMC fw version: %d.%d\n",
			   CSR_VERSION_MAJOR(csr->version),
			   CSR_VERSION_MINOR(csr->version));
	}

	err_printf(m, "GT awake: %s\n", yesno(error->awake));
	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
	err_printf(m, "EIR: 0x%08x\n", error->eir);
	err_printf(m, "IER: 0x%08x\n", error->ier);
	for (i = 0; i < error->ngtier; i++)
		err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
	err_printf(m, "CCID: 0x%08x\n", error->ccid);
	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);

	for (i = 0; i < error->nfence; i++)
		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);

	if (INTEL_GEN(dev_priv) >= 6) {
		err_printf(m, "ERROR: 0x%08x\n", error->error);

		if (INTEL_GEN(dev_priv) >= 8)
			err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
				   error->fault_data1, error->fault_data0);

		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
	}

	if (IS_GEN7(dev_priv))
		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].engine_id != -1)
			error_print_engine(m, &error->engine[i], error->epoch);
	}

	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
		char buf[128];
		int len, first = 1;

		if (!error->active_vm[i])
			break;

		len = scnprintf(buf, sizeof(buf), "Active (");
		for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
			if (error->engine[j].vm != error->active_vm[i])
				continue;

			len += scnprintf(buf + len, sizeof(buf), "%s%s",
					 first ? "" : ", ",
					 dev_priv->engine[j]->name);
			first = 0;
		}
		scnprintf(buf + len, sizeof(buf), ")");
		print_error_buffers(m, buf,
				    error->active_bo[i],
				    error->active_bo_count[i]);
	}

	print_error_buffers(m, "Pinned (global)",
			    error->pinned_bo,
			    error->pinned_bo_count);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		const struct drm_i915_error_engine *ee = &error->engine[i];

		obj = ee->batchbuffer;
		if (obj) {
			err_puts(m, dev_priv->engine[i]->name);
			if (ee->context.pid)
				err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
					   ee->context.comm,
					   ee->context.pid,
					   ee->context.handle,
					   ee->context.hw_id,
					   ee->context.ban_score,
					   bannable(&ee->context));
			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
				   upper_32_bits(obj->gtt_offset),
				   lower_32_bits(obj->gtt_offset));
			print_error_obj(m, dev_priv->engine[i], NULL, obj);
		}

		for (j = 0; j < ee->user_bo_count; j++)
			print_error_obj(m, dev_priv->engine[i],
					"user", ee->user_bo[j]);

		if (ee->num_requests) {
			err_printf(m, "%s --- %d requests\n",
				   dev_priv->engine[i]->name,
				   ee->num_requests);
			for (j = 0; j < ee->num_requests; j++)
				error_print_request(m, " ",
						    &ee->requests[j],
						    error->epoch);
		}

		if (IS_ERR(ee->waiters)) {
			err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
				   dev_priv->engine[i]->name);
		} else if (ee->num_waiters) {
			err_printf(m, "%s --- %d waiters\n",
				   dev_priv->engine[i]->name,
				   ee->num_waiters);
			for (j = 0; j < ee->num_waiters; j++) {
				err_printf(m, " seqno 0x%08x for %s [%d]\n",
					   ee->waiters[j].seqno,
					   ee->waiters[j].comm,
					   ee->waiters[j].pid);
			}
		}

		print_error_obj(m, dev_priv->engine[i],
				"ringbuffer", ee->ringbuffer);

		print_error_obj(m, dev_priv->engine[i],
				"HW Status", ee->hws_page);

		print_error_obj(m, dev_priv->engine[i],
				"HW context", ee->ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA context", ee->wa_ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA batchbuffer", ee->wa_batchbuffer);

		print_error_obj(m, dev_priv->engine[i],
				"NULL context", ee->default_state);
	}

	if (error->overlay)
		intel_overlay_print_error_state(m, error->overlay);

	if (error->display)
		intel_display_print_error_state(m, error->display);

	err_print_capabilities(m, &error->device_info, &error->driver_caps);
	err_print_params(m, &error->params);
	err_print_uc(m, &error->uc);

	if (m->bytes == 0 && m->err)
		return m->err;

	return 0;
}

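/*
 * Allocate the scratch buffer used to render the error state. We first
 * try to grab enough to hold the whole read (plus one byte of slack for
 * the windowing logic above), then fall back to progressively smaller
 * allocations rather than fail the read outright.
 */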
int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
			      struct drm_i915_private *i915,
			      size_t count, loff_t pos)
{
	memset(ebuf, 0, sizeof(*ebuf));
	ebuf->i915 = i915;

	/* We need to have enough room to store any i915_error_state printf
	 * so that we can move it to start position.
	 */
	ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
	ebuf->buf = kmalloc(ebuf->size,
			    GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

	if (ebuf->buf == NULL) {
		ebuf->size = PAGE_SIZE;
		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
	}

	if (ebuf->buf == NULL) {
		ebuf->size = 128;
		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
	}

	if (ebuf->buf == NULL)
		return -ENOMEM;

	ebuf->start = pos;

	return 0;
}

static void i915_error_object_free(struct drm_i915_error_object *obj)
{
	int page;

	if (obj == NULL)
		return;

	for (page = 0; page < obj->page_count; page++)
		free_page((unsigned long)obj->pages[page]);

	kfree(obj);
}

static __always_inline void free_param(const char *type, void *x)
{
	if (!__builtin_strcmp(type, "char *"))
		kfree(*(void **)x);
}

static void cleanup_params(struct i915_gpu_state *error)
{
#define FREE(T, x, ...) free_param(#T, &error->params.x);
	I915_PARAMS_FOR_EACH(FREE);
#undef FREE
}

static void cleanup_uc_state(struct i915_gpu_state *error)
{
	struct i915_error_uc *error_uc = &error->uc;

	kfree(error_uc->guc_fw.path);
	kfree(error_uc->huc_fw.path);
	i915_error_object_free(error_uc->guc_log);
}

void __i915_gpu_state_free(struct kref *error_ref)
{
	struct i915_gpu_state *error =
		container_of(error_ref, typeof(*error), ref);
	long i, j;

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];

		for (j = 0; j < ee->user_bo_count; j++)
			i915_error_object_free(ee->user_bo[j]);
		kfree(ee->user_bo);

		i915_error_object_free(ee->batchbuffer);
		i915_error_object_free(ee->wa_batchbuffer);
		i915_error_object_free(ee->ringbuffer);
		i915_error_object_free(ee->hws_page);
		i915_error_object_free(ee->ctx);
		i915_error_object_free(ee->wa_ctx);

		kfree(ee->requests);
		if (!IS_ERR_OR_NULL(ee->waiters))
			kfree(ee->waiters);
	}

	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
		kfree(error->active_bo[i]);
	kfree(error->pinned_bo);

	kfree(error->overlay);
	kfree(error->display);

	cleanup_params(error);
	cleanup_uc_state(error);

	kfree(error);
}

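/*
 * Copy the contents of a vma into freshly allocated error pages. Each
 * backing page is bound in turn into the GGTT slot reserved for error
 * capture, read back through an atomic write-combining mapping, and
 * pushed through compress_page(). On any failure the partial copy is
 * discarded and NULL is returned.
 */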
static struct drm_i915_error_object *
i915_error_object_create(struct drm_i915_private *i915,
			 struct i915_vma *vma)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	const u64 slot = ggtt->error_capture.start;
	struct drm_i915_error_object *dst;
	struct compress compress;
	unsigned long num_pages;
	struct sgt_iter iter;
	dma_addr_t dma;
	int ret;

	if (!vma)
		return NULL;

	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
		      GFP_ATOMIC | __GFP_NOWARN);
	if (!dst)
		return NULL;

	dst->gtt_offset = vma->node.start;
	dst->gtt_size = vma->node.size;
	dst->num_pages = num_pages;
	dst->page_count = 0;
	dst->unused = 0;

	if (!compress_init(&compress)) {
		kfree(dst);
		return NULL;
	}

	ret = -EINVAL;
	for_each_sgt_dma(dma, iter, vma->pages) {
		void __iomem *s;

		ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);

		s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
		ret = compress_page(&compress, (void __force *)s, dst);
		io_mapping_unmap_atomic(s);
		if (ret)
			break;
	}

	if (ret || compress_flush(&compress, dst)) {
		while (dst->page_count--)
			free_page((unsigned long)dst->pages[dst->page_count]);
		kfree(dst);
		dst = NULL;
	}

	compress_fini(&compress, dst);
	return dst;
}

/* The error capture is special as it tries to run underneath the normal
 * locking rules - so we use the raw version of the i915_gem_active lookup.
 */
static inline uint32_t
__active_get_seqno(struct i915_gem_active *active)
{
	struct i915_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->global_seqno : 0;
}

static inline int
__active_get_engine_id(struct i915_gem_active *active)
{
	struct i915_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->engine->id : -1;
}

static void capture_bo(struct drm_i915_error_buffer *err,
		       struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	err->size = obj->base.size;
	err->name = obj->base.name;

	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
	err->engine = __active_get_engine_id(&obj->frontbuffer_write);

	err->gtt_offset = vma->node.start;
	err->read_domains = obj->read_domains;
	err->write_domain = obj->write_domain;
	err->fence_reg = vma->fence ? vma->fence->id : -1;
	err->tiling = i915_gem_object_get_tiling(obj);
	err->dirty = obj->mm.dirty;
	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
	err->userptr = obj->userptr.mm != NULL;
	err->cache_level = obj->cache_level;
}

static u32 capture_error_bo(struct drm_i915_error_buffer *err,
			    int count, struct list_head *head,
			    bool pinned_only)
{
	struct i915_vma *vma;
	int i = 0;

	list_for_each_entry(vma, head, vm_link) {
		if (!vma->obj)
			continue;

		if (pinned_only && !i915_vma_is_pinned(vma))
			continue;

		capture_bo(err++, vma);
		if (++i == count)
			break;
	}

	return i;
}

/* Generate a semi-unique error code. The code is not meant to have meaning;
 * its only purpose is to try to prevent false duplicated bug reports by
 * grossly estimating a GPU error state.
 *
 * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
 * the hang if we could strip the GTT offset information from it.
 *
 * It's only a small step better than a random number in its current form.
 */
static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
					 struct i915_gpu_state *error,
					 int *engine_id)
{
	uint32_t error_code = 0;
	int i;

	/* IPEHR would be an ideal way to detect errors, as it's the gross
	 * measure of "the command that hung." However, it has some very common
	 * synchronization commands which almost always appear in cases that
	 * are strictly a client bug. Use instdone to differentiate some of
	 * those.
	 */
	for (i = 0; i < I915_NUM_ENGINES; i++) {
		if (error->engine[i].hangcheck_stalled) {
			if (engine_id)
				*engine_id = i;

			return error->engine[i].ipehr ^
			       error->engine[i].instdone.instdone;
		}
	}

	return error_code;
}

static void gem_record_fences(struct i915_gpu_state *error)
{
	struct drm_i915_private *dev_priv = error->i915;
	int i;

	if (INTEL_GEN(dev_priv) >= 6) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
	} else if (INTEL_GEN(dev_priv) >= 4) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
	} else {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ(FENCE_REG(i));
	}
	error->nfence = i;
}

static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base));
	ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base));
	if (HAS_VEBOX(dev_priv))
		ee->semaphore_mboxes[2] =
			I915_READ(RING_SYNC_2(engine->mmio_base));
}

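/*
 * Record the tasks waiting on this engine's breadcrumbs. The GPU is in
 * an unknown (likely hung) state, so only trylock the rb_lock; if it is
 * already held, report ERR_PTR(-EDEADLK) rather than risk deadlocking
 * the error capture.
 */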
static void error_record_engine_waiters(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_error_waiter *waiter;
	struct rb_node *rb;
	int count;

	ee->num_waiters = 0;
	ee->waiters = NULL;

	if (RB_EMPTY_ROOT(&b->waiters))
		return;

	if (!spin_trylock_irq(&b->rb_lock)) {
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	count = 0;
	for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
		count++;
	spin_unlock_irq(&b->rb_lock);

	waiter = NULL;
	if (count)
		waiter = kmalloc_array(count,
				       sizeof(struct drm_i915_error_waiter),
				       GFP_ATOMIC);
	if (!waiter)
		return;

	if (!spin_trylock_irq(&b->rb_lock)) {
		kfree(waiter);
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	ee->waiters = waiter;
	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
		struct intel_wait *w = rb_entry(rb, typeof(*w), node);

		strcpy(waiter->comm, w->tsk->comm);
		waiter->pid = w->tsk->pid;
		waiter->seqno = w->seqno;
		waiter++;

		if (++ee->num_waiters == count)
			break;
	}
	spin_unlock_irq(&b->rb_lock);
}

static void error_record_engine_registers(struct i915_gpu_state *error,
					  struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) >= 6) {
		ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
		if (INTEL_GEN(dev_priv) >= 8) {
			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
		} else {
			gen6_record_semaphore_state(engine, ee);
			ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
		}
	}

	if (INTEL_GEN(dev_priv) >= 4) {
		ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base));
		ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base));
		ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
		ee->instps = I915_READ(RING_INSTPS(engine->mmio_base));
		ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
		if (INTEL_GEN(dev_priv) >= 8) {
			ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32;
			ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32;
		}
		ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base));
	} else {
		ee->faddr = I915_READ(DMA_FADD_I8XX);
		ee->ipeir = I915_READ(IPEIR);
		ee->ipehr = I915_READ(IPEHR);
	}

	intel_engine_get_instdone(engine, &ee->instdone);

	ee->waiting = intel_engine_has_waiter(engine);
	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
	ee->acthd = intel_engine_get_active_head(engine);
	ee->seqno = intel_engine_get_seqno(engine);
	ee->last_seqno = intel_engine_last_submit(engine);
	ee->start = I915_READ_START(engine);
	ee->head = I915_READ_HEAD(engine);
	ee->tail = I915_READ_TAIL(engine);
	ee->ctl = I915_READ_CTL(engine);
	if (INTEL_GEN(dev_priv) > 2)
		ee->mode = I915_READ_MODE(engine);

	if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
		i915_reg_t mmio;

		if (IS_GEN7(dev_priv)) {
			switch (engine->id) {
			default:
			case RCS:
				mmio = RENDER_HWS_PGA_GEN7;
				break;
			case BCS:
				mmio = BLT_HWS_PGA_GEN7;
				break;
			case VCS:
				mmio = BSD_HWS_PGA_GEN7;
				break;
			case VECS:
				mmio = VEBOX_HWS_PGA_GEN7;
				break;
			}
		} else if (IS_GEN6(engine->i915)) {
			mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
		} else {
			/* XXX: gen8 returns to sanity */
			mmio = RING_HWS_PGA(engine->mmio_base);
		}

		ee->hws = I915_READ(mmio);
	}

	ee->idle = intel_engine_is_idle(engine);
	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
	ee->hangcheck_action = engine->hangcheck.action;
	ee->hangcheck_stalled = engine->hangcheck.stalled;
	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
						  engine);

	if (HAS_PPGTT(dev_priv)) {
		int i;

		ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine));

		if (IS_GEN6(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE_READ(engine));
		else if (IS_GEN7(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE(engine));
		else if (INTEL_GEN(dev_priv) >= 8)
			for (i = 0; i < 4; i++) {
				ee->vm_info.pdp[i] =
					I915_READ(GEN8_RING_PDP_UDW(engine, i));
				ee->vm_info.pdp[i] <<= 32;
				ee->vm_info.pdp[i] |=
					I915_READ(GEN8_RING_PDP_LDW(engine, i));
			}
	}
}

static void record_request(struct i915_request *request,
			   struct drm_i915_error_request *erq)
{
	struct i915_gem_context *ctx = request->gem_context;

	erq->context = ctx->hw_id;
	erq->sched_attr = request->sched.attr;
	erq->ban_score = atomic_read(&ctx->ban_score);
	erq->seqno = request->global_seqno;
	erq->jiffies = request->emitted_jiffies;
	erq->start = i915_ggtt_offset(request->ring->vma);
	erq->head = request->head;
	erq->tail = request->tail;

	rcu_read_lock();
	erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0;
	rcu_read_unlock();
}

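/*
 * Snapshot the requests queued on the engine's timeline, starting from
 * the active request. The list can still mutate under us, so the second
 * walk is bounded by the count taken on the first, and the final count
 * is recorded at the end.
 */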
static void engine_record_requests(struct intel_engine_cs *engine,
				   struct i915_request *first,
				   struct drm_i915_error_engine *ee)
{
	struct i915_request *request;
	int count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline.requests, link)
		count++;
	if (!count)
		return;

	ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
	if (!ee->requests)
		return;

	ee->num_requests = count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline.requests, link) {
		if (count >= ee->num_requests) {
			/*
			 * If the ring request list was changed in
			 * between the point where the error request
			 * list was created and dimensioned and this
			 * point then just exit early to avoid crashes.
			 *
			 * We don't need to communicate that the
			 * request list changed state during error
			 * state capture and that the error state is
			 * slightly incorrect as a consequence since we
			 * are typically only interested in the request
			 * list state at the point of error state
			 * capture, not in any changes happening during
			 * the capture.
			 */
			break;
		}

		record_request(request, &ee->requests[count++]);
	}
	ee->num_requests = count;
}

static void error_record_engine_execlists(struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	const struct intel_engine_execlists * const execlists = &engine->execlists;
	unsigned int n;

	for (n = 0; n < execlists_num_ports(execlists); n++) {
		struct i915_request *rq = port_request(&execlists->port[n]);

		if (!rq)
			break;

		record_request(rq, &ee->execlist[n]);
	}

	ee->num_ports = n;
}

static void record_context(struct drm_i915_error_context *e,
			   struct i915_gem_context *ctx)
{
	if (ctx->pid) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(ctx->pid, PIDTYPE_PID);
		if (task) {
			strcpy(e->comm, task->comm);
			e->pid = task->pid;
		}
		rcu_read_unlock();
	}

	e->handle = ctx->user_handle;
	e->hw_id = ctx->hw_id;
	e->sched_attr = ctx->sched;
	e->ban_score = atomic_read(&ctx->ban_score);
	e->bannable = i915_gem_context_is_bannable(ctx);
	e->guilty = atomic_read(&ctx->guilty_count);
	e->active = atomic_read(&ctx->active_count);
}

static void request_record_user_bo(struct i915_request *request,
				   struct drm_i915_error_engine *ee)
{
	struct i915_capture_list *c;
	struct drm_i915_error_object **bo;
	long count, max;

	max = 0;
	for (c = request->capture_list; c; c = c->next)
		max++;
	if (!max)
		return;

	bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
	if (!bo) {
		/* If we can't capture everything, try to capture something. */
		max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
		bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
	}
	if (!bo)
		return;

	count = 0;
	for (c = request->capture_list; c; c = c->next) {
		bo[count] = i915_error_object_create(request->i915, c->vma);
		if (!bo[count])
			break;
		if (++count == max)
			break;
	}

	ee->user_bo = bo;
	ee->user_bo_count = count;
}

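/*
 * Wrap a bare GEM object in an on-stack fake vma so that it can be fed
 * through i915_error_object_create(); used for objects such as the
 * engine's default state that have no vma of their own.
 */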
static struct drm_i915_error_object *
capture_object(struct drm_i915_private *dev_priv,
	       struct drm_i915_gem_object *obj)
{
	if (obj && i915_gem_object_has_pages(obj)) {
		struct i915_vma fake = {
			.node = { .start = U64_MAX, .size = obj->base.size },
			.size = obj->base.size,
			.pages = obj->mm.pages,
			.obj = obj,
		};

		return i915_error_object_create(dev_priv, &fake);
	} else {
		return NULL;
	}
}

static void gem_record_rings(struct i915_gpu_state *error)
{
	struct drm_i915_private *i915 = error->i915;
	struct i915_ggtt *ggtt = &i915->ggtt;
	int i;

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_engine_cs *engine = i915->engine[i];
		struct drm_i915_error_engine *ee = &error->engine[i];
		struct i915_request *request;

		ee->engine_id = -1;

		if (!engine)
			continue;

		ee->engine_id = i;

		error_record_engine_registers(error, engine, ee);
		error_record_engine_waiters(engine, ee);
		error_record_engine_execlists(engine, ee);

		request = i915_gem_find_active_request(engine);
		if (request) {
			struct i915_gem_context *ctx = request->gem_context;
			struct intel_ring *ring;

			ee->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &ggtt->vm;

			record_context(&ee->context, ctx);

			/* We need to copy these to an anonymous buffer
			 * as the simplest method to avoid being overwritten
			 * by userspace.
			 */
			ee->batchbuffer =
				i915_error_object_create(i915, request->batch);

			if (HAS_BROKEN_CS_TLB(i915))
				ee->wa_batchbuffer =
					i915_error_object_create(i915,
								 engine->scratch);
			request_record_user_bo(request, ee);

			ee->ctx =
				i915_error_object_create(i915,
							 request->hw_context->state);

			error->simulated |=
				i915_gem_context_no_error_capture(ctx);

			ee->rq_head = request->head;
			ee->rq_post = request->postfix;
			ee->rq_tail = request->tail;

			ring = request->ring;
			ee->cpu_ring_head = ring->head;
			ee->cpu_ring_tail = ring->tail;
			ee->ringbuffer =
				i915_error_object_create(i915, ring->vma);

			engine_record_requests(engine, request, ee);
		}

		ee->hws_page =
			i915_error_object_create(i915,
						 engine->status_page.vma);

		ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma);

		ee->default_state = capture_object(i915, engine->default_state);
	}
}

static void gem_capture_vm(struct i915_gpu_state *error,
			   struct i915_address_space *vm,
			   int idx)
{
	struct drm_i915_error_buffer *active_bo;
	struct i915_vma *vma;
	int count;

	count = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count++;

	active_bo = NULL;
	if (count)
		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
	if (active_bo)
		count = capture_error_bo(active_bo, count, &vm->active_list, false);
	else
		count = 0;

	error->active_vm[idx] = vm;
	error->active_bo[idx] = active_bo;
	error->active_bo_count[idx] = count;
}

static void capture_active_buffers(struct i915_gpu_state *error)
{
	int cnt = 0, i, j;

	BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));

	/* Scan each engine looking for unique active contexts/vm */
	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];
		bool found;

		if (!ee->vm)
			continue;

		found = false;
		for (j = 0; j < i && !found; j++)
			found = error->engine[j].vm == ee->vm;
		if (!found)
			gem_capture_vm(error, ee->vm, cnt++);
	}
}

static void capture_pinned_buffers(struct i915_gpu_state *error)
{
	struct i915_address_space *vm = &error->i915->ggtt.vm;
	struct drm_i915_error_buffer *bo;
	struct i915_vma *vma;
	int count_inactive, count_active;

	count_inactive = 0;
	list_for_each_entry(vma, &vm->inactive_list, vm_link)
		count_inactive++;

	count_active = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count_active++;

	bo = NULL;
	if (count_inactive + count_active)
		bo = kcalloc(count_inactive + count_active,
			     sizeof(*bo), GFP_ATOMIC);
	if (!bo)
		return;

	count_inactive = capture_error_bo(bo, count_inactive,
					  &vm->active_list, true);
	count_active = capture_error_bo(bo + count_inactive, count_active,
					&vm->inactive_list, true);
	error->pinned_bo_count = count_inactive + count_active;
	error->pinned_bo = bo;
}

static void capture_uc_state(struct i915_gpu_state *error)
{
	struct drm_i915_private *i915 = error->i915;
	struct i915_error_uc *error_uc = &error->uc;

	/* Capturing uC state won't be useful if there is no GuC */
	if (!error->device_info.has_guc)
		return;

	error_uc->guc_fw = i915->guc.fw;
	error_uc->huc_fw = i915->huc.fw;

	/* Non-default firmware paths will be specified by the modparam.
	 * As modparams are generally accessible from userspace, make
	 * explicit copies of the firmware paths.
	 */
	error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC);
	error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC);
	error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma);
}

/* Capture all registers which don't fit into another category. */
static void capture_reg_state(struct i915_gpu_state *error)
{
	struct drm_i915_private *dev_priv = error->i915;
	int i;

	/* General organization
	 * 1. Registers specific to a single generation
	 * 2. Registers which belong to multiple generations
	 * 3. Feature specific registers.
	 * 4. Everything else
	 * Please try to follow the order.
	 */

	/* 1: Registers specific to a single generation */
	if (IS_VALLEYVIEW(dev_priv)) {
		error->gtier[0] = I915_READ(GTIER);
		error->ier = I915_READ(VLV_IER);
		error->forcewake = I915_READ_FW(FORCEWAKE_VLV);
	}

	if (IS_GEN7(dev_priv))
		error->err_int = I915_READ(GEN7_ERR_INT);

	if (INTEL_GEN(dev_priv) >= 8) {
		error->fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
		error->fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
	}

	if (IS_GEN6(dev_priv)) {
		error->forcewake = I915_READ_FW(FORCEWAKE);
		error->gab_ctl = I915_READ(GAB_CTL);
		error->gfx_mode = I915_READ(GFX_MODE);
	}

	/* 2: Registers which belong to multiple generations */
	if (INTEL_GEN(dev_priv) >= 7)
		error->forcewake = I915_READ_FW(FORCEWAKE_MT);

	if (INTEL_GEN(dev_priv) >= 6) {
		error->derrmr = I915_READ(DERRMR);
		error->error = I915_READ(ERROR_GEN6);
		error->done_reg = I915_READ(DONE_REG);
	}

	if (INTEL_GEN(dev_priv) >= 5)
		error->ccid = I915_READ(CCID);

	/* 3: Feature specific registers */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		error->gam_ecochk = I915_READ(GAM_ECOCHK);
		error->gac_eco = I915_READ(GAC_ECO_BITS);
	}

	/* 4: Everything else */
	if (INTEL_GEN(dev_priv) >= 11) {
		error->ier = I915_READ(GEN8_DE_MISC_IER);
		error->gtier[0] = I915_READ(GEN11_RENDER_COPY_INTR_ENABLE);
		error->gtier[1] = I915_READ(GEN11_VCS_VECS_INTR_ENABLE);
		error->gtier[2] = I915_READ(GEN11_GUC_SG_INTR_ENABLE);
		error->gtier[3] = I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE);
		error->gtier[4] = I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE);
		error->gtier[5] = I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE);
		error->ngtier = 6;
	} else if (INTEL_GEN(dev_priv) >= 8) {
		error->ier = I915_READ(GEN8_DE_MISC_IER);
		for (i = 0; i < 4; i++)
			error->gtier[i] = I915_READ(GEN8_GT_IER(i));
		error->ngtier = 4;
	} else if (HAS_PCH_SPLIT(dev_priv)) {
		error->ier = I915_READ(DEIER);
		error->gtier[0] = I915_READ(GTIER);
		error->ngtier = 1;
	} else if (IS_GEN2(dev_priv)) {
		error->ier = I915_READ16(IER);
	} else if (!IS_VALLEYVIEW(dev_priv)) {
		error->ier = I915_READ(IER);
	}
	error->eir = I915_READ(EIR);
	error->pgtbl_er = I915_READ(PGTBL_ER);
}

static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
				   struct i915_gpu_state *error,
				   u32 engine_mask,
				   const char *error_msg)
{
	u32 ecode;
	int engine_id = -1, len;

	ecode = i915_error_generate_code(dev_priv, error, &engine_id);

	len = scnprintf(error->error_msg, sizeof(error->error_msg),
			"GPU HANG: ecode %d:%d:0x%08x",
			INTEL_GEN(dev_priv), engine_id, ecode);

	if (engine_id != -1 && error->engine[engine_id].context.pid)
		len += scnprintf(error->error_msg + len,
				 sizeof(error->error_msg) - len,
				 ", in %s [%d]",
				 error->engine[engine_id].context.comm,
				 error->engine[engine_id].context.pid);

	scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
		  ", reason: %s, action: %s",
		  error_msg,
		  engine_mask ? "reset" : "continue");
}

static void capture_gen_state(struct i915_gpu_state *error)
{
	struct drm_i915_private *i915 = error->i915;

	error->awake = i915->gt.awake;
	error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
	error->suspended = i915->runtime_pm.suspended;

	error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU
	error->iommu = intel_iommu_gfx_mapped;
#endif
	error->reset_count = i915_reset_count(&i915->gpu_error);
	error->suspend_count = i915->suspend_count;

	memcpy(&error->device_info,
	       INTEL_INFO(i915),
	       sizeof(error->device_info));
	error->driver_caps = i915->caps;
}

static __always_inline void dup_param(const char *type, void *x)
{
	if (!__builtin_strcmp(type, "char *"))
		*(void **)x = kstrdup(*(void **)x, GFP_ATOMIC);
}

static void capture_params(struct i915_gpu_state *error)
{
	error->params = i915_modparams;
#define DUP(T, x, ...) dup_param(#T, &error->params.x);
	I915_PARAMS_FOR_EACH(DUP);
#undef DUP
}

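/*
 * The epoch is the timestamp of the earliest stalled hangcheck; request
 * and hangcheck times in the report are printed relative to it.
 */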
static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
{
	unsigned long epoch = error->capture;
	int i;

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		const struct drm_i915_error_engine *ee = &error->engine[i];

		if (ee->hangcheck_stalled &&
		    time_before(ee->hangcheck_timestamp, epoch))
			epoch = ee->hangcheck_timestamp;
	}

	return epoch;
}

static void capture_finish(struct i915_gpu_state *error)
{
	struct i915_ggtt *ggtt = &error->i915->ggtt;
	const u64 slot = ggtt->error_capture.start;

	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
}

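/*
 * capture() runs inside stop_machine(), with every other CPU halted, so
 * the state it records is effectively a point-in-time snapshot of the
 * machine. This is also why the capture path allocates with GFP_ATOMIC.
 */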
static int capture(void *data)
{
	struct i915_gpu_state *error = data;

	error->time = ktime_get_real();
	error->boottime = ktime_get_boottime();
	error->uptime = ktime_sub(ktime_get(),
				  error->i915->gt.last_init_time);
	error->capture = jiffies;

	capture_params(error);
	capture_gen_state(error);
	capture_uc_state(error);
	capture_reg_state(error);
	gem_record_fences(error);
	gem_record_rings(error);
	capture_active_buffers(error);
	capture_pinned_buffers(error);

	error->overlay = intel_overlay_capture_error_state(error->i915);
	error->display = intel_display_capture_error_state(error->i915);

	error->epoch = capture_find_epoch(error);

	capture_finish(error);
	return 0;
}

#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))

struct i915_gpu_state *
i915_capture_gpu_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	error = kzalloc(sizeof(*error), GFP_ATOMIC);
	if (!error)
		return NULL;

	kref_init(&error->ref);
	error->i915 = i915;

	stop_machine(capture, error, NULL);

	return error;
}

/**
 * i915_capture_error_state - capture an error record for later analysis
 * @i915: i915 device
 * @engine_mask: the mask of engines triggering the hang
 * @error_msg: a message to insert into the error capture header
 *
 * Should be called when an error is detected (either a hang or an error
 * interrupt) to capture error state from the time of the error.  Fills
 * out a structure which becomes available in debugfs for user level tools
 * to pick up.
 */
void i915_capture_error_state(struct drm_i915_private *i915,
			      u32 engine_mask,
			      const char *error_msg)
{
	static bool warned;
	struct i915_gpu_state *error;
	unsigned long flags;

	if (!i915_modparams.error_capture)
		return;

	if (READ_ONCE(i915->gpu_error.first_error))
		return;

	error = i915_capture_gpu_state(i915);
	if (!error) {
		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
		return;
	}

	i915_error_capture_msg(i915, error, engine_mask, error_msg);
	DRM_INFO("%s\n", error->error_msg);

	if (!error->simulated) {
		spin_lock_irqsave(&i915->gpu_error.lock, flags);
		if (!i915->gpu_error.first_error) {
			i915->gpu_error.first_error = error;
			error = NULL;
		}
		spin_unlock_irqrestore(&i915->gpu_error.lock, flags);
	}

	if (error) {
		__i915_gpu_state_free(&error->ref);
		return;
	}

	if (!warned &&
	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
		DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
		DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
		DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
		DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n");
		DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n",
			 i915->drm.primary->index);
		warned = true;
	}
}

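/*
 * Return a reference to the currently recorded error state, or NULL if
 * no hang has been captured since the error state was last reset.
 */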
struct i915_gpu_state *
i915_first_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	if (error)
		i915_gpu_state_get(error);
	spin_unlock_irq(&i915->gpu_error.lock);

	return error;
}

void i915_reset_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	i915->gpu_error.first_error = NULL;
	spin_unlock_irq(&i915->gpu_error.lock);

	i915_gpu_state_put(error);
}