/*
 * Copyright (c) 2008 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Keith Packard <keithp@keithp.com>
 *    Mika Kuoppala <mika.kuoppala@intel.com>
 *
 */

#include <linux/utsname.h>
#include <linux/stop_machine.h>
#include <linux/zlib.h>
#include <drm/drm_print.h>
#include <linux/ascii85.h>

#include "i915_gpu_error.h"
#include "i915_drv.h"

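/*
 * Helpers for mapping the engine ids recorded in the error state back to
 * the live engine structures and their names, tolerating ids that are out
 * of range or engines that were never initialised.
 */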
static inline const struct intel_engine_cs *
engine_lookup(const struct drm_i915_private *i915, unsigned int id)
{
	if (id >= I915_NUM_ENGINES)
		return NULL;

	return i915->engine[id];
}

static inline const char *
__engine_name(const struct intel_engine_cs *engine)
{
	return engine ? engine->name : "";
}

static const char *
engine_name(const struct drm_i915_private *i915, unsigned int id)
{
	return __engine_name(engine_lookup(i915, id));
}

static const char *tiling_flag(int tiling)
{
	switch (tiling) {
	default:
	case I915_TILING_NONE: return "";
	case I915_TILING_X: return " X";
	case I915_TILING_Y: return " Y";
	}
}

static const char *dirty_flag(int dirty)
{
	return dirty ? " dirty" : "";
}

static const char *purgeable_flag(int purgeable)
{
	return purgeable ? " purgeable" : "";
}

static bool __i915_error_ok(struct drm_i915_error_state_buf *e)
{
	if (!e->err && WARN(e->bytes > (e->size - 1), "overflow")) {
		e->err = -ENOSPC;
		return false;
	}

	if (e->bytes == e->size - 1 || e->err)
		return false;

	return true;
}

static bool __i915_error_seek(struct drm_i915_error_state_buf *e,
			      unsigned len)
{
	if (e->pos + len <= e->start) {
		e->pos += len;
		return false;
	}

	/* First vsnprintf needs to fit in its entirety for memmove */
	if (len >= e->size) {
		e->err = -EIO;
		return false;
	}

	return true;
}

static void __i915_error_advance(struct drm_i915_error_state_buf *e,
				 unsigned len)
{
	/* If this is the first printf in this window, adjust it so that
	 * the start position matches the start of the buffer
	 */

	if (e->pos < e->start) {
		const size_t off = e->start - e->pos;

		/* Should not happen but be paranoid */
		if (off > len || e->bytes) {
			e->err = -EIO;
			return;
		}

		memmove(e->buf, e->buf + off, len - off);
		e->bytes = len - off;
		e->pos = e->start;
		return;
	}

	e->bytes += len;
	e->pos += len;
}

__printf(2, 0)
static void i915_error_vprintf(struct drm_i915_error_state_buf *e,
			       const char *f, va_list args)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	/* Seek the first printf which hits the start position */
	if (e->pos < e->start) {
		va_list tmp;

		va_copy(tmp, args);
		len = vsnprintf(NULL, 0, f, tmp);
		va_end(tmp);

		if (!__i915_error_seek(e, len))
			return;
	}

	len = vsnprintf(e->buf + e->bytes, e->size - e->bytes, f, args);
	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;

	__i915_error_advance(e, len);
}

static void i915_error_puts(struct drm_i915_error_state_buf *e,
			    const char *str)
{
	unsigned len;

	if (!__i915_error_ok(e))
		return;

	len = strlen(str);

	/* Seek the first printf which hits the start position */
	if (e->pos < e->start) {
		if (!__i915_error_seek(e, len))
			return;
	}

	if (len >= e->size - e->bytes)
		len = e->size - e->bytes - 1;
	memcpy(e->buf + e->bytes, str, len);

	__i915_error_advance(e, len);
}

#define err_printf(e, ...) i915_error_printf(e, __VA_ARGS__)
#define err_puts(e, s) i915_error_puts(e, s)

static void __i915_printfn_error(struct drm_printer *p, struct va_format *vaf)
{
	i915_error_vprintf(p->arg, vaf->fmt, *vaf->va);
}

static inline struct drm_printer
i915_error_printer(struct drm_i915_error_state_buf *e)
{
	struct drm_printer p = {
		.printfn = __i915_printfn_error,
		.arg = e,
	};
	return p;
}

#ifdef CONFIG_DRM_I915_COMPRESS_ERROR

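/*
 * With CONFIG_DRM_I915_COMPRESS_ERROR the captured object contents are
 * deflate-compressed page by page. The optional tmp page is used to stage
 * copies out of write-combined memory before feeding them to zlib.
 */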
struct compress {
	struct z_stream_s zstream;
	void *tmp;
};

static bool compress_init(struct compress *c)
{
	struct z_stream_s *zstream = memset(&c->zstream, 0, sizeof(c->zstream));

	zstream->workspace =
		kmalloc(zlib_deflate_workspacesize(MAX_WBITS, MAX_MEM_LEVEL),
			GFP_ATOMIC | __GFP_NOWARN);
	if (!zstream->workspace)
		return false;

	if (zlib_deflateInit(zstream, Z_DEFAULT_COMPRESSION) != Z_OK) {
		kfree(zstream->workspace);
		return false;
	}

	c->tmp = NULL;
	if (i915_has_memcpy_from_wc())
		c->tmp = (void *)__get_free_page(GFP_ATOMIC | __GFP_NOWARN);

	return true;
}

static void *compress_next_page(struct drm_i915_error_object *dst)
{
	unsigned long page;

	if (dst->page_count >= dst->num_pages)
		return ERR_PTR(-ENOSPC);

	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!page)
		return ERR_PTR(-ENOMEM);

	return dst->pages[dst->page_count++] = (void *)page;
}

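/*
 * Feed one source page into the deflate stream, allocating destination
 * pages on demand via compress_next_page().
 */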
static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	zstream->next_in = src;
	if (c->tmp && i915_memcpy_from_wc(c->tmp, src, PAGE_SIZE))
		zstream->next_in = c->tmp;
	zstream->avail_in = PAGE_SIZE;

	do {
		if (zstream->avail_out == 0) {
			zstream->next_out = compress_next_page(dst);
			if (IS_ERR(zstream->next_out))
				return PTR_ERR(zstream->next_out);

			zstream->avail_out = PAGE_SIZE;
		}

		if (zlib_deflate(zstream, Z_NO_FLUSH) != Z_OK)
			return -EIO;
	} while (zstream->avail_in);

	/* Fallback to uncompressed if we increase size? */
	if (0 && zstream->total_out > zstream->total_in)
		return -E2BIG;

	return 0;
}

static int compress_flush(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	do {
		switch (zlib_deflate(zstream, Z_FINISH)) {
		case Z_OK: /* more space requested */
			zstream->next_out = compress_next_page(dst);
			if (IS_ERR(zstream->next_out))
				return PTR_ERR(zstream->next_out);

			zstream->avail_out = PAGE_SIZE;
			break;

		case Z_STREAM_END:
			goto end;

		default: /* any error */
			return -EIO;
		}
	} while (1);

end:
	memset(zstream->next_out, 0, zstream->avail_out);
	dst->unused = zstream->avail_out;
	return 0;
}

static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	struct z_stream_s *zstream = &c->zstream;

	zlib_deflateEnd(zstream);
	kfree(zstream->workspace);
	if (c->tmp)
		free_page((unsigned long)c->tmp);
}

static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, ":");
}

#else

struct compress {
};

static bool compress_init(struct compress *c)
{
	return true;
}

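/*
 * Fallback when error compression is disabled: every source page is copied
 * verbatim into a freshly allocated page, and the dump is marked with '~'
 * rather than ':' by err_compression_marker().
 */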
static int compress_page(struct compress *c,
			 void *src,
			 struct drm_i915_error_object *dst)
{
	unsigned long page;
	void *ptr;

	page = __get_free_page(GFP_ATOMIC | __GFP_NOWARN);
	if (!page)
		return -ENOMEM;

	ptr = (void *)page;
	if (!i915_memcpy_from_wc(ptr, src, PAGE_SIZE))
		memcpy(ptr, src, PAGE_SIZE);
	dst->pages[dst->page_count++] = ptr;

	return 0;
}

static int compress_flush(struct compress *c,
			  struct drm_i915_error_object *dst)
{
	return 0;
}

static void compress_fini(struct compress *c,
			  struct drm_i915_error_object *dst)
{
}

static void err_compression_marker(struct drm_i915_error_state_buf *m)
{
	err_puts(m, "~");
}

#endif

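/*
 * Print one line per captured buffer object: GGTT offset, size, domains,
 * seqno and the various state flags.
 */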
static void print_error_buffers(struct drm_i915_error_state_buf *m,
				const char *name,
				struct drm_i915_error_buffer *err,
				int count)
{
	err_printf(m, "%s [%d]:\n", name, count);

	while (count--) {
		err_printf(m, "    %08x_%08x %8u %02x %02x %02x",
			   upper_32_bits(err->gtt_offset),
			   lower_32_bits(err->gtt_offset),
			   err->size,
			   err->read_domains,
			   err->write_domain,
			   err->wseqno);
		err_puts(m, tiling_flag(err->tiling));
		err_puts(m, dirty_flag(err->dirty));
		err_puts(m, purgeable_flag(err->purgeable));
		err_puts(m, err->userptr ? " userptr" : "");
		err_puts(m, err->engine != -1 ? " " : "");
		err_puts(m, engine_name(m->i915, err->engine));
		err_puts(m, i915_cache_level_str(m->i915, err->cache_level));

		if (err->name)
			err_printf(m, " (name: %d)", err->name);
		if (err->fence_reg != I915_FENCE_REG_NONE)
			err_printf(m, " (fence: %d)", err->fence_reg);

		err_puts(m, "\n");
		err++;
	}
}

static void error_print_instdone(struct drm_i915_error_state_buf *m,
				 const struct drm_i915_error_engine *ee)
{
	int slice;
	int subslice;

	err_printf(m, "  INSTDONE: 0x%08x\n",
		   ee->instdone.instdone);

	if (ee->engine_id != RCS || INTEL_GEN(m->i915) <= 3)
		return;

	err_printf(m, "  SC_INSTDONE: 0x%08x\n",
		   ee->instdone.slice_common);

	if (INTEL_GEN(m->i915) <= 6)
		return;

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  SAMPLER_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.sampler[slice][subslice]);

	for_each_instdone_slice_subslice(m->i915, slice, subslice)
		err_printf(m, "  ROW_INSTDONE[%d][%d]: 0x%08x\n",
			   slice, subslice,
			   ee->instdone.row[slice][subslice]);
}

static const char *bannable(const struct drm_i915_error_context *ctx)
{
	return ctx->bannable ? "" : " (unbannable)";
}

static void error_print_request(struct drm_i915_error_state_buf *m,
				const char *prefix,
				const struct drm_i915_error_request *erq,
				const unsigned long epoch)
{
	if (!erq->seqno)
		return;

	err_printf(m, "%s pid %d, ban score %d, seqno %8x:%08x, prio %d, emitted %dms, start %08x, head %08x, tail %08x\n",
		   prefix, erq->pid, erq->ban_score,
		   erq->context, erq->seqno, erq->sched_attr.priority,
		   jiffies_to_msecs(erq->jiffies - epoch),
		   erq->start, erq->head, erq->tail);
}

static void error_print_context(struct drm_i915_error_state_buf *m,
				const char *header,
				const struct drm_i915_error_context *ctx)
{
	err_printf(m, "%s%s[%d] user_handle %d hw_id %d, prio %d, ban score %d%s guilty %d active %d\n",
		   header, ctx->comm, ctx->pid, ctx->handle, ctx->hw_id,
		   ctx->sched_attr.priority, ctx->ban_score, bannable(ctx),
		   ctx->guilty, ctx->active);
}

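/*
 * Dump the per-engine state snapshot: ring registers, instdone, the active
 * batch range, pending execlist ports and the context that was running.
 */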
static void error_print_engine(struct drm_i915_error_state_buf *m,
			       const struct drm_i915_error_engine *ee,
			       const unsigned long epoch)
{
	int n;

	err_printf(m, "%s command stream:\n",
		   engine_name(m->i915, ee->engine_id));
	err_printf(m, "  IDLE?: %s\n", yesno(ee->idle));
	err_printf(m, "  START: 0x%08x\n", ee->start);
	err_printf(m, "  HEAD:  0x%08x [0x%08x]\n", ee->head, ee->rq_head);
	err_printf(m, "  TAIL:  0x%08x [0x%08x, 0x%08x]\n",
		   ee->tail, ee->rq_post, ee->rq_tail);
	err_printf(m, "  CTL:   0x%08x\n", ee->ctl);
	err_printf(m, "  MODE:  0x%08x\n", ee->mode);
	err_printf(m, "  HWS:   0x%08x\n", ee->hws);
	err_printf(m, "  ACTHD: 0x%08x %08x\n",
		   (u32)(ee->acthd>>32), (u32)ee->acthd);
	err_printf(m, "  IPEIR: 0x%08x\n", ee->ipeir);
	err_printf(m, "  IPEHR: 0x%08x\n", ee->ipehr);

	error_print_instdone(m, ee);

	if (ee->batchbuffer) {
		u64 start = ee->batchbuffer->gtt_offset;
		u64 end = start + ee->batchbuffer->gtt_size;

		err_printf(m, "  batch: [0x%08x_%08x, 0x%08x_%08x]\n",
			   upper_32_bits(start), lower_32_bits(start),
			   upper_32_bits(end), lower_32_bits(end));
	}
	if (INTEL_GEN(m->i915) >= 4) {
		err_printf(m, "  BBADDR: 0x%08x_%08x\n",
			   (u32)(ee->bbaddr>>32), (u32)ee->bbaddr);
		err_printf(m, "  BB_STATE: 0x%08x\n", ee->bbstate);
		err_printf(m, "  INSTPS: 0x%08x\n", ee->instps);
	}
	err_printf(m, "  INSTPM: 0x%08x\n", ee->instpm);
	err_printf(m, "  FADDR: 0x%08x %08x\n", upper_32_bits(ee->faddr),
		   lower_32_bits(ee->faddr));
	if (INTEL_GEN(m->i915) >= 6) {
		err_printf(m, "  RC PSMI: 0x%08x\n", ee->rc_psmi);
		err_printf(m, "  FAULT_REG: 0x%08x\n", ee->fault_reg);
		err_printf(m, "  SYNC_0: 0x%08x\n",
			   ee->semaphore_mboxes[0]);
		err_printf(m, "  SYNC_1: 0x%08x\n",
			   ee->semaphore_mboxes[1]);
		if (HAS_VEBOX(m->i915))
			err_printf(m, "  SYNC_2: 0x%08x\n",
				   ee->semaphore_mboxes[2]);
	}
	if (HAS_PPGTT(m->i915)) {
		err_printf(m, "  GFX_MODE: 0x%08x\n", ee->vm_info.gfx_mode);

		if (INTEL_GEN(m->i915) >= 8) {
			int i;
			for (i = 0; i < 4; i++)
				err_printf(m, "  PDP%d: 0x%016llx\n",
					   i, ee->vm_info.pdp[i]);
		} else {
			err_printf(m, "  PP_DIR_BASE: 0x%08x\n",
				   ee->vm_info.pp_dir_base);
		}
	}
	err_printf(m, "  seqno: 0x%08x\n", ee->seqno);
	err_printf(m, "  last_seqno: 0x%08x\n", ee->last_seqno);
	err_printf(m, "  waiting: %s\n", yesno(ee->waiting));
	err_printf(m, "  ring->head: 0x%08x\n", ee->cpu_ring_head);
	err_printf(m, "  ring->tail: 0x%08x\n", ee->cpu_ring_tail);
	err_printf(m, "  hangcheck stall: %s\n", yesno(ee->hangcheck_stalled));
	err_printf(m, "  hangcheck action: %s\n",
		   hangcheck_action_to_str(ee->hangcheck_action));
	err_printf(m, "  hangcheck action timestamp: %dms (%lu%s)\n",
		   jiffies_to_msecs(ee->hangcheck_timestamp - epoch),
		   ee->hangcheck_timestamp,
		   ee->hangcheck_timestamp == epoch ? "; epoch" : "");
	err_printf(m, "  engine reset count: %u\n", ee->reset_count);

	for (n = 0; n < ee->num_ports; n++) {
		err_printf(m, "  ELSP[%d]:", n);
		error_print_request(m, " ", &ee->execlist[n], epoch);
	}

	error_print_context(m, "  Active context: ", &ee->context);
}

void i915_error_printf(struct drm_i915_error_state_buf *e, const char *f, ...)
{
	va_list args;

	va_start(args, f);
	i915_error_vprintf(e, f, args);
	va_end(args);
}

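/*
 * Emit the contents of a captured object, ascii85-encoded one page at a
 * time, preceded by the compression marker and (optionally) a header
 * naming the object and its GGTT offset.
 */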
static void print_error_obj(struct drm_i915_error_state_buf *m,
			    struct intel_engine_cs *engine,
			    const char *name,
			    struct drm_i915_error_object *obj)
{
	char out[ASCII85_BUFSZ];
	int page;

	if (!obj)
		return;

	if (name) {
		err_printf(m, "%s --- %s = 0x%08x %08x\n",
			   engine ? engine->name : "global", name,
			   upper_32_bits(obj->gtt_offset),
			   lower_32_bits(obj->gtt_offset));
	}

	err_compression_marker(m);
	for (page = 0; page < obj->page_count; page++) {
		int i, len;

		len = PAGE_SIZE;
		if (page == obj->page_count - 1)
			len -= obj->unused;
		len = ascii85_encode_len(len);

		for (i = 0; i < len; i++)
			err_puts(m, ascii85_encode(obj->pages[page][i], out));
	}
	err_puts(m, "\n");
}

static void err_print_capabilities(struct drm_i915_error_state_buf *m,
				   const struct intel_device_info *info,
				   const struct intel_driver_caps *caps)
{
	struct drm_printer p = i915_error_printer(m);

	intel_device_info_dump_flags(info, &p);
	intel_driver_caps_print(caps, &p);
	intel_device_info_dump_topology(&info->sseu, &p);
}

static void err_print_params(struct drm_i915_error_state_buf *m,
			     const struct i915_params *params)
{
	struct drm_printer p = i915_error_printer(m);

	i915_params_dump(params, &p);
}

static void err_print_pciid(struct drm_i915_error_state_buf *m,
			    struct drm_i915_private *i915)
{
	struct pci_dev *pdev = i915->drm.pdev;

	err_printf(m, "PCI ID: 0x%04x\n", pdev->device);
	err_printf(m, "PCI Revision: 0x%02x\n", pdev->revision);
	err_printf(m, "PCI Subsystem: %04x:%04x\n",
		   pdev->subsystem_vendor,
		   pdev->subsystem_device);
}

static void err_print_uc(struct drm_i915_error_state_buf *m,
			 const struct i915_error_uc *error_uc)
{
	struct drm_printer p = i915_error_printer(m);
	const struct i915_gpu_state *error =
		container_of(error_uc, typeof(*error), uc);

	if (!error->device_info.has_guc)
		return;

	intel_uc_fw_dump(&error_uc->guc_fw, &p);
	intel_uc_fw_dump(&error_uc->huc_fw, &p);
	print_error_obj(m, NULL, "GuC log buffer", error_uc->guc_log);
}

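/*
 * Top-level pretty-printer: convert a captured i915_gpu_state into the
 * text form exposed to userspace via the error node in sysfs/debugfs.
 */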
int i915_error_state_to_str(struct drm_i915_error_state_buf *m,
			    const struct i915_gpu_state *error)
{
	struct drm_i915_private *dev_priv = m->i915;
	struct drm_i915_error_object *obj;
	struct timespec64 ts;
	int i, j;

	if (!error) {
		err_printf(m, "No error state collected\n");
		return 0;
	}

	if (IS_ERR(error))
		return PTR_ERR(error);

	if (*error->error_msg)
		err_printf(m, "%s\n", error->error_msg);
	err_printf(m, "Kernel: %s\n", init_utsname()->release);
	ts = ktime_to_timespec64(error->time);
	err_printf(m, "Time: %lld s %ld us\n",
		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
	ts = ktime_to_timespec64(error->boottime);
	err_printf(m, "Boottime: %lld s %ld us\n",
		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
	ts = ktime_to_timespec64(error->uptime);
	err_printf(m, "Uptime: %lld s %ld us\n",
		   (s64)ts.tv_sec, ts.tv_nsec / NSEC_PER_USEC);
	err_printf(m, "Epoch: %lu jiffies (%u HZ)\n", error->epoch, HZ);
	err_printf(m, "Capture: %lu jiffies; %d ms ago, %d ms after epoch\n",
		   error->capture,
		   jiffies_to_msecs(jiffies - error->capture),
		   jiffies_to_msecs(error->capture - error->epoch));

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].hangcheck_stalled &&
		    error->engine[i].context.pid) {
			err_printf(m, "Active process (on ring %s): %s [%d], score %d%s\n",
				   engine_name(m->i915, i),
				   error->engine[i].context.comm,
				   error->engine[i].context.pid,
				   error->engine[i].context.ban_score,
				   bannable(&error->engine[i].context));
		}
	}
	err_printf(m, "Reset count: %u\n", error->reset_count);
	err_printf(m, "Suspend count: %u\n", error->suspend_count);
	err_printf(m, "Platform: %s\n", intel_platform_name(error->device_info.platform));
	err_print_pciid(m, error->i915);

	err_printf(m, "IOMMU enabled?: %d\n", error->iommu);

	if (HAS_CSR(dev_priv)) {
		struct intel_csr *csr = &dev_priv->csr;

		err_printf(m, "DMC loaded: %s\n",
			   yesno(csr->dmc_payload != NULL));
		err_printf(m, "DMC fw version: %d.%d\n",
			   CSR_VERSION_MAJOR(csr->version),
			   CSR_VERSION_MINOR(csr->version));
	}

	err_printf(m, "GT awake: %s\n", yesno(error->awake));
	err_printf(m, "RPM wakelock: %s\n", yesno(error->wakelock));
	err_printf(m, "PM suspended: %s\n", yesno(error->suspended));
	err_printf(m, "EIR: 0x%08x\n", error->eir);
	err_printf(m, "IER: 0x%08x\n", error->ier);
	for (i = 0; i < error->ngtier; i++)
		err_printf(m, "GTIER[%d]: 0x%08x\n", i, error->gtier[i]);
	err_printf(m, "PGTBL_ER: 0x%08x\n", error->pgtbl_er);
	err_printf(m, "FORCEWAKE: 0x%08x\n", error->forcewake);
	err_printf(m, "DERRMR: 0x%08x\n", error->derrmr);
	err_printf(m, "CCID: 0x%08x\n", error->ccid);
	err_printf(m, "Missed interrupts: 0x%08lx\n", dev_priv->gpu_error.missed_irq_rings);

	for (i = 0; i < error->nfence; i++)
		err_printf(m, "  fence[%d] = %08llx\n", i, error->fence[i]);

	if (INTEL_GEN(dev_priv) >= 6) {
		err_printf(m, "ERROR: 0x%08x\n", error->error);

		if (INTEL_GEN(dev_priv) >= 8)
			err_printf(m, "FAULT_TLB_DATA: 0x%08x 0x%08x\n",
				   error->fault_data1, error->fault_data0);

		err_printf(m, "DONE_REG: 0x%08x\n", error->done_reg);
	}

	if (IS_GEN7(dev_priv))
		err_printf(m, "ERR_INT: 0x%08x\n", error->err_int);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		if (error->engine[i].engine_id != -1)
			error_print_engine(m, &error->engine[i], error->epoch);
	}

	for (i = 0; i < ARRAY_SIZE(error->active_vm); i++) {
		char buf[128];
		int len, first = 1;

		if (!error->active_vm[i])
			break;

		len = scnprintf(buf, sizeof(buf), "Active (");
		for (j = 0; j < ARRAY_SIZE(error->engine); j++) {
			if (error->engine[j].vm != error->active_vm[i])
				continue;

			len += scnprintf(buf + len, sizeof(buf), "%s%s",
					 first ? "" : ", ",
					 dev_priv->engine[j]->name);
			first = 0;
		}
		scnprintf(buf + len, sizeof(buf), ")");
		print_error_buffers(m, buf,
				    error->active_bo[i],
				    error->active_bo_count[i]);
	}

	print_error_buffers(m, "Pinned (global)",
			    error->pinned_bo,
			    error->pinned_bo_count);

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		const struct drm_i915_error_engine *ee = &error->engine[i];

		obj = ee->batchbuffer;
		if (obj) {
			err_puts(m, dev_priv->engine[i]->name);
			if (ee->context.pid)
				err_printf(m, " (submitted by %s [%d], ctx %d [%d], score %d%s)",
					   ee->context.comm,
					   ee->context.pid,
					   ee->context.handle,
					   ee->context.hw_id,
					   ee->context.ban_score,
					   bannable(&ee->context));
			err_printf(m, " --- gtt_offset = 0x%08x %08x\n",
				   upper_32_bits(obj->gtt_offset),
				   lower_32_bits(obj->gtt_offset));
			print_error_obj(m, dev_priv->engine[i], NULL, obj);
		}

		for (j = 0; j < ee->user_bo_count; j++)
			print_error_obj(m, dev_priv->engine[i],
					"user", ee->user_bo[j]);

		if (ee->num_requests) {
			err_printf(m, "%s --- %d requests\n",
				   dev_priv->engine[i]->name,
				   ee->num_requests);
			for (j = 0; j < ee->num_requests; j++)
				error_print_request(m, " ",
						    &ee->requests[j],
						    error->epoch);
		}

		if (IS_ERR(ee->waiters)) {
			err_printf(m, "%s --- ? waiters [unable to acquire spinlock]\n",
				   dev_priv->engine[i]->name);
		} else if (ee->num_waiters) {
			err_printf(m, "%s --- %d waiters\n",
				   dev_priv->engine[i]->name,
				   ee->num_waiters);
			for (j = 0; j < ee->num_waiters; j++) {
				err_printf(m, " seqno 0x%08x for %s [%d]\n",
					   ee->waiters[j].seqno,
					   ee->waiters[j].comm,
					   ee->waiters[j].pid);
			}
		}

		print_error_obj(m, dev_priv->engine[i],
				"ringbuffer", ee->ringbuffer);

		print_error_obj(m, dev_priv->engine[i],
				"HW Status", ee->hws_page);

		print_error_obj(m, dev_priv->engine[i],
				"HW context", ee->ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA context", ee->wa_ctx);

		print_error_obj(m, dev_priv->engine[i],
				"WA batchbuffer", ee->wa_batchbuffer);

		print_error_obj(m, dev_priv->engine[i],
				"NULL context", ee->default_state);
	}

	if (error->overlay)
		intel_overlay_print_error_state(m, error->overlay);

	if (error->display)
		intel_display_print_error_state(m, error->display);

	err_print_capabilities(m, &error->device_info, &error->driver_caps);
	err_print_params(m, &error->params);
	err_print_uc(m, &error->uc);

	if (m->bytes == 0 && m->err)
		return m->err;

	return 0;
}

int i915_error_state_buf_init(struct drm_i915_error_state_buf *ebuf,
			      struct drm_i915_private *i915,
			      size_t count, loff_t pos)
{
	memset(ebuf, 0, sizeof(*ebuf));
	ebuf->i915 = i915;

	/* We need to have enough room to store any i915_error_state printf
	 * so that we can move it to the start position.
	 */
	ebuf->size = count + 1 > PAGE_SIZE ? count + 1 : PAGE_SIZE;
	ebuf->buf = kmalloc(ebuf->size,
				GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);

	if (ebuf->buf == NULL) {
		ebuf->size = PAGE_SIZE;
		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
	}

	if (ebuf->buf == NULL) {
		ebuf->size = 128;
		ebuf->buf = kmalloc(ebuf->size, GFP_KERNEL);
	}

	if (ebuf->buf == NULL)
		return -ENOMEM;

	ebuf->start = pos;

	return 0;
}

static void i915_error_object_free(struct drm_i915_error_object *obj)
{
	int page;

	if (obj == NULL)
		return;

	for (page = 0; page < obj->page_count; page++)
		free_page((unsigned long)obj->pages[page]);

	kfree(obj);
}

static __always_inline void free_param(const char *type, void *x)
{
	if (!__builtin_strcmp(type, "char *"))
		kfree(*(void **)x);
}

static void cleanup_params(struct i915_gpu_state *error)
{
#define FREE(T, x, ...) free_param(#T, &error->params.x);
	I915_PARAMS_FOR_EACH(FREE);
#undef FREE
}

static void cleanup_uc_state(struct i915_gpu_state *error)
{
	struct i915_error_uc *error_uc = &error->uc;

	kfree(error_uc->guc_fw.path);
	kfree(error_uc->huc_fw.path);
	i915_error_object_free(error_uc->guc_log);
}

void __i915_gpu_state_free(struct kref *error_ref)
{
	struct i915_gpu_state *error =
		container_of(error_ref, typeof(*error), ref);
	long i, j;

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];

		for (j = 0; j < ee->user_bo_count; j++)
			i915_error_object_free(ee->user_bo[j]);
		kfree(ee->user_bo);

		i915_error_object_free(ee->batchbuffer);
		i915_error_object_free(ee->wa_batchbuffer);
		i915_error_object_free(ee->ringbuffer);
		i915_error_object_free(ee->hws_page);
		i915_error_object_free(ee->ctx);
		i915_error_object_free(ee->wa_ctx);

		kfree(ee->requests);
		if (!IS_ERR_OR_NULL(ee->waiters))
			kfree(ee->waiters);
	}

	for (i = 0; i < ARRAY_SIZE(error->active_bo); i++)
		kfree(error->active_bo[i]);
	kfree(error->pinned_bo);

	kfree(error->overlay);
	kfree(error->display);

	cleanup_params(error);
	cleanup_uc_state(error);

	kfree(error);
}

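/*
 * Copy the backing pages of a vma into an error object, mapping each page
 * through the reserved GGTT error-capture slot and compressing as we go.
 * Returns NULL if there is no vma or the copy could not be completed.
 */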
static struct drm_i915_error_object *
i915_error_object_create(struct drm_i915_private *i915,
			 struct i915_vma *vma)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	const u64 slot = ggtt->error_capture.start;
	struct drm_i915_error_object *dst;
	struct compress compress;
	unsigned long num_pages;
	struct sgt_iter iter;
	dma_addr_t dma;
	int ret;

	if (!vma)
		return NULL;

	num_pages = min_t(u64, vma->size, vma->obj->base.size) >> PAGE_SHIFT;
	num_pages = DIV_ROUND_UP(10 * num_pages, 8); /* worstcase zlib growth */
	dst = kmalloc(sizeof(*dst) + num_pages * sizeof(u32 *),
		      GFP_ATOMIC | __GFP_NOWARN);
	if (!dst)
		return NULL;

	dst->gtt_offset = vma->node.start;
	dst->gtt_size = vma->node.size;
	dst->num_pages = num_pages;
	dst->page_count = 0;
	dst->unused = 0;

	if (!compress_init(&compress)) {
		kfree(dst);
		return NULL;
	}

	ret = -EINVAL;
	for_each_sgt_dma(dma, iter, vma->pages) {
		void __iomem *s;

		ggtt->vm.insert_page(&ggtt->vm, dma, slot, I915_CACHE_NONE, 0);

		s = io_mapping_map_atomic_wc(&ggtt->iomap, slot);
		ret = compress_page(&compress, (void  __force *)s, dst);
		io_mapping_unmap_atomic(s);
		if (ret)
			break;
	}

	if (ret || compress_flush(&compress, dst)) {
		while (dst->page_count--)
			free_page((unsigned long)dst->pages[dst->page_count]);
		kfree(dst);
		dst = NULL;
	}

	compress_fini(&compress, dst);
	return dst;
}

/* The error capture is special as it tries to run underneath the normal
 * locking rules - so we use the raw version of the i915_gem_active lookup.
 */
static inline uint32_t
__active_get_seqno(struct i915_gem_active *active)
{
	struct i915_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->global_seqno : 0;
}

static inline int
__active_get_engine_id(struct i915_gem_active *active)
{
	struct i915_request *request;

	request = __i915_gem_active_peek(active);
	return request ? request->engine->id : -1;
}

static void capture_bo(struct drm_i915_error_buffer *err,
		       struct i915_vma *vma)
{
	struct drm_i915_gem_object *obj = vma->obj;

	err->size = obj->base.size;
	err->name = obj->base.name;

	err->wseqno = __active_get_seqno(&obj->frontbuffer_write);
	err->engine = __active_get_engine_id(&obj->frontbuffer_write);

	err->gtt_offset = vma->node.start;
	err->read_domains = obj->read_domains;
	err->write_domain = obj->write_domain;
	err->fence_reg = vma->fence ? vma->fence->id : -1;
	err->tiling = i915_gem_object_get_tiling(obj);
	err->dirty = obj->mm.dirty;
	err->purgeable = obj->mm.madv != I915_MADV_WILLNEED;
	err->userptr = obj->userptr.mm != NULL;
	err->cache_level = obj->cache_level;
}

static u32 capture_error_bo(struct drm_i915_error_buffer *err,
			    int count, struct list_head *head,
			    bool pinned_only)
{
	struct i915_vma *vma;
	int i = 0;

	list_for_each_entry(vma, head, vm_link) {
		if (!vma->obj)
			continue;

		if (pinned_only && !i915_vma_is_pinned(vma))
			continue;

		capture_bo(err++, vma);
		if (++i == count)
			break;
	}

	return i;
}

/* Generate a semi-unique error code. The code is not meant to have meaning; the
 * code's only purpose is to try to prevent false duplicated bug reports by
 * grossly estimating a GPU error state.
 *
 * TODO Ideally, hashing the batchbuffer would be a very nice way to determine
 * the hang if we could strip the GTT offset information from it.
 *
 * It's only a small step better than a random number in its current form.
 */
static uint32_t i915_error_generate_code(struct drm_i915_private *dev_priv,
					 struct i915_gpu_state *error,
					 int *engine_id)
{
	uint32_t error_code = 0;
	int i;

	/* IPEHR would be an ideal way to detect errors, as it's the gross
	 * measure of "the command that hung". However, it contains some very
	 * common synchronization commands which almost always appear when the
	 * hang is strictly a client bug. Use instdone to help differentiate.
	 */
	for (i = 0; i < I915_NUM_ENGINES; i++) {
		if (error->engine[i].hangcheck_stalled) {
			if (engine_id)
				*engine_id = i;

			return error->engine[i].ipehr ^
			       error->engine[i].instdone.instdone;
		}
	}

	return error_code;
}

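/* Record the fence registers, using the register layout for this gen. */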
static void gem_record_fences(struct i915_gpu_state *error)
{
	struct drm_i915_private *dev_priv = error->i915;
	int i;

	if (INTEL_GEN(dev_priv) >= 6) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_GEN6_LO(i));
	} else if (INTEL_GEN(dev_priv) >= 4) {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ64(FENCE_REG_965_LO(i));
	} else {
		for (i = 0; i < dev_priv->num_fence_regs; i++)
			error->fence[i] = I915_READ(FENCE_REG(i));
	}
	error->nfence = i;
}

static void gen6_record_semaphore_state(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	ee->semaphore_mboxes[0] = I915_READ(RING_SYNC_0(engine->mmio_base));
	ee->semaphore_mboxes[1] = I915_READ(RING_SYNC_1(engine->mmio_base));
	if (HAS_VEBOX(dev_priv))
		ee->semaphore_mboxes[2] =
			I915_READ(RING_SYNC_2(engine->mmio_base));
}

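/*
 * Snapshot the breadcrumb waiters for an engine. Only trylocks are used
 * here; if the lock is contended, the waiter list is marked with
 * ERR_PTR(-EDEADLK) rather than risking a deadlock in the error path.
 */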
static void error_record_engine_waiters(struct intel_engine_cs *engine,
					struct drm_i915_error_engine *ee)
{
	struct intel_breadcrumbs *b = &engine->breadcrumbs;
	struct drm_i915_error_waiter *waiter;
	struct rb_node *rb;
	int count;

	ee->num_waiters = 0;
	ee->waiters = NULL;

	if (RB_EMPTY_ROOT(&b->waiters))
		return;

	if (!spin_trylock_irq(&b->rb_lock)) {
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	count = 0;
	for (rb = rb_first(&b->waiters); rb != NULL; rb = rb_next(rb))
		count++;
	spin_unlock_irq(&b->rb_lock);

	waiter = NULL;
	if (count)
		waiter = kmalloc_array(count,
				       sizeof(struct drm_i915_error_waiter),
				       GFP_ATOMIC);
	if (!waiter)
		return;

	if (!spin_trylock_irq(&b->rb_lock)) {
		kfree(waiter);
		ee->waiters = ERR_PTR(-EDEADLK);
		return;
	}

	ee->waiters = waiter;
	for (rb = rb_first(&b->waiters); rb; rb = rb_next(rb)) {
		struct intel_wait *w = rb_entry(rb, typeof(*w), node);

		strcpy(waiter->comm, w->tsk->comm);
		waiter->pid = w->tsk->pid;
		waiter->seqno = w->seqno;
		waiter++;

		if (++ee->num_waiters == count)
			break;
	}
	spin_unlock_irq(&b->rb_lock);
}

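/*
 * Read back the per-engine registers (fault, instdone, ring head/tail,
 * HWS, PP_DIR_BASE/PDPs, ...) that make up the engine snapshot.
 */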
static void error_record_engine_registers(struct i915_gpu_state *error,
					  struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	struct drm_i915_private *dev_priv = engine->i915;

	if (INTEL_GEN(dev_priv) >= 6) {
		ee->rc_psmi = I915_READ(RING_PSMI_CTL(engine->mmio_base));
		if (INTEL_GEN(dev_priv) >= 8) {
			ee->fault_reg = I915_READ(GEN8_RING_FAULT_REG);
		} else {
			gen6_record_semaphore_state(engine, ee);
			ee->fault_reg = I915_READ(RING_FAULT_REG(engine));
		}
	}

	if (INTEL_GEN(dev_priv) >= 4) {
		ee->faddr = I915_READ(RING_DMA_FADD(engine->mmio_base));
		ee->ipeir = I915_READ(RING_IPEIR(engine->mmio_base));
		ee->ipehr = I915_READ(RING_IPEHR(engine->mmio_base));
		ee->instps = I915_READ(RING_INSTPS(engine->mmio_base));
		ee->bbaddr = I915_READ(RING_BBADDR(engine->mmio_base));
		if (INTEL_GEN(dev_priv) >= 8) {
			ee->faddr |= (u64) I915_READ(RING_DMA_FADD_UDW(engine->mmio_base)) << 32;
			ee->bbaddr |= (u64) I915_READ(RING_BBADDR_UDW(engine->mmio_base)) << 32;
		}
		ee->bbstate = I915_READ(RING_BBSTATE(engine->mmio_base));
	} else {
		ee->faddr = I915_READ(DMA_FADD_I8XX);
		ee->ipeir = I915_READ(IPEIR);
		ee->ipehr = I915_READ(IPEHR);
	}

	intel_engine_get_instdone(engine, &ee->instdone);

	ee->waiting = intel_engine_has_waiter(engine);
	ee->instpm = I915_READ(RING_INSTPM(engine->mmio_base));
	ee->acthd = intel_engine_get_active_head(engine);
	ee->seqno = intel_engine_get_seqno(engine);
	ee->last_seqno = intel_engine_last_submit(engine);
	ee->start = I915_READ_START(engine);
	ee->head = I915_READ_HEAD(engine);
	ee->tail = I915_READ_TAIL(engine);
	ee->ctl = I915_READ_CTL(engine);
	if (INTEL_GEN(dev_priv) > 2)
		ee->mode = I915_READ_MODE(engine);

	if (!HWS_NEEDS_PHYSICAL(dev_priv)) {
		i915_reg_t mmio;

		if (IS_GEN7(dev_priv)) {
			switch (engine->id) {
			default:
			case RCS:
				mmio = RENDER_HWS_PGA_GEN7;
				break;
			case BCS:
				mmio = BLT_HWS_PGA_GEN7;
				break;
			case VCS:
				mmio = BSD_HWS_PGA_GEN7;
				break;
			case VECS:
				mmio = VEBOX_HWS_PGA_GEN7;
				break;
			}
		} else if (IS_GEN6(engine->i915)) {
			mmio = RING_HWS_PGA_GEN6(engine->mmio_base);
		} else {
			/* XXX: gen8 returns to sanity */
			mmio = RING_HWS_PGA(engine->mmio_base);
		}

		ee->hws = I915_READ(mmio);
	}

	ee->idle = intel_engine_is_idle(engine);
	ee->hangcheck_timestamp = engine->hangcheck.action_timestamp;
	ee->hangcheck_action = engine->hangcheck.action;
	ee->hangcheck_stalled = engine->hangcheck.stalled;
	ee->reset_count = i915_reset_engine_count(&dev_priv->gpu_error,
						  engine);

	if (HAS_PPGTT(dev_priv)) {
		int i;

		ee->vm_info.gfx_mode = I915_READ(RING_MODE_GEN7(engine));

		if (IS_GEN6(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE_READ(engine));
		else if (IS_GEN7(dev_priv))
			ee->vm_info.pp_dir_base =
				I915_READ(RING_PP_DIR_BASE(engine));
		else if (INTEL_GEN(dev_priv) >= 8)
			for (i = 0; i < 4; i++) {
				ee->vm_info.pdp[i] =
					I915_READ(GEN8_RING_PDP_UDW(engine, i));
				ee->vm_info.pdp[i] <<= 32;
				ee->vm_info.pdp[i] |=
					I915_READ(GEN8_RING_PDP_LDW(engine, i));
			}
	}
}

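/*
 * Record the identifying details of a single request: its context, seqno,
 * scheduling attributes and position within the ring.
 */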
static void record_request(struct i915_request *request,
			   struct drm_i915_error_request *erq)
{
	struct i915_gem_context *ctx = request->gem_context;

	erq->context = ctx->hw_id;
	erq->sched_attr = request->sched.attr;
	erq->ban_score = atomic_read(&ctx->ban_score);
	erq->seqno = request->global_seqno;
	erq->jiffies = request->emitted_jiffies;
	erq->start = i915_ggtt_offset(request->ring->vma);
	erq->head = request->head;
	erq->tail = request->tail;

	rcu_read_lock();
	erq->pid = ctx->pid ? pid_nr(ctx->pid) : 0;
	rcu_read_unlock();
}

static void engine_record_requests(struct intel_engine_cs *engine,
				   struct i915_request *first,
				   struct drm_i915_error_engine *ee)
{
	struct i915_request *request;
	int count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline.requests, link)
		count++;
	if (!count)
		return;

	ee->requests = kcalloc(count, sizeof(*ee->requests), GFP_ATOMIC);
	if (!ee->requests)
		return;

	ee->num_requests = count;

	count = 0;
	request = first;
	list_for_each_entry_from(request, &engine->timeline.requests, link) {
		if (count >= ee->num_requests) {
			/*
			 * If the ring request list was changed in
			 * between the point where the error request
			 * list was created and dimensioned and this
			 * point then just exit early to avoid crashes.
			 *
			 * We don't need to communicate that the
			 * request list changed state during error
			 * state capture and that the error state is
			 * slightly incorrect as a consequence since we
			 * are typically only interested in the request
			 * list state at the point of error state
			 * capture, not in any changes happening during
			 * the capture.
			 */
			break;
		}

		record_request(request, &ee->requests[count++]);
	}
	ee->num_requests = count;
}

static void error_record_engine_execlists(struct intel_engine_cs *engine,
					  struct drm_i915_error_engine *ee)
{
	const struct intel_engine_execlists * const execlists = &engine->execlists;
	unsigned int n;

	for (n = 0; n < execlists_num_ports(execlists); n++) {
		struct i915_request *rq = port_request(&execlists->port[n]);

		if (!rq)
			break;

		record_request(rq, &ee->execlist[n]);
	}

	ee->num_ports = n;
}

static void record_context(struct drm_i915_error_context *e,
			   struct i915_gem_context *ctx)
{
	if (ctx->pid) {
		struct task_struct *task;

		rcu_read_lock();
		task = pid_task(ctx->pid, PIDTYPE_PID);
		if (task) {
			strcpy(e->comm, task->comm);
			e->pid = task->pid;
		}
		rcu_read_unlock();
	}

	e->handle = ctx->user_handle;
	e->hw_id = ctx->hw_id;
	e->sched_attr = ctx->sched;
	e->ban_score = atomic_read(&ctx->ban_score);
	e->bannable = i915_gem_context_is_bannable(ctx);
	e->guilty = atomic_read(&ctx->guilty_count);
	e->active = atomic_read(&ctx->active_count);
}

static void request_record_user_bo(struct i915_request *request,
				   struct drm_i915_error_engine *ee)
{
	struct i915_capture_list *c;
	struct drm_i915_error_object **bo;
	long count, max;

	max = 0;
	for (c = request->capture_list; c; c = c->next)
		max++;
	if (!max)
		return;

	bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
	if (!bo) {
		/* If we can't capture everything, try to capture something. */
		max = min_t(long, max, PAGE_SIZE / sizeof(*bo));
		bo = kmalloc_array(max, sizeof(*bo), GFP_ATOMIC);
	}
	if (!bo)
		return;

	count = 0;
	for (c = request->capture_list; c; c = c->next) {
		bo[count] = i915_error_object_create(request->i915, c->vma);
		if (!bo[count])
			break;
		if (++count == max)
			break;
	}

	ee->user_bo = bo;
	ee->user_bo_count = count;
}

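/*
 * Capture a bare GEM object (such as the engine default state) by wrapping
 * it in a temporary on-stack vma so it can be fed through
 * i915_error_object_create().
 */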
static struct drm_i915_error_object *
capture_object(struct drm_i915_private *dev_priv,
	       struct drm_i915_gem_object *obj)
{
	if (obj && i915_gem_object_has_pages(obj)) {
		struct i915_vma fake = {
			.node = { .start = U64_MAX, .size = obj->base.size },
			.size = obj->base.size,
			.pages = obj->mm.pages,
			.obj = obj,
		};

		return i915_error_object_create(dev_priv, &fake);
	} else {
		return NULL;
	}
}

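/*
 * Walk every engine, recording its registers, waiters and execlist state,
 * and capture the objects associated with the active request (batch,
 * ring, context image, etc.).
 */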
static void gem_record_rings(struct i915_gpu_state *error)
{
	struct drm_i915_private *i915 = error->i915;
	struct i915_ggtt *ggtt = &i915->ggtt;
	int i;

	for (i = 0; i < I915_NUM_ENGINES; i++) {
		struct intel_engine_cs *engine = i915->engine[i];
		struct drm_i915_error_engine *ee = &error->engine[i];
		struct i915_request *request;

		ee->engine_id = -1;

		if (!engine)
			continue;

		ee->engine_id = i;

		error_record_engine_registers(error, engine, ee);
		error_record_engine_waiters(engine, ee);
		error_record_engine_execlists(engine, ee);

		request = i915_gem_find_active_request(engine);
		if (request) {
			struct i915_gem_context *ctx = request->gem_context;
			struct intel_ring *ring;

			ee->vm = ctx->ppgtt ? &ctx->ppgtt->vm : &ggtt->vm;

			record_context(&ee->context, ctx);

			/* We need to copy these to an anonymous buffer
			 * as the simplest method to avoid being overwritten
			 * by userspace.
			 */
			ee->batchbuffer =
				i915_error_object_create(i915, request->batch);

			if (HAS_BROKEN_CS_TLB(i915))
				ee->wa_batchbuffer =
					i915_error_object_create(i915,
								 engine->scratch);
			request_record_user_bo(request, ee);

			ee->ctx =
				i915_error_object_create(i915,
							 request->hw_context->state);

			error->simulated |=
				i915_gem_context_no_error_capture(ctx);

			ee->rq_head = request->head;
			ee->rq_post = request->postfix;
			ee->rq_tail = request->tail;

			ring = request->ring;
			ee->cpu_ring_head = ring->head;
			ee->cpu_ring_tail = ring->tail;
			ee->ringbuffer =
				i915_error_object_create(i915, ring->vma);

			engine_record_requests(engine, request, ee);
		}

		ee->hws_page =
			i915_error_object_create(i915,
						 engine->status_page.vma);

		ee->wa_ctx = i915_error_object_create(i915, engine->wa_ctx.vma);

		ee->default_state = capture_object(i915, engine->default_state);
	}
}

static void gem_capture_vm(struct i915_gpu_state *error,
			   struct i915_address_space *vm,
			   int idx)
{
	struct drm_i915_error_buffer *active_bo;
	struct i915_vma *vma;
	int count;

	count = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count++;

	active_bo = NULL;
	if (count)
		active_bo = kcalloc(count, sizeof(*active_bo), GFP_ATOMIC);
	if (active_bo)
		count = capture_error_bo(active_bo, count, &vm->active_list, false);
	else
		count = 0;

	error->active_vm[idx] = vm;
	error->active_bo[idx] = active_bo;
	error->active_bo_count[idx] = count;
}

static void capture_active_buffers(struct i915_gpu_state *error)
{
	int cnt = 0, i, j;

	BUILD_BUG_ON(ARRAY_SIZE(error->engine) > ARRAY_SIZE(error->active_bo));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_vm));
	BUILD_BUG_ON(ARRAY_SIZE(error->active_bo) != ARRAY_SIZE(error->active_bo_count));

	/* Scan each engine looking for unique active contexts/vm */
	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		struct drm_i915_error_engine *ee = &error->engine[i];
		bool found;

		if (!ee->vm)
			continue;

		found = false;
		for (j = 0; j < i && !found; j++)
			found = error->engine[j].vm == ee->vm;
		if (!found)
			gem_capture_vm(error, ee->vm, cnt++);
	}
}

static void capture_pinned_buffers(struct i915_gpu_state *error)
{
	struct i915_address_space *vm = &error->i915->ggtt.vm;
	struct drm_i915_error_buffer *bo;
	struct i915_vma *vma;
	int count_inactive, count_active;

	count_inactive = 0;
	list_for_each_entry(vma, &vm->inactive_list, vm_link)
		count_inactive++;

	count_active = 0;
	list_for_each_entry(vma, &vm->active_list, vm_link)
		count_active++;

	bo = NULL;
	if (count_inactive + count_active)
		bo = kcalloc(count_inactive + count_active,
			     sizeof(*bo), GFP_ATOMIC);
	if (!bo)
		return;

	count_inactive = capture_error_bo(bo, count_inactive,
					  &vm->active_list, true);
	count_active = capture_error_bo(bo + count_inactive, count_active,
					&vm->inactive_list, true);
	error->pinned_bo_count = count_inactive + count_active;
	error->pinned_bo = bo;
}

static void capture_uc_state(struct i915_gpu_state *error)
{
	struct drm_i915_private *i915 = error->i915;
	struct i915_error_uc *error_uc = &error->uc;

	/* Capturing uC state won't be useful if there is no GuC */
	if (!error->device_info.has_guc)
		return;

	error_uc->guc_fw = i915->guc.fw;
	error_uc->huc_fw = i915->huc.fw;

	/* Non-default firmware paths will be specified by the modparam.
	 * As modparams are generally accessible from userspace, make
	 * explicit copies of the firmware paths.
	 */
	error_uc->guc_fw.path = kstrdup(i915->guc.fw.path, GFP_ATOMIC);
	error_uc->huc_fw.path = kstrdup(i915->huc.fw.path, GFP_ATOMIC);
	error_uc->guc_log = i915_error_object_create(i915, i915->guc.log.vma);
}

/* Capture all registers which don't fit into another category. */
static void capture_reg_state(struct i915_gpu_state *error)
{
	struct drm_i915_private *dev_priv = error->i915;
	int i;

	/* General organization
	 * 1. Registers specific to a single generation
	 * 2. Registers which belong to multiple generations
	 * 3. Feature specific registers.
	 * 4. Everything else
	 * Please try to follow the order.
	 */

	/* 1: Registers specific to a single generation */
	if (IS_VALLEYVIEW(dev_priv)) {
		error->gtier[0] = I915_READ(GTIER);
		error->ier = I915_READ(VLV_IER);
		error->forcewake = I915_READ_FW(FORCEWAKE_VLV);
	}

	if (IS_GEN7(dev_priv))
		error->err_int = I915_READ(GEN7_ERR_INT);

	if (INTEL_GEN(dev_priv) >= 8) {
		error->fault_data0 = I915_READ(GEN8_FAULT_TLB_DATA0);
		error->fault_data1 = I915_READ(GEN8_FAULT_TLB_DATA1);
	}

	if (IS_GEN6(dev_priv)) {
		error->forcewake = I915_READ_FW(FORCEWAKE);
		error->gab_ctl = I915_READ(GAB_CTL);
		error->gfx_mode = I915_READ(GFX_MODE);
	}

	/* 2: Registers which belong to multiple generations */
	if (INTEL_GEN(dev_priv) >= 7)
		error->forcewake = I915_READ_FW(FORCEWAKE_MT);

	if (INTEL_GEN(dev_priv) >= 6) {
		error->derrmr = I915_READ(DERRMR);
		error->error = I915_READ(ERROR_GEN6);
		error->done_reg = I915_READ(DONE_REG);
	}

	if (INTEL_GEN(dev_priv) >= 5)
		error->ccid = I915_READ(CCID);

	/* 3: Feature specific registers */
	if (IS_GEN6(dev_priv) || IS_GEN7(dev_priv)) {
		error->gam_ecochk = I915_READ(GAM_ECOCHK);
		error->gac_eco = I915_READ(GAC_ECO_BITS);
	}

	/* 4: Everything else */
	if (INTEL_GEN(dev_priv) >= 11) {
		error->ier = I915_READ(GEN8_DE_MISC_IER);
		error->gtier[0] = I915_READ(GEN11_RENDER_COPY_INTR_ENABLE);
		error->gtier[1] = I915_READ(GEN11_VCS_VECS_INTR_ENABLE);
		error->gtier[2] = I915_READ(GEN11_GUC_SG_INTR_ENABLE);
		error->gtier[3] = I915_READ(GEN11_GPM_WGBOXPERF_INTR_ENABLE);
		error->gtier[4] = I915_READ(GEN11_CRYPTO_RSVD_INTR_ENABLE);
		error->gtier[5] = I915_READ(GEN11_GUNIT_CSME_INTR_ENABLE);
		error->ngtier = 6;
	} else if (INTEL_GEN(dev_priv) >= 8) {
		error->ier = I915_READ(GEN8_DE_MISC_IER);
		for (i = 0; i < 4; i++)
			error->gtier[i] = I915_READ(GEN8_GT_IER(i));
		error->ngtier = 4;
	} else if (HAS_PCH_SPLIT(dev_priv)) {
		error->ier = I915_READ(DEIER);
		error->gtier[0] = I915_READ(GTIER);
		error->ngtier = 1;
	} else if (IS_GEN2(dev_priv)) {
		error->ier = I915_READ16(IER);
	} else if (!IS_VALLEYVIEW(dev_priv)) {
		error->ier = I915_READ(IER);
	}
	error->eir = I915_READ(EIR);
	error->pgtbl_er = I915_READ(PGTBL_ER);
}

static void i915_error_capture_msg(struct drm_i915_private *dev_priv,
				   struct i915_gpu_state *error,
				   u32 engine_mask,
				   const char *error_msg)
{
	u32 ecode;
	int engine_id = -1, len;

	ecode = i915_error_generate_code(dev_priv, error, &engine_id);

	len = scnprintf(error->error_msg, sizeof(error->error_msg),
			"GPU HANG: ecode %d:%d:0x%08x",
			INTEL_GEN(dev_priv), engine_id, ecode);

	if (engine_id != -1 && error->engine[engine_id].context.pid)
		len += scnprintf(error->error_msg + len,
				 sizeof(error->error_msg) - len,
				 ", in %s [%d]",
				 error->engine[engine_id].context.comm,
				 error->engine[engine_id].context.pid);

	scnprintf(error->error_msg + len, sizeof(error->error_msg) - len,
		  ", reason: %s, action: %s",
		  error_msg,
		  engine_mask ? "reset" : "continue");
}

static void capture_gen_state(struct i915_gpu_state *error)
{
	struct drm_i915_private *i915 = error->i915;

	error->awake = i915->gt.awake;
	error->wakelock = atomic_read(&i915->runtime_pm.wakeref_count);
	error->suspended = i915->runtime_pm.suspended;

	error->iommu = -1;
#ifdef CONFIG_INTEL_IOMMU
	error->iommu = intel_iommu_gfx_mapped;
#endif
	error->reset_count = i915_reset_count(&i915->gpu_error);
	error->suspend_count = i915->suspend_count;

	memcpy(&error->device_info,
	       INTEL_INFO(i915),
	       sizeof(error->device_info));
	error->driver_caps = i915->caps;
}

static __always_inline void dup_param(const char *type, void *x)
{
	if (!__builtin_strcmp(type, "char *"))
		*(void **)x = kstrdup(*(void **)x, GFP_ATOMIC);
}

static void capture_params(struct i915_gpu_state *error)
{
	error->params = i915_modparams;
#define DUP(T, x, ...) dup_param(#T, &error->params.x);
	I915_PARAMS_FOR_EACH(DUP);
#undef DUP
}

static unsigned long capture_find_epoch(const struct i915_gpu_state *error)
{
	unsigned long epoch = error->capture;
	int i;

	for (i = 0; i < ARRAY_SIZE(error->engine); i++) {
		const struct drm_i915_error_engine *ee = &error->engine[i];

		if (ee->hangcheck_stalled &&
		    time_before(ee->hangcheck_timestamp, epoch))
			epoch = ee->hangcheck_timestamp;
	}

	return epoch;
}

static void capture_finish(struct i915_gpu_state *error)
{
	struct i915_ggtt *ggtt = &error->i915->ggtt;
	const u64 slot = ggtt->error_capture.start;

	ggtt->vm.clear_range(&ggtt->vm, slot, PAGE_SIZE);
}

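/*
 * Capture callback invoked via stop_machine() (see i915_capture_gpu_state()),
 * so the snapshot below is taken while the rest of the machine is held off.
 */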
static int capture(void *data)
{
	struct i915_gpu_state *error = data;

	error->time = ktime_get_real();
	error->boottime = ktime_get_boottime();
	error->uptime = ktime_sub(ktime_get(),
				  error->i915->gt.last_init_time);
	error->capture = jiffies;

	capture_params(error);
	capture_gen_state(error);
	capture_uc_state(error);
	capture_reg_state(error);
	gem_record_fences(error);
	gem_record_rings(error);
	capture_active_buffers(error);
	capture_pinned_buffers(error);

	error->overlay = intel_overlay_capture_error_state(error->i915);
	error->display = intel_display_capture_error_state(error->i915);

	error->epoch = capture_find_epoch(error);

	capture_finish(error);
	return 0;
}

#define DAY_AS_SECONDS(x) (24 * 60 * 60 * (x))

struct i915_gpu_state *
i915_capture_gpu_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	error = kzalloc(sizeof(*error), GFP_ATOMIC);
	if (!error)
		return NULL;

	kref_init(&error->ref);
	error->i915 = i915;

	stop_machine(capture, error, NULL);

	return error;
}

/**
 * i915_capture_error_state - capture an error record for later analysis
 * @i915: i915 device
 * @engine_mask: the mask of engines triggering the hang
 * @error_msg: a message to insert into the error capture header
 *
 * Should be called when an error is detected (either a hang or an error
 * interrupt) to capture error state from the time of the error.  Fills
 * out a structure which becomes available in debugfs for user level tools
 * to pick up.
 */
void i915_capture_error_state(struct drm_i915_private *i915,
			      u32 engine_mask,
			      const char *error_msg)
{
	static bool warned;
	struct i915_gpu_state *error;
	unsigned long flags;

	if (!i915_modparams.error_capture)
		return;

	if (READ_ONCE(i915->gpu_error.first_error))
		return;

	error = i915_capture_gpu_state(i915);
	if (!error) {
		DRM_DEBUG_DRIVER("out of memory, not capturing error state\n");
		i915_disable_error_state(i915, -ENOMEM);
		return;
	}

	i915_error_capture_msg(i915, error, engine_mask, error_msg);
	DRM_INFO("%s\n", error->error_msg);

	if (!error->simulated) {
		spin_lock_irqsave(&i915->gpu_error.lock, flags);
		if (!i915->gpu_error.first_error) {
			i915->gpu_error.first_error = error;
			error = NULL;
		}
		spin_unlock_irqrestore(&i915->gpu_error.lock, flags);
	}

	if (error) {
		__i915_gpu_state_free(&error->ref);
		return;
	}

	if (!warned &&
	    ktime_get_real_seconds() - DRIVER_TIMESTAMP < DAY_AS_SECONDS(180)) {
		DRM_INFO("GPU hangs can indicate a bug anywhere in the entire gfx stack, including userspace.\n");
		DRM_INFO("Please file a _new_ bug report on bugs.freedesktop.org against DRI -> DRM/Intel\n");
		DRM_INFO("drm/i915 developers can then reassign to the right component if it's not a kernel issue.\n");
		DRM_INFO("The gpu crash dump is required to analyze gpu hangs, so please always attach it.\n");
		DRM_INFO("GPU crash dump saved to /sys/class/drm/card%d/error\n",
			 i915->drm.primary->index);
		warned = true;
	}
}

struct i915_gpu_state *
i915_first_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	if (error)
		i915_gpu_state_get(error);
	spin_unlock_irq(&i915->gpu_error.lock);

	return error;
}

void i915_reset_error_state(struct drm_i915_private *i915)
{
	struct i915_gpu_state *error;

	spin_lock_irq(&i915->gpu_error.lock);
	error = i915->gpu_error.first_error;
	i915->gpu_error.first_error = NULL;
	spin_unlock_irq(&i915->gpu_error.lock);

	if (!IS_ERR(error))
		i915_gpu_state_put(error);
}

void i915_disable_error_state(struct drm_i915_private *i915, int err)
{
	spin_lock_irq(&i915->gpu_error.lock);
	if (!i915->gpu_error.first_error)
		i915->gpu_error.first_error = ERR_PTR(err);
	spin_unlock_irq(&i915->gpu_error.lock);
}