/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/slab.h> /* fault-inject.h is not standalone! */

#include <linux/fault-inject.h>
#include <linux/log2.h>
#include <linux/random.h>
#include <linux/seq_file.h>
#include <linux/stop_machine.h>

#include <drm/drmP.h>
#include <drm/i915_drm.h>

#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"
#include "intel_frontbuffer.h"

#define I915_GFP_DMA (GFP_KERNEL | __GFP_HIGHMEM)

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view
 * are different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on or
 * with the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
 * and exists for the lifetime of a VMA.
 *
 * Core API is designed to have copy semantics which means that passed in
 * struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 *
 */
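/*
 * Illustrative sketch only (not compiled): how display code could request a
 * non-normal view when pinning an object into the GGTT. The rotation
 * parameters are abridged; in real code they are derived from the
 * framebuffer, and i915_gem_object_ggtt_pin() returns the bound VMA.
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *	struct i915_vma *vma;
 *
 *	(fill in view.rotated from the plane/framebuffer state)
 *	vma = i915_gem_object_ggtt_pin(obj, &view, 0, 0, PIN_MAPPABLE);
 *	if (IS_ERR(vma))
 *		return PTR_ERR(vma);
 */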

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

static void gen6_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	/* Note that as an uncached mmio write, this should flush the
	 * WCB of the writes into the GGTT before it triggers the invalidate.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
}

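/*
 * In addition to the uncached mmio flush above, the GuC keeps its own TLB of
 * GGTT entries, so poke GEN8_GTCR to invalidate that as well.
 */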
static void guc_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	gen6_ggtt_invalidate(dev_priv);
	I915_WRITE(GEN8_GTCR, GEN8_GTCR_INVALIDATE);
}

static void gmch_ggtt_invalidate(struct drm_i915_private *dev_priv)
{
	intel_gtt_chipset_flush();
}

static inline void i915_ggtt_invalidate(struct drm_i915_private *i915)
{
	i915->ggtt.invalidate(i915);
}

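/*
 * Reconcile the i915.enable_ppgtt module parameter with what the hardware and
 * the current submission mode can actually support. Returns the PPGTT level
 * to use: 0 = disabled, 1 = aliasing, 2 = full, 3 = full 48bit.
 */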
int intel_sanitize_enable_ppgtt(struct drm_i915_private *dev_priv,
				int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;
	bool has_full_48bit_ppgtt;

	has_aliasing_ppgtt = dev_priv->info.has_aliasing_ppgtt;
	has_full_ppgtt = dev_priv->info.has_full_ppgtt;
	has_full_48bit_ppgtt = dev_priv->info.has_full_48bit_ppgtt;

	if (intel_vgpu_active(dev_priv)) {
		/* emulation is too hard */
		has_full_ppgtt = false;
		has_full_48bit_ppgtt = false;
	}

	if (!has_aliasing_ppgtt)
		return 0;

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (enable_ppgtt == 0 && INTEL_GEN(dev_priv) < 9)
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

	if (enable_ppgtt == 3 && has_full_48bit_ppgtt)
		return 3;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (IS_GEN6(dev_priv) && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev_priv) && dev_priv->drm.pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_GEN(dev_priv) >= 8 && i915.enable_execlists && has_full_ppgtt)
		return has_full_48bit_ppgtt ? 3 : 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags;
	int ret;

	trace_i915_va_alloc(vma);
	ret = vma->vm->allocate_va_range(vma->vm, vma->node.start, vma->size);
	if (ret)
		return ret;

	vma->pages = vma->obj->mm.pages;

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
				cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level)
{
	gen8_pte_t pte = _PAGE_PRESENT | _PAGE_RW;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
				  const enum i915_cache_level level)
{
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

#define gen8_pdpe_encode gen8_pde_encode
#define gen8_pml4e_encode gen8_pde_encode

static gen6_pte_t snb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t byt_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 flags)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_pte_t iris_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  u32 unused)
{
	gen6_pte_t pte = GEN6_PTE_VALID;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

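/*
 * Page-table pages are allocated through a small per-vm cache (a pagevec of
 * recently freed pages) so that pages already converted to write-combining
 * can be reused without another set_pages_array_wc() round trip.
 */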
static struct page *vm_alloc_page(struct i915_address_space *vm, gfp_t gfp)
{
	struct page *page;

	if (I915_SELFTEST_ONLY(should_fail(&vm->fault_attr, 1)))
		i915_gem_shrink_all(vm->i915);

	if (vm->free_pages.nr)
		return vm->free_pages.pages[--vm->free_pages.nr];

	page = alloc_page(gfp);
	if (!page)
		return NULL;

	if (vm->pt_kmap_wc)
		set_pages_array_wc(&page, 1);

	return page;
}

static void vm_free_pages_release(struct i915_address_space *vm)
{
	GEM_BUG_ON(!pagevec_count(&vm->free_pages));

	if (vm->pt_kmap_wc)
		set_pages_array_wb(vm->free_pages.pages,
				   pagevec_count(&vm->free_pages));

	__pagevec_release(&vm->free_pages);
}

static void vm_free_page(struct i915_address_space *vm, struct page *page)
{
	if (!pagevec_add(&vm->free_pages, page))
		vm_free_pages_release(vm);
}

static int __setup_page_dma(struct i915_address_space *vm,
			    struct i915_page_dma *p,
			    gfp_t gfp)
{
	p->page = vm_alloc_page(vm, gfp | __GFP_NOWARN | __GFP_NORETRY);
	if (unlikely(!p->page))
		return -ENOMEM;

	p->daddr = dma_map_page(vm->dma, p->page, 0, PAGE_SIZE,
				PCI_DMA_BIDIRECTIONAL);
	if (unlikely(dma_mapping_error(vm->dma, p->daddr))) {
		vm_free_page(vm, p->page);
		return -ENOMEM;
	}

	return 0;
}

static int setup_page_dma(struct i915_address_space *vm,
			  struct i915_page_dma *p)
{
	return __setup_page_dma(vm, p, I915_GFP_DMA);
}

static void cleanup_page_dma(struct i915_address_space *vm,
			     struct i915_page_dma *p)
{
	dma_unmap_page(vm->dma, p->daddr, PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
	vm_free_page(vm, p->page);
}

#define kmap_atomic_px(px) kmap_atomic(px_base(px)->page)

#define setup_px(vm, px) setup_page_dma((vm), px_base(px))
#define cleanup_px(vm, px) cleanup_page_dma((vm), px_base(px))
#define fill_px(vm, px, v) fill_page_dma((vm), px_base(px), (v))
#define fill32_px(vm, px, v) fill_page_dma_32((vm), px_base(px), (v))

static void fill_page_dma(struct i915_address_space *vm,
			  struct i915_page_dma *p,
			  const u64 val)
{
	u64 * const vaddr = kmap_atomic(p->page);
	int i;

	for (i = 0; i < 512; i++)
		vaddr[i] = val;

	kunmap_atomic(vaddr);
}

static void fill_page_dma_32(struct i915_address_space *vm,
			     struct i915_page_dma *p,
			     const u32 v)
{
	fill_page_dma(vm, p, (u64)v << 32 | v);
}

static int
setup_scratch_page(struct i915_address_space *vm, gfp_t gfp)
{
	return __setup_page_dma(vm, &vm->scratch_page, gfp | __GFP_ZERO);
}

static void cleanup_scratch_page(struct i915_address_space *vm)
{
	cleanup_page_dma(vm, &vm->scratch_page);
}

static struct i915_page_table *alloc_pt(struct i915_address_space *vm)
{
	struct i915_page_table *pt;

	pt = kmalloc(sizeof(*pt), GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pt))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_px(vm, pt))) {
		kfree(pt);
		return ERR_PTR(-ENOMEM);
	}

	pt->used_ptes = 0;
	return pt;
}

static void free_pt(struct i915_address_space *vm, struct i915_page_table *pt)
{
	cleanup_px(vm, pt);
	kfree(pt);
}

static void gen8_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	fill_px(vm, pt,
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC));
}

static void gen6_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	fill32_px(vm, pt,
		  vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0));
}

static struct i915_page_directory *alloc_pd(struct i915_address_space *vm)
{
	struct i915_page_directory *pd;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pd))
		return ERR_PTR(-ENOMEM);

	if (unlikely(setup_px(vm, pd))) {
		kfree(pd);
		return ERR_PTR(-ENOMEM);
	}

	pd->used_pdes = 0;
	return pd;
}

static void free_pd(struct i915_address_space *vm,
		    struct i915_page_directory *pd)
{
	cleanup_px(vm, pd);
	kfree(pd);
}

static void gen8_initialize_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd)
{
	unsigned int i;

	fill_px(vm, pd,
		gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC));
	for (i = 0; i < I915_PDES; i++)
		pd->page_table[i] = vm->scratch_pt;
}

static int __pdp_init(struct i915_address_space *vm,
		      struct i915_page_directory_pointer *pdp)
{
	const unsigned int pdpes = I915_PDPES_PER_PDP(vm->i915);
	unsigned int i;

	pdp->page_directory = kmalloc_array(pdpes, sizeof(*pdp->page_directory),
					    GFP_KERNEL | __GFP_NOWARN);
	if (unlikely(!pdp->page_directory))
		return -ENOMEM;

	for (i = 0; i < pdpes; i++)
		pdp->page_directory[i] = vm->scratch_pd;

	return 0;
}

static void __pdp_fini(struct i915_page_directory_pointer *pdp)
{
	kfree(pdp->page_directory);
	pdp->page_directory = NULL;
}

static struct i915_page_directory_pointer *
alloc_pdp(struct i915_address_space *vm)
{
	struct i915_page_directory_pointer *pdp;
	int ret = -ENOMEM;

	WARN_ON(!USES_FULL_48BIT_PPGTT(vm->i915));

	pdp = kzalloc(sizeof(*pdp), GFP_KERNEL);
	if (!pdp)
		return ERR_PTR(-ENOMEM);

	ret = __pdp_init(vm, pdp);
	if (ret)
		goto fail_bitmap;

	ret = setup_px(vm, pdp);
	if (ret)
		goto fail_page_m;

	return pdp;

fail_page_m:
	__pdp_fini(pdp);
fail_bitmap:
	kfree(pdp);

	return ERR_PTR(ret);
}

static void free_pdp(struct i915_address_space *vm,
		     struct i915_page_directory_pointer *pdp)
{
	__pdp_fini(pdp);
	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
		cleanup_px(vm, pdp);
		kfree(pdp);
	}
}

static void gen8_initialize_pdp(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp)
{
	gen8_ppgtt_pdpe_t scratch_pdpe;

	scratch_pdpe = gen8_pdpe_encode(px_dma(vm->scratch_pd), I915_CACHE_LLC);

	fill_px(vm, pdp, scratch_pdpe);
}

static void gen8_initialize_pml4(struct i915_address_space *vm,
				 struct i915_pml4 *pml4)
{
	unsigned int i;

	fill_px(vm, pml4,
		gen8_pml4e_encode(px_dma(vm->scratch_pdp), I915_CACHE_LLC));
	for (i = 0; i < GEN8_PML4ES_PER_PML4; i++)
		pml4->pdps[i] = vm->scratch_pdp;
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct drm_i915_gem_request *req,
			  unsigned entry,
			  dma_addr_t addr)
{
	struct intel_engine_cs *engine = req->engine;
	u32 *cs;

	BUG_ON(entry >= 4);

	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_UDW(engine, entry));
	*cs++ = upper_32_bits(addr);
	*cs++ = MI_LOAD_REGISTER_IMM(1);
	*cs++ = i915_mmio_reg_offset(GEN8_RING_PDP_LDW(engine, entry));
	*cs++ = lower_32_bits(addr);
	intel_ring_advance(req, cs);

	return 0;
}

static int gen8_legacy_mm_switch(struct i915_hw_ppgtt *ppgtt,
				 struct drm_i915_gem_request *req)
{
	int i, ret;

	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);

		ret = gen8_write_pdp(req, i, pd_daddr);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen8_48b_mm_switch(struct i915_hw_ppgtt *ppgtt,
			      struct drm_i915_gem_request *req)
{
	return gen8_write_pdp(req, 0, px_dma(&ppgtt->pml4));
}

/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 * the page table structures, we mark them dirty so that
 * context switching/execlist queuing code takes extra steps
 * to ensure that tlbs are flushed.
 */
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.i915)->ring_mask;
}

/* Removes entries from a single page table, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries.
 */
static bool gen8_ppgtt_clear_pt(struct i915_address_space *vm,
				struct i915_page_table *pt,
				u64 start, u64 length)
{
	unsigned int num_entries = gen8_pte_count(start, length);
	unsigned int pte = gen8_pte_index(start);
	unsigned int pte_end = pte + num_entries;
	const gen8_pte_t scratch_pte =
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
	gen8_pte_t *vaddr;

	GEM_BUG_ON(num_entries > pt->used_ptes);

	pt->used_ptes -= num_entries;
	if (!pt->used_ptes)
		return true;

	vaddr = kmap_atomic_px(pt);
	while (pte < pte_end)
		vaddr[pte++] = scratch_pte;
	kunmap_atomic(vaddr);

	return false;
}

static void gen8_ppgtt_set_pde(struct i915_address_space *vm,
			       struct i915_page_directory *pd,
			       struct i915_page_table *pt,
			       unsigned int pde)
{
	gen8_pde_t *vaddr;

	pd->page_table[pde] = pt;

	vaddr = kmap_atomic_px(pd);
	vaddr[pde] = gen8_pde_encode(px_dma(pt), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

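/* Removes entries from a single page directory, releasing empty page tables
 * back to the scratch entry. The caller can use the return value (true when
 * the directory is empty) to update the next level up.
 */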
static bool gen8_ppgtt_clear_pd(struct i915_address_space *vm,
				struct i915_page_directory *pd,
				u64 start, u64 length)
{
	struct i915_page_table *pt;
	u32 pde;

	gen8_for_each_pde(pt, pd, start, length, pde) {
		if (!gen8_ppgtt_clear_pt(vm, pt, start, length))
			continue;

		gen8_ppgtt_set_pde(vm, pd, vm->scratch_pt, pde);
		pd->used_pdes--;

		free_pt(vm, pt);
	}

	return !pd->used_pdes;
}

static void gen8_ppgtt_set_pdpe(struct i915_address_space *vm,
				struct i915_page_directory_pointer *pdp,
				struct i915_page_directory *pd,
				unsigned int pdpe)
{
	gen8_ppgtt_pdpe_t *vaddr;

	pdp->page_directory[pdpe] = pd;
	if (!USES_FULL_48BIT_PPGTT(vm->i915))
		return;

	vaddr = kmap_atomic_px(pdp);
	vaddr[pdpe] = gen8_pdpe_encode(px_dma(pd), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
746
}
747

748 749 750 751
/* Removes entries from a single page dir pointer, releasing it if it's empty.
 * Caller can use the return value to update higher-level entries
 */
static bool gen8_ppgtt_clear_pdp(struct i915_address_space *vm,
752
				 struct i915_page_directory_pointer *pdp,
753
				 u64 start, u64 length)
754 755
{
	struct i915_page_directory *pd;
756
	unsigned int pdpe;
757

758
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
759 760
		if (!gen8_ppgtt_clear_pd(vm, pd, start, length))
			continue;
761

762
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
763
		pdp->used_pdpes--;
764

765 766
		free_pd(vm, pd);
	}
767

768
	return !pdp->used_pdpes;
769
}
770

771 772 773 774 775 776
static void gen8_ppgtt_clear_3lvl(struct i915_address_space *vm,
				  u64 start, u64 length)
{
	gen8_ppgtt_clear_pdp(vm, &i915_vm_to_ppgtt(vm)->pdp, start, length);
}

777 778 779 780 781 782 783 784 785 786 787 788 789
static void gen8_ppgtt_set_pml4e(struct i915_pml4 *pml4,
				 struct i915_page_directory_pointer *pdp,
				 unsigned int pml4e)
{
	gen8_ppgtt_pml4e_t *vaddr;

	pml4->pdps[pml4e] = pdp;

	vaddr = kmap_atomic_px(pml4);
	vaddr[pml4e] = gen8_pml4e_encode(px_dma(pdp), I915_CACHE_LLC);
	kunmap_atomic(vaddr);
}

790 791 792 793
/* Removes entries from a single pml4.
 * This is the top-level structure in 4-level page tables used on gen8+.
 * Empty entries are always scratch pml4e.
 */
794 795
static void gen8_ppgtt_clear_4lvl(struct i915_address_space *vm,
				  u64 start, u64 length)
796
{
797 798
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct i915_pml4 *pml4 = &ppgtt->pml4;
799
	struct i915_page_directory_pointer *pdp;
800
	unsigned int pml4e;
801

802
	GEM_BUG_ON(!USES_FULL_48BIT_PPGTT(vm->i915));
803

804
	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
805 806
		if (!gen8_ppgtt_clear_pdp(vm, pdp, start, length))
			continue;
807

808 809 810 811
		gen8_ppgtt_set_pml4e(pml4, vm->scratch_pdp, pml4e);
		__clear_bit(pml4e, pml4->used_pml4es);

		free_pdp(vm, pdp);
812 813 814
	}
}

815 816 817 818 819 820 821
struct sgt_dma {
	struct scatterlist *sg;
	dma_addr_t dma, max;
};

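/*
 * struct sgt_dma is a small cursor over the object's scatterlist: @sg is the
 * current segment, @dma the next device address to write and @max the end of
 * that segment. The insert loop below advances it one page at a time, hopping
 * to the next segment (and the next page table) as boundaries are crossed.
 */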
static __always_inline bool
gen8_ppgtt_insert_pte_entries(struct i915_hw_ppgtt *ppgtt,
822
			      struct i915_page_directory_pointer *pdp,
823 824
			      struct sgt_dma *iter,
			      u64 start,
825 826
			      enum i915_cache_level cache_level)
{
827 828 829 830 831 832 833
	unsigned int pdpe = gen8_pdpe_index(start);
	unsigned int pde = gen8_pde_index(start);
	unsigned int pte = gen8_pte_index(start);
	struct i915_page_directory *pd;
	const gen8_pte_t pte_encode = gen8_pte_encode(0, cache_level);
	gen8_pte_t *vaddr;
	bool ret;
834

835
	pd = pdp->page_directory[pdpe];
836
	vaddr = kmap_atomic_px(pd->page_table[pde]);
837 838 839 840 841 842 843 844 845
	do {
		vaddr[pte] = pte_encode | iter->dma;
		iter->dma += PAGE_SIZE;
		if (iter->dma >= iter->max) {
			iter->sg = __sg_next(iter->sg);
			if (!iter->sg) {
				ret = false;
				break;
			}
846

847 848
			iter->dma = sg_dma_address(iter->sg);
			iter->max = iter->dma + iter->sg->length;
		}
850

851 852
		if (++pte == GEN8_PTES) {
			if (++pde == I915_PDES) {
853 854 855
				/* Limited by sg length for 3lvl */
				if (++pdpe == GEN8_PML4ES_PER_PML4) {
					ret = true;
856
					break;
857 858 859 860
				}

				GEM_BUG_ON(pdpe > GEN8_LEGACY_PDPES);
				pd = pdp->page_directory[pdpe];
861 862
				pde = 0;
			}
863

864 865
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(pd->page_table[pde]);
866
			pte = 0;
867
		}
868
	} while (1);
869
	kunmap_atomic(vaddr);
870

871
	return ret;
872 873
}

874 875 876 877 878
static void gen8_ppgtt_insert_3lvl(struct i915_address_space *vm,
				   struct sg_table *pages,
				   u64 start,
				   enum i915_cache_level cache_level,
				   u32 unused)
879
{
880
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
881 882 883 884 885
	struct sgt_dma iter = {
		.sg = pages->sgl,
		.dma = sg_dma_address(iter.sg),
		.max = iter.dma + iter.sg->length,
	};
886

887 888 889
	gen8_ppgtt_insert_pte_entries(ppgtt, &ppgtt->pdp, &iter,
				      start, cache_level);
}
890

891 892 893 894 895 896 897 898 899 900 901 902 903 904
static void gen8_ppgtt_insert_4lvl(struct i915_address_space *vm,
				   struct sg_table *pages,
				   uint64_t start,
				   enum i915_cache_level cache_level,
				   u32 unused)
{
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
	struct sgt_dma iter = {
		.sg = pages->sgl,
		.dma = sg_dma_address(iter.sg),
		.max = iter.dma + iter.sg->length,
	};
	struct i915_page_directory_pointer **pdps = ppgtt->pml4.pdps;
	unsigned int pml4e = gen8_pml4e_index(start);
905

906 907 908
	while (gen8_ppgtt_insert_pte_entries(ppgtt, pdps[pml4e++], &iter,
					     start, cache_level))
		;
909 910
}

911
static void gen8_free_page_tables(struct i915_address_space *vm,
912
				  struct i915_page_directory *pd)
913 914 915
{
	int i;

916
	if (!px_page(pd))
917 918
		return;

919 920 921
	for (i = 0; i < I915_PDES; i++) {
		if (pd->page_table[i] != vm->scratch_pt)
			free_pt(vm, pd->page_table[i]);
922
	}
}

925 926
static int gen8_init_scratch(struct i915_address_space *vm)
{
927
	int ret;
928

929
	ret = setup_scratch_page(vm, I915_GFP_DMA);
930 931
	if (ret)
		return ret;
932

933
	vm->scratch_pt = alloc_pt(vm);
934
	if (IS_ERR(vm->scratch_pt)) {
935 936
		ret = PTR_ERR(vm->scratch_pt);
		goto free_scratch_page;
937 938
	}

939
	vm->scratch_pd = alloc_pd(vm);
940
	if (IS_ERR(vm->scratch_pd)) {
941 942
		ret = PTR_ERR(vm->scratch_pd);
		goto free_pt;
943 944
	}

	if (USES_FULL_48BIT_PPGTT(vm->i915)) {
		vm->scratch_pdp = alloc_pdp(vm);
		if (IS_ERR(vm->scratch_pdp)) {
			ret = PTR_ERR(vm->scratch_pdp);
			goto free_pd;
		}
	}

	gen8_initialize_pt(vm, vm->scratch_pt);
	gen8_initialize_pd(vm, vm->scratch_pd);
	if (USES_FULL_48BIT_PPGTT(vm->i915))
		gen8_initialize_pdp(vm, vm->scratch_pdp);
957 958

	return 0;
959 960

free_pd:
961
	free_pd(vm, vm->scratch_pd);
962
free_pt:
963
	free_pt(vm, vm->scratch_pt);
964
free_scratch_page:
965
	cleanup_scratch_page(vm);
966 967

	return ret;
}

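/*
 * When running as a vGPU, tell the GVT-g host about this PPGTT by writing the
 * page-directory addresses and a create/destroy notification to the vgtif
 * registers.
 */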
static int gen8_ppgtt_notify_vgt(struct i915_hw_ppgtt *ppgtt, bool create)
{
	enum vgt_g2v_type msg;
973
	struct drm_i915_private *dev_priv = ppgtt->base.i915;
974 975
	int i;

976
	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
977 978
		u64 daddr = px_dma(&ppgtt->pml4);

979 980
		I915_WRITE(vgtif_reg(pdp[0].lo), lower_32_bits(daddr));
		I915_WRITE(vgtif_reg(pdp[0].hi), upper_32_bits(daddr));
981 982 983 984 985 986 987

		msg = (create ? VGT_G2V_PPGTT_L4_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L4_PAGE_TABLE_DESTROY);
	} else {
		for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
			u64 daddr = i915_page_dir_dma_addr(ppgtt, i);

988 989
			I915_WRITE(vgtif_reg(pdp[i].lo), lower_32_bits(daddr));
			I915_WRITE(vgtif_reg(pdp[i].hi), upper_32_bits(daddr));
990 991 992 993 994 995 996 997 998 999 1000
		}

		msg = (create ? VGT_G2V_PPGTT_L3_PAGE_TABLE_CREATE :
				VGT_G2V_PPGTT_L3_PAGE_TABLE_DESTROY);
	}

	I915_WRITE(vgtif_reg(g2v_notify), msg);

	return 0;
}

1001 1002
static void gen8_free_scratch(struct i915_address_space *vm)
{
1003 1004 1005 1006 1007
	if (USES_FULL_48BIT_PPGTT(vm->i915))
		free_pdp(vm, vm->scratch_pdp);
	free_pd(vm, vm->scratch_pd);
	free_pt(vm, vm->scratch_pt);
	cleanup_scratch_page(vm);
1008 1009
}

1010
static void gen8_ppgtt_cleanup_3lvl(struct i915_address_space *vm,
1011
				    struct i915_page_directory_pointer *pdp)
1012 1013 1014
{
	int i;

1015
	for (i = 0; i < I915_PDPES_PER_PDP(vm->i915); i++) {
1016
		if (pdp->page_directory[i] == vm->scratch_pd)
1017 1018
			continue;

1019 1020
		gen8_free_page_tables(vm, pdp->page_directory[i]);
		free_pd(vm, pdp->page_directory[i]);
1021
	}
1022

1023
	free_pdp(vm, pdp);
1024 1025 1026 1027 1028 1029 1030 1031 1032 1033
}

static void gen8_ppgtt_cleanup_4lvl(struct i915_hw_ppgtt *ppgtt)
{
	int i;

	for_each_set_bit(i, ppgtt->pml4.used_pml4es, GEN8_PML4ES_PER_PML4) {
		if (WARN_ON(!ppgtt->pml4.pdps[i]))
			continue;

1034
		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, ppgtt->pml4.pdps[i]);
1035 1036
	}

1037
	cleanup_px(&ppgtt->base, &ppgtt->pml4);
1038 1039 1040 1041
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
1042
	struct drm_i915_private *dev_priv = vm->i915;
1043
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1044

1045
	if (intel_vgpu_active(dev_priv))
1046 1047
		gen8_ppgtt_notify_vgt(ppgtt, false);

1048 1049
	if (!USES_FULL_48BIT_PPGTT(vm->i915))
		gen8_ppgtt_cleanup_3lvl(&ppgtt->base, &ppgtt->pdp);
1050 1051
	else
		gen8_ppgtt_cleanup_4lvl(ppgtt);
1052

1053
	gen8_free_scratch(vm);
1054 1055
}

1056 1057 1058
static int gen8_ppgtt_alloc_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd,
			       u64 start, u64 length)
1059
{
1060
	struct i915_page_table *pt;
1061
	u64 from = start;
1062
	unsigned int pde;
1063

1064
	gen8_for_each_pde(pt, pd, start, length, pde) {
1065
		if (pt == vm->scratch_pt) {
1066 1067 1068
			pt = alloc_pt(vm);
			if (IS_ERR(pt))
				goto unwind;
1069

1070
			gen8_initialize_pt(vm, pt);
1071 1072 1073

			gen8_ppgtt_set_pde(vm, pd, pt, pde);
			pd->used_pdes++;
1074
		}
1075

1076
		pt->used_ptes += gen8_pte_count(start, length);
1077
	}
1078
	return 0;
1079

1080 1081
unwind:
	gen8_ppgtt_clear_pd(vm, pd, from, start - from);
	return -ENOMEM;
1083 1084
}

1085 1086 1087 1088 1089 1090 1091 1092 1093 1094
/**
 * gen8_ppgtt_alloc_page_dirpointers() - Allocate pdps for VA range.
 * @vm:	Master vm structure.
 * @pml4:	Page map level 4 for this address range.
 * @start:	Starting virtual address to begin allocations.
 * @length:	Size of the allocations.
 * @new_pdps:	Bitmap set by function with new allocations. Likely used by the
 *		caller to free on error.
 *
 * Allocate the required number of page directory pointers. Extremely similar to
1095
 * gen8_ppgtt_alloc_page_directories() and gen8_ppgtt_alloc_pd().
1096 1097 1098 1099 1100 1101 1102 1103 1104
 * The main difference is here we are limited by the pml4 boundary (instead of
 * the page directory pointer).
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int
gen8_ppgtt_alloc_page_dirpointers(struct i915_address_space *vm,
				  struct i915_pml4 *pml4,
				  uint64_t start,
1105
				  uint64_t length)
1106 1107 1108 1109
{
	struct i915_page_directory_pointer *pdp;
	uint32_t pml4e;

1110
	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1111
		if (!test_bit(pml4e, pml4->used_pml4es)) {
1112
			pdp = alloc_pdp(vm);
1113
			if (IS_ERR(pdp))
1114
				return PTR_ERR(pdp);
1115

1116
			gen8_initialize_pdp(vm, pdp);
1117 1118 1119 1120 1121 1122 1123 1124 1125
			pml4->pdps[pml4e] = pdp;
			trace_i915_page_directory_pointer_entry_alloc(vm,
								      pml4e,
								      start,
								      GEN8_PML4E_SHIFT);
		}
	}

	return 0;
1126 1127
}

1128 1129 1130 1131
static int gen8_alloc_va_range_3lvl(struct i915_address_space *vm,
				    struct i915_page_directory_pointer *pdp,
				    uint64_t start,
				    uint64_t length)
1132
{
1133
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1134
	struct i915_page_directory *pd;
1135 1136
	u64 from = start;
	unsigned int pdpe;
1137 1138
	int ret;

1139
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1140 1141 1142 1143
		if (pd == vm->scratch_pd) {
			pd = alloc_pd(vm);
			if (IS_ERR(pd))
				goto unwind;
1144

1145
			gen8_initialize_pd(vm, pd);
1146
			gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
1147 1148 1149 1150 1151 1152 1153 1154 1155 1156
			pdp->used_pdpes++;
		}

		ret = gen8_ppgtt_alloc_pd(vm, pd, start, length);
		if (unlikely(ret)) {
			gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
			pdp->used_pdpes--;
			free_pd(vm, pd);
			goto unwind;
		}
1157
	}
1158

1159
	mark_tlbs_dirty(ppgtt);
	return 0;
1161

1162 1163 1164
unwind:
	gen8_ppgtt_clear_pdp(vm, pdp, from, start - from);
	return -ENOMEM;
1165 1166
}

1167 1168 1169 1170 1171 1172 1173
static int gen8_alloc_va_range_4lvl(struct i915_address_space *vm,
				    struct i915_pml4 *pml4,
				    uint64_t start,
				    uint64_t length)
{
	DECLARE_BITMAP(new_pdps, GEN8_PML4ES_PER_PML4);
	struct i915_page_directory_pointer *pdp;
1174
	uint64_t pml4e;
1175 1176 1177 1178 1179 1180 1181 1182 1183
	int ret = 0;

	/* Do the pml4 allocations first, so we don't need to track the newly
	 * allocated tables below the pdp */
	bitmap_zero(new_pdps, GEN8_PML4ES_PER_PML4);

	/* The pagedirectory and pagetable allocations are done in the shared 3
	 * and 4 level code. Just allocate the pdps.
	 */
1184
	ret = gen8_ppgtt_alloc_page_dirpointers(vm, pml4, start, length);
1185 1186 1187
	if (ret)
		return ret;

1188
	gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1189 1190 1191 1192 1193 1194
		WARN_ON(!pdp);

		ret = gen8_alloc_va_range_3lvl(vm, pdp, start, length);
		if (ret)
			goto err_out;

1195
		gen8_ppgtt_set_pml4e(pml4, pdp, pml4e);
1196 1197 1198 1199 1200 1201 1202 1203 1204
	}

	bitmap_or(pml4->used_pml4es, new_pdps, pml4->used_pml4es,
		  GEN8_PML4ES_PER_PML4);

	return 0;

err_out:
	for_each_set_bit(pml4e, new_pdps, GEN8_PML4ES_PER_PML4)
1205
		gen8_ppgtt_cleanup_3lvl(vm, pml4->pdps[pml4e]);
1206 1207 1208 1209 1210 1211 1212

	return ret;
}

static int gen8_alloc_va_range(struct i915_address_space *vm,
			       uint64_t start, uint64_t length)
{
1213
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1214

1215
	if (USES_FULL_48BIT_PPGTT(vm->i915))
1216 1217 1218 1219 1220
		return gen8_alloc_va_range_4lvl(vm, &ppgtt->pml4, start, length);
	else
		return gen8_alloc_va_range_3lvl(vm, &ppgtt->pdp, start, length);
}

1221 1222
static void gen8_dump_pdp(struct i915_hw_ppgtt *ppgtt,
			  struct i915_page_directory_pointer *pdp,
1223 1224 1225 1226 1227 1228 1229
			  uint64_t start, uint64_t length,
			  gen8_pte_t scratch_pte,
			  struct seq_file *m)
{
	struct i915_page_directory *pd;
	uint32_t pdpe;

1230
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
1231 1232 1233 1234 1235
		struct i915_page_table *pt;
		uint64_t pd_len = length;
		uint64_t pd_start = start;
		uint32_t pde;

1236
		if (pdp->page_directory[pdpe] == ppgtt->base.scratch_pd)
1237 1238 1239
			continue;

		seq_printf(m, "\tPDPE #%d\n", pdpe);
1240
		gen8_for_each_pde(pt, pd, pd_start, pd_len, pde) {
1241 1242 1243
			uint32_t  pte;
			gen8_pte_t *pt_vaddr;

1244
			if (pd->page_table[pde] == ppgtt->base.scratch_pt)
1245 1246
				continue;

1247
			pt_vaddr = kmap_atomic_px(pt);
1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264 1265 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 1279 1280
			for (pte = 0; pte < GEN8_PTES; pte += 4) {
				uint64_t va =
					(pdpe << GEN8_PDPE_SHIFT) |
					(pde << GEN8_PDE_SHIFT) |
					(pte << GEN8_PTE_SHIFT);
				int i;
				bool found = false;

				for (i = 0; i < 4; i++)
					if (pt_vaddr[pte + i] != scratch_pte)
						found = true;
				if (!found)
					continue;

				seq_printf(m, "\t\t0x%llx [%03d,%03d,%04d]: =", va, pdpe, pde, pte);
				for (i = 0; i < 4; i++) {
					if (pt_vaddr[pte + i] != scratch_pte)
						seq_printf(m, " %llx", pt_vaddr[pte + i]);
					else
						seq_puts(m, "  SCRATCH ");
				}
				seq_puts(m, "\n");
			}
			kunmap_atomic(pt_vaddr);
		}
	}
}

static void gen8_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	uint64_t start = ppgtt->base.start;
	uint64_t length = ppgtt->base.total;
1281 1282
	const gen8_pte_t scratch_pte =
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
1283

1284
	if (!USES_FULL_48BIT_PPGTT(vm->i915)) {
1285
		gen8_dump_pdp(ppgtt, &ppgtt->pdp, start, length, scratch_pte, m);
1286
	} else {
1287
		uint64_t pml4e;
1288 1289 1290
		struct i915_pml4 *pml4 = &ppgtt->pml4;
		struct i915_page_directory_pointer *pdp;

1291
		gen8_for_each_pml4e(pdp, pml4, start, length, pml4e) {
1292 1293 1294 1295
			if (!test_bit(pml4e, pml4->used_pml4es))
				continue;

			seq_printf(m, "    PML4E #%llu\n", pml4e);
1296
			gen8_dump_pdp(ppgtt, pdp, start, length, scratch_pte, m);
1297 1298 1299 1300
		}
	}
}

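/*
 * Used only when running as a vGPU: preallocate every top-level page
 * directory for the 32b address space up front. They are never removed
 * afterwards, hence the extra used_pdpes reference taken below.
 */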
static int gen8_preallocate_top_level_pdp(struct i915_hw_ppgtt *ppgtt)
1302
{
1303 1304 1305 1306 1307 1308
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_directory_pointer *pdp = &ppgtt->pdp;
	struct i915_page_directory *pd;
	u64 start = 0, length = ppgtt->base.total;
	u64 from = start;
	unsigned int pdpe;
1309

1310 1311 1312 1313
	gen8_for_each_pdpe(pd, pdp, start, length, pdpe) {
		pd = alloc_pd(vm);
		if (IS_ERR(pd))
			goto unwind;
1314

1315 1316 1317 1318
		gen8_initialize_pd(vm, pd);
		gen8_ppgtt_set_pdpe(vm, pdp, pd, pdpe);
		pdp->used_pdpes++;
	}
1319

1320 1321
	pdp->used_pdpes++; /* never remove */
	return 0;
1322

1323 1324 1325 1326 1327 1328 1329 1330
unwind:
	start -= from;
	gen8_for_each_pdpe(pd, pdp, from, start, pdpe) {
		gen8_ppgtt_set_pdpe(vm, pdp, vm->scratch_pd, pdpe);
		free_pd(vm, pd);
	}
	pdp->used_pdpes = 0;
	return -ENOMEM;
1331 1332
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max 4 PDP registers
 * with a net effect resembling a 2-level page table in normal x86 terms. Each
 * PDP entry covers 1GB of memory, so 4 * 512 * 512 * 4096 = 4GB of legacy
 * 32b address space.
 */
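/*
 * Worked example of the 48b (4-level) layout: a virtual address is split
 * 9+9+9+9+12 bits from the top down, which is what the gen8_*_index()
 * helpers compute:
 *
 *	gen8_pml4e_index(addr)	- bits 47:39, selects the PDP
 *	gen8_pdpe_index(addr)	- bits 38:30, selects the page directory
 *	gen8_pde_index(addr)	- bits 29:21, selects the page table
 *	gen8_pte_index(addr)	- bits 20:12, selects the PTE
 *
 * The legacy 32b mode drops the pml4 level and uses up to 4 PDP entries
 * directly.
 */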
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
1342
	struct drm_i915_private *dev_priv = ppgtt->base.i915;
1343
	int ret;
1344

1345 1346 1347
	ret = gen8_init_scratch(&ppgtt->base);
	if (ret)
		return ret;
1348

1349 1350
	ppgtt->base.start = 0;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
1351
	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
1352 1353
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
1354
	ppgtt->debug_dump = gen8_dump_ppgtt;
1355

1356 1357 1358 1359 1360 1361
	/* There are only few exceptions for gen >=6. chv and bxt.
	 * And we are not sure about the latter so play safe for now.
	 */
	if (IS_CHERRYVIEW(dev_priv) || IS_BROXTON(dev_priv))
		ppgtt->base.pt_kmap_wc = true;

1362
	if (USES_FULL_48BIT_PPGTT(dev_priv)) {
1363
		ret = setup_px(&ppgtt->base, &ppgtt->pml4);
1364 1365
		if (ret)
			goto free_scratch;
1366

1367 1368
		gen8_initialize_pml4(&ppgtt->base, &ppgtt->pml4);

1369
		ppgtt->base.total = 1ULL << 48;
1370
		ppgtt->switch_mm = gen8_48b_mm_switch;
1371 1372

		ppgtt->base.insert_entries = gen8_ppgtt_insert_4lvl;
1373
		ppgtt->base.clear_range = gen8_ppgtt_clear_4lvl;
1374
	} else {
1375
		ret = __pdp_init(&ppgtt->base, &ppgtt->pdp);
1376 1377 1378 1379
		if (ret)
			goto free_scratch;

		ppgtt->base.total = 1ULL << 32;
1380
		ppgtt->switch_mm = gen8_legacy_mm_switch;
1381 1382 1383
		trace_i915_page_directory_pointer_entry_alloc(&ppgtt->base,
							      0, 0,
							      GEN8_PML4E_SHIFT);
1384

1385
		if (intel_vgpu_active(dev_priv)) {
1386 1387 1388
			ret = gen8_preallocate_top_level_pdp(ppgtt);
			if (ret) {
				__pdp_fini(&ppgtt->pdp);
1389
				goto free_scratch;
1390
			}
1391
		}
1392 1393

		ppgtt->base.insert_entries = gen8_ppgtt_insert_3lvl;
1394
		ppgtt->base.clear_range = gen8_ppgtt_clear_3lvl;
1395
	}
1396

1397
	if (intel_vgpu_active(dev_priv))
1398 1399
		gen8_ppgtt_notify_vgt(ppgtt, true);

1400
	return 0;
1401 1402 1403 1404

free_scratch:
	gen8_free_scratch(&ppgtt->base);
	return ret;
1405 1406
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
1410
	struct i915_page_table *unused;
1411
	gen6_pte_t scratch_pte;
	uint32_t pd_entry;
1413
	uint32_t  pte, pde;
1414
	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;

1416
	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
1417
				     I915_CACHE_LLC, 0);

1419
	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde) {
		u32 expected;
1421
		gen6_pte_t *pt_vaddr;
1422
		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
1423
		pd_entry = readl(ppgtt->pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

1433
		pt_vaddr = kmap_atomic_px(ppgtt->pd.page_table[pde]);
1434

1435
		for (pte = 0; pte < GEN6_PTES; pte+=4) {
			unsigned long va =
1437
				(pde * PAGE_SIZE * GEN6_PTES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, "  SCRATCH ");
			}
			seq_puts(m, "\n");
		}
1456
		kunmap_atomic(pt_vaddr);
	}
}

1460
/* Write pde (index) from the page directory @pd to the page table @pt */
static inline void gen6_write_pde(const struct i915_hw_ppgtt *ppgtt,
				  const unsigned int pde,
				  const struct i915_page_table *pt)
{
1465
	/* Caller needs to make sure the write completes if necessary */
	writel_relaxed(GEN6_PDE_ADDR_ENCODE(px_dma(pt)) | GEN6_PDE_VALID,
		       ppgtt->pd_addr + pde);
1468
}

1470 1471
/* Write all the page tables found in the ppgtt structure to incrementing page
 * directories. */
static void gen6_write_page_range(struct i915_hw_ppgtt *ppgtt,
1473 1474
				  uint32_t start, uint32_t length)
{
1475
	struct i915_page_table *pt;
	unsigned int pde;
1477

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde)
		gen6_write_pde(ppgtt, pde, pt);
1480

	mark_tlbs_dirty(ppgtt);
1482
	wmb();
}

1485
static inline uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
1487 1488
	GEM_BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);
	return ppgtt->pd.base.ggtt_offset << 10;
}

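/*
 * Point an engine at this ppgtt's page directory by loading PP_DIR_DCLV and
 * PP_DIR_BASE, either from the ring (hsw/gen7 below) or via mmio (gen6).
 */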
static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
1492
			 struct drm_i915_gem_request *req)
1493
{
1494
	struct intel_engine_cs *engine = req->engine;
1495
	u32 *cs;
1496 1497 1498
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
1499
	ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1500 1501 1502
	if (ret)
		return ret;

1503 1504 1505
	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);
1506

1507 1508 1509 1510 1511 1512 1513
	*cs++ = MI_LOAD_REGISTER_IMM(2);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
	*cs++ = PP_DIR_DCLV_2G;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
	*cs++ = get_pd_offset(ppgtt);
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);
1514 1515 1516 1517

	return 0;
}

1518
static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
1519
			  struct drm_i915_gem_request *req)
1520
{
1521
	struct intel_engine_cs *engine = req->engine;
1522
	u32 *cs;
1523 1524 1525
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
1526
	ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1527 1528 1529
	if (ret)
		return ret;

1530 1531 1532 1533 1534 1535 1536 1537 1538 1539 1540
	cs = intel_ring_begin(req, 6);
	if (IS_ERR(cs))
		return PTR_ERR(cs);

	*cs++ = MI_LOAD_REGISTER_IMM(2);
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_DCLV(engine));
	*cs++ = PP_DIR_DCLV_2G;
	*cs++ = i915_mmio_reg_offset(RING_PP_DIR_BASE(engine));
	*cs++ = get_pd_offset(ppgtt);
	*cs++ = MI_NOOP;
	intel_ring_advance(req, cs);
1541

1542
	/* XXX: RCS is the only one to auto invalidate the TLBs? */
1543
	if (engine->id != RCS) {
1544
		ret = engine->emit_flush(req, EMIT_INVALIDATE | EMIT_FLUSH);
1545 1546 1547 1548
		if (ret)
			return ret;
	}

1549 1550 1551
	return 0;
}

1552
static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
1553
			  struct drm_i915_gem_request *req)
1554
{
1555
	struct intel_engine_cs *engine = req->engine;
1556
	struct drm_i915_private *dev_priv = req->i915;
1557

1558 1559
	I915_WRITE(RING_PP_DIR_DCLV(engine), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(engine), get_pd_offset(ppgtt));
1560 1561 1562
	return 0;
}

1563
static void gen8_ppgtt_enable(struct drm_i915_private *dev_priv)
1564
{
1565
	struct intel_engine_cs *engine;
1566
	enum intel_engine_id id;

1568
	for_each_engine(engine, dev_priv, id) {
1569 1570
		u32 four_level = USES_FULL_48BIT_PPGTT(dev_priv) ?
				 GEN8_GFX_PPGTT_48B : 0;
1571
		I915_WRITE(RING_MODE_GEN7(engine),
1572
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE | four_level));
1573 1574
	}
}

1576
static void gen7_ppgtt_enable(struct drm_i915_private *dev_priv)
{
1578
	struct intel_engine_cs *engine;
1579
	uint32_t ecochk, ecobits;
1580
	enum intel_engine_id id;

1582 1583
	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);
1584

1585
	ecochk = I915_READ(GAM_ECOCHK);
1586
	if (IS_HASWELL(dev_priv)) {
1587 1588 1589 1590 1591 1592
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);
1593

1594
	for_each_engine(engine, dev_priv, id) {
		/* GFX_MODE is per-ring on gen7+ */
1596
		I915_WRITE(RING_MODE_GEN7(engine),
1597
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
1599
}

1601
static void gen6_ppgtt_enable(struct drm_i915_private *dev_priv)
1602 1603
{
	uint32_t ecochk, gab_ctl, ecobits;
1604

1605 1606 1607
	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

1609 1610 1611 1612 1613 1614 1615
	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
1620
				   u64 start, u64 length)
1621
{
1622
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1623 1624 1625 1626 1627 1628
	unsigned int first_entry = start >> PAGE_SHIFT;
	unsigned int pde = first_entry / GEN6_PTES;
	unsigned int pte = first_entry % GEN6_PTES;
	unsigned int num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte =
		vm->pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC, 0);
1629

1630
	while (num_entries) {
1631 1632 1633
		struct i915_page_table *pt = ppgtt->pd.page_table[pde++];
		unsigned int end = min(pte + num_entries, GEN6_PTES);
		gen6_pte_t *vaddr;
1634

1635
		num_entries -= end - pte;
1636

1637 1638 1639 1640 1641
		/* Note that the hw doesn't support removing PDE on the fly
		 * (they are cached inside the context with no means to
		 * invalidate the cache), so we can only reset the PTE
		 * entries back to scratch.
		 */
1642

1643 1644 1645 1646 1647
		vaddr = kmap_atomic_px(pt);
		do {
			vaddr[pte++] = scratch_pte;
		} while (pte < end);
		kunmap_atomic(vaddr);
1648

1649
		pte = 0;
1650
	}
1651 1652
}

1653
static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
1655
				      uint64_t start,
1656
				      enum i915_cache_level cache_level, u32 flags)
{
1658
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1659
	unsigned first_entry = start >> PAGE_SHIFT;
1660 1661
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned act_pte = first_entry % GEN6_PTES;
1662 1663 1664 1665
	const u32 pte_encode = vm->pte_encode(0, cache_level, flags);
	struct sgt_dma iter;
	gen6_pte_t *vaddr;

1666
	vaddr = kmap_atomic_px(ppgtt->pd.page_table[act_pt]);
1667 1668 1669 1670 1671
	iter.sg = pages->sgl;
	iter.dma = sg_dma_address(iter.sg);
	iter.max = iter.dma + iter.sg->length;
	do {
		vaddr[act_pte] = pte_encode | GEN6_PTE_ADDR_ENCODE(iter.dma);
1672

1673 1674 1675 1676 1677
		iter.dma += PAGE_SIZE;
		if (iter.dma == iter.max) {
			iter.sg = __sg_next(iter.sg);
			if (!iter.sg)
				break;
1678

1679 1680 1681
			iter.dma = sg_dma_address(iter.sg);
			iter.max = iter.dma + iter.sg->length;
		}
1682

1683
		if (++act_pte == GEN6_PTES) {
1684 1685
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic_px(ppgtt->pd.page_table[++act_pt]);
1686
			act_pte = 0;
		}
1688
	} while (1);
1689
	kunmap_atomic(vaddr);
}

1692
static int gen6_alloc_va_range(struct i915_address_space *vm,
1693
			       u64 start, u64 length)
1694
{
1695
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1696
	struct i915_page_table *pt;
1697 1698 1699
	u64 from = start;
	unsigned int pde;
	bool flush = false;
1700

1701
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, pde) {
1702 1703 1704 1705
		if (pt == vm->scratch_pt) {
			pt = alloc_pt(vm);
			if (IS_ERR(pt))
				goto unwind_out;
1706

1707 1708 1709 1710
			gen6_initialize_pt(vm, pt);
			ppgtt->pd.page_table[pde] = pt;
			gen6_write_pde(ppgtt, pde, pt);
			flush = true;
1711 1712 1713
		}
	}

1714 1715 1716
	if (flush) {
		mark_tlbs_dirty(ppgtt);
		wmb();
1717 1718 1719
	}

	return 0;
1720 1721

unwind_out:
1722 1723
	gen6_ppgtt_clear_range(vm, from, start);
	return -ENOMEM;
1724 1725
}

1726 1727
static int gen6_init_scratch(struct i915_address_space *vm)
{
1728
	int ret;
1729

1730
	ret = setup_scratch_page(vm, I915_GFP_DMA);
1731 1732
	if (ret)
		return ret;
1733

1734
	vm->scratch_pt = alloc_pt(vm);
1735
	if (IS_ERR(vm->scratch_pt)) {
1736
		cleanup_scratch_page(vm);
1737 1738 1739 1740 1741 1742 1743 1744 1745 1746
		return PTR_ERR(vm->scratch_pt);
	}

	gen6_initialize_pt(vm, vm->scratch_pt);

	return 0;
}

static void gen6_free_scratch(struct i915_address_space *vm)
{
1747 1748
	free_pt(vm, vm->scratch_pt);
	cleanup_scratch_page(vm);
1749 1750
}

1751
static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
1752
{
1753
	struct i915_hw_ppgtt *ppgtt = i915_vm_to_ppgtt(vm);
1754
	struct i915_page_directory *pd = &ppgtt->pd;
1755 1756
	struct i915_page_table *pt;
	uint32_t pde;
1757

1758 1759
	drm_mm_remove_node(&ppgtt->node);

1760
	gen6_for_all_pdes(pt, pd, pde)
1761
		if (pt != vm->scratch_pt)
1762
			free_pt(vm, pt);
1763

1764
	gen6_free_scratch(vm);
1765 1766
}

1767
static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
1768
{
1769
	struct i915_address_space *vm = &ppgtt->base;
1770
	struct drm_i915_private *dev_priv = ppgtt->base.i915;
1771
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1772
	int ret;
1773

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
1778
	BUG_ON(!drm_mm_initialized(&ggtt->base.mm));
1779

1780 1781 1782
	ret = gen6_init_scratch(vm);
	if (ret)
		return ret;
1783

1784 1785 1786 1787 1788
	ret = i915_gem_gtt_insert(&ggtt->base, &ppgtt->node,
				  GEN6_PD_SIZE, GEN6_PD_ALIGN,
				  I915_COLOR_UNEVICTABLE,
				  0, ggtt->base.total,
				  PIN_HIGH);
1789
	if (ret)
1790 1791
		goto err_out;

1792
	if (ppgtt->node.start < ggtt->mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");
1794

1795 1796 1797 1798 1799 1800
	ppgtt->pd.base.ggtt_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);

	ppgtt->pd_addr = (gen6_pte_t __iomem *)ggtt->gsm +
		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);

1801
	return 0;
1802 1803

err_out:
1804
	gen6_free_scratch(vm);
1805
	return ret;
1806 1807 1808 1809
}

static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
1810
	return gen6_ppgtt_allocate_page_directories(ppgtt);
1811
}
1812

1813 1814 1815
static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
				  uint64_t start, uint64_t length)
{
1816
	struct i915_page_table *unused;
1817
	uint32_t pde;
1818

1819
	gen6_for_each_pde(unused, &ppgtt->pd, start, length, pde)
1820
		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
1821 1822
}

1823
static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
1824
{
1825
	struct drm_i915_private *dev_priv = ppgtt->base.i915;
1826
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1827 1828
	int ret;

1829
	ppgtt->base.pte_encode = ggtt->base.pte_encode;
1830
	if (intel_vgpu_active(dev_priv) || IS_GEN6(dev_priv))
1831
		ppgtt->switch_mm = gen6_mm_switch;
1832
	else if (IS_HASWELL(dev_priv))
1833
		ppgtt->switch_mm = hsw_mm_switch;
1834
	else if (IS_GEN7(dev_priv))
1835
		ppgtt->switch_mm = gen7_mm_switch;
1836
	else
1837 1838 1839 1840 1841 1842 1843 1844
		BUG();

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
1845 1846
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
1847 1848
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.start = 0;
1849
	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
B
Ben Widawsky 已提交
1850
	ppgtt->debug_dump = gen6_dump_ppgtt;
1851

1852
	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);
C
Chris Wilson 已提交
1853
	gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
1854

1855 1856 1857 1858 1859 1860
	ret = gen6_alloc_va_range(&ppgtt->base, 0, ppgtt->base.total);
	if (ret) {
		gen6_ppgtt_cleanup(&ppgtt->base);
		return ret;
	}

1861
	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
1862 1863
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);
1864

1865 1866
	DRM_DEBUG_DRIVER("Adding PPGTT at offset %x\n",
			 ppgtt->pd.base.ggtt_offset << 10);
1867

1868
	return 0;
1869 1870
}

1871 1872
static int __hw_ppgtt_init(struct i915_hw_ppgtt *ppgtt,
			   struct drm_i915_private *dev_priv)
1873
{
1874
	ppgtt->base.i915 = dev_priv;
1875
	ppgtt->base.dma = &dev_priv->drm.pdev->dev;
1876

1877
	if (INTEL_INFO(dev_priv)->gen < 8)
1878
		return gen6_ppgtt_init(ppgtt);
B
Ben Widawsky 已提交
1879
	else
1880
		return gen8_ppgtt_init(ppgtt);
1881
}
1882

1883
static void i915_address_space_init(struct i915_address_space *vm,
C
Chris Wilson 已提交
1884 1885
				    struct drm_i915_private *dev_priv,
				    const char *name)
1886
{
C
Chris Wilson 已提交
1887
	i915_gem_timeline_init(dev_priv, &vm->timeline, name);
1888

1889
	drm_mm_init(&vm->mm, vm->start, vm->total);
1890 1891
	vm->mm.head_node.color = I915_COLOR_UNEVICTABLE;

1892 1893
	INIT_LIST_HEAD(&vm->active_list);
	INIT_LIST_HEAD(&vm->inactive_list);
1894
	INIT_LIST_HEAD(&vm->unbound_list);
1895

1896
	list_add_tail(&vm->global_link, &dev_priv->vm_list);
	pagevec_init(&vm->free_pages, false);
}

static void i915_address_space_fini(struct i915_address_space *vm)
{
	if (pagevec_count(&vm->free_pages))
		vm_free_pages_release(vm);

	i915_gem_timeline_fini(&vm->timeline);
	drm_mm_takedown(&vm->mm);
	list_del(&vm->global_link);
}

static void gtt_write_workarounds(struct drm_i915_private *dev_priv)
{
	/* This function applies GTT-related workarounds. It is called on
	 * driver load and after a GPU reset, so workarounds placed here are
	 * reapplied even if a GPU reset overwrites them.
	 */
	/* WaIncreaseDefaultTLBEntries:chv,bdw,skl,bxt,kbl,glk */
	if (IS_BROADWELL(dev_priv))
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_BDW);
	else if (IS_CHERRYVIEW(dev_priv))
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN8_L3_LRA_1_GPGPU_DEFAULT_VALUE_CHV);
	else if (IS_GEN9_BC(dev_priv))
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_SKL);
	else if (IS_GEN9_LP(dev_priv))
		I915_WRITE(GEN8_L3_LRA_1_GPGPU, GEN9_L3_LRA_1_GPGPU_DEFAULT_VALUE_BXT);
}

int i915_ppgtt_init_hw(struct drm_i915_private *dev_priv)
{
	gtt_write_workarounds(dev_priv);

	/* In the case of execlists, PPGTT is enabled by the context descriptor
	 * and the PDPs are contained within the context itself.  We don't
	 * need to do anything here. */
	if (i915.enable_execlists)
		return 0;

	if (!USES_PPGTT(dev_priv))
		return 0;

	if (IS_GEN6(dev_priv))
		gen6_ppgtt_enable(dev_priv);
	else if (IS_GEN7(dev_priv))
		gen7_ppgtt_enable(dev_priv);
	else if (INTEL_GEN(dev_priv) >= 8)
		gen8_ppgtt_enable(dev_priv);
	else
		MISSING_CASE(INTEL_GEN(dev_priv));

	return 0;
}

struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_i915_private *dev_priv,
		  struct drm_i915_file_private *fpriv,
		  const char *name)
{
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ret = __hw_ppgtt_init(ppgtt, dev_priv);
	if (ret) {
		kfree(ppgtt);
		return ERR_PTR(ret);
	}

	kref_init(&ppgtt->ref);
	i915_address_space_init(&ppgtt->base, dev_priv, name);
	ppgtt->base.file = fpriv;

	trace_i915_ppgtt_create(&ppgtt->base);

	return ppgtt;
}

void i915_ppgtt_close(struct i915_address_space *vm)
{
	struct list_head *phases[] = {
		&vm->active_list,
		&vm->inactive_list,
		&vm->unbound_list,
		NULL,
	}, **phase;

	GEM_BUG_ON(vm->closed);
	vm->closed = true;

	for (phase = phases; *phase; phase++) {
		struct i915_vma *vma, *vn;

		list_for_each_entry_safe(vma, vn, *phase, vm_link)
			if (!i915_vma_is_closed(vma))
				i915_vma_close(vma);
	}
}

void i915_ppgtt_release(struct kref *kref)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(kref, struct i915_hw_ppgtt, ref);

	trace_i915_ppgtt_release(&ppgtt->base);

	/* vmas should already be unbound and destroyed */
	WARN_ON(!list_empty(&ppgtt->base.active_list));
	WARN_ON(!list_empty(&ppgtt->base.inactive_list));
	WARN_ON(!list_empty(&ppgtt->base.unbound_list));

	ppgtt->base.cleanup(&ppgtt->base);
	i915_address_space_fini(&ppgtt->base);
	kfree(ppgtt);
}

/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_i915_private *dev_priv)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev_priv) && IS_MOBILE(dev_priv) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

void i915_check_and_clear_faults(struct drm_i915_private *dev_priv)
{
	struct intel_engine_cs *engine;
	enum intel_engine_id id;

	if (INTEL_INFO(dev_priv)->gen < 6)
		return;

	for_each_engine(engine, dev_priv, id) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(engine));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
			I915_WRITE(RING_FAULT_REG(engine),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}

	/* Engine specific init may not have been done till this point. */
	if (dev_priv->engine[RCS])
		POSTING_READ(RING_FAULT_REG(dev_priv->engine[RCS]));
}

void i915_gem_suspend_gtt_mappings(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_GEN(dev_priv) < 6)
		return;

	i915_check_and_clear_faults(dev_priv);

	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);

	i915_ggtt_invalidate(dev_priv);
}

int i915_gem_gtt_prepare_pages(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	do {
		if (dma_map_sg(&obj->base.dev->pdev->dev,
			       pages->sgl, pages->nents,
			       PCI_DMA_BIDIRECTIONAL))
			return 0;

		/* If the DMA remap fails, one cause can be that we have
		 * too many objects pinned in a small remapping table,
		 * such as swiotlb. Incrementally purge all other objects and
		 * try again - if there are no more pages to remove from
		 * the DMA remapper, i915_gem_shrink will return 0.
		 */
		GEM_BUG_ON(obj->mm.pages == pages);
	} while (i915_gem_shrink(to_i915(obj->base.dev),
				 obj->base.size >> PAGE_SHIFT,
				 I915_SHRINK_BOUND |
				 I915_SHRINK_UNBOUND |
				 I915_SHRINK_ACTIVE));

	return -ENOSPC;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
	writeq(pte, addr);
}

static void gen8_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  uint64_t offset,
				  enum i915_cache_level level,
				  u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen8_pte_t __iomem *pte =
		(gen8_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);

	gen8_set_pte(pte, gen8_pte_encode(addr, level));

	ggtt->invalidate(vm->i915);
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 unused)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	struct sgt_iter sgt_iter;
	gen8_pte_t __iomem *gtt_entries;
	const gen8_pte_t pte_encode = gen8_pte_encode(0, level);
	dma_addr_t addr;

	gtt_entries = (gen8_pte_t __iomem *)ggtt->gsm;
	gtt_entries += start >> PAGE_SHIFT;
	for_each_sgt_dma(addr, sgt_iter, st)
		gen8_set_pte(gtt_entries++, pte_encode | addr);

	wmb();

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	ggtt->invalidate(vm->i915);
}

struct insert_entries {
	struct i915_address_space *vm;
	struct sg_table *st;
	uint64_t start;
	enum i915_cache_level level;
	u32 flags;
};

static int gen8_ggtt_insert_entries__cb(void *_arg)
{
	struct insert_entries *arg = _arg;
	gen8_ggtt_insert_entries(arg->vm, arg->st,
				 arg->start, arg->level, arg->flags);
	return 0;
}

static void gen8_ggtt_insert_entries__BKL(struct i915_address_space *vm,
					  struct sg_table *st,
					  uint64_t start,
					  enum i915_cache_level level,
					  u32 flags)
{
	struct insert_entries arg = { vm, st, start, level, flags };
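	/* Note: stop_machine() runs the update with every other CPU quiesced,
	 * so these PTE writes are serialised against all other CPU activity;
	 * gen8_gmch_probe() below selects this __BKL variant for Cherryview.
	 */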
	stop_machine(gen8_ggtt_insert_entries__cb, &arg, NULL);
}

static void gen6_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  uint64_t offset,
				  enum i915_cache_level level,
				  u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *pte =
		(gen6_pte_t __iomem *)ggtt->gsm + (offset >> PAGE_SHIFT);

	iowrite32(vm->pte_encode(addr, level, flags), pte);

	ggtt->invalidate(vm->i915);
}

/*
 * Binds an object into the global GTT with the specified cache level. The
 * object will be accessible to the GPU via commands whose operands reference
 * offsets within the global GTT, as well as being accessible by the GPU
 * through the GMADR mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 flags)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	gen6_pte_t __iomem *entries = (gen6_pte_t __iomem *)ggtt->gsm;
	unsigned int i = start >> PAGE_SHIFT;
	struct sgt_iter iter;
	dma_addr_t addr;
	for_each_sgt_dma(addr, iter, st)
		iowrite32(vm->pte_encode(addr, level, flags), &entries[i++]);
	wmb();

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	ggtt->invalidate(vm->i915);
}

static void nop_clear_range(struct i915_address_space *vm,
			    uint64_t start, uint64_t length)
{
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start, uint64_t length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	const gen8_pte_t scratch_pte =
		gen8_pte_encode(vm->scratch_page.daddr, I915_CACHE_LLC);
	gen8_pte_t __iomem *gtt_base =
		(gen8_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *)ggtt->gsm + first_entry;
	const int max_entries = ggtt_total_entries(ggtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(vm->scratch_page.daddr,
				     I915_CACHE_LLC, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_insert_page(struct i915_address_space *vm,
				  dma_addr_t addr,
				  uint64_t offset,
				  enum i915_cache_level cache_level,
				  u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_page(addr, offset >> PAGE_SHIFT, flags);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *pages,
				     uint64_t start,
				     enum i915_cache_level cache_level, u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);

}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length)
{
	intel_gtt_clear_range(start >> PAGE_SHIFT, length >> PAGE_SHIFT);
}

static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	struct drm_i915_gem_object *obj = vma->obj;
	u32 pte_flags;

	if (unlikely(!vma->pages)) {
		int ret = i915_get_ggtt_vma_pages(vma);
		if (ret)
			return ret;
	}

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	intel_runtime_pm_get(i915);
	vma->vm->insert_entries(vma->vm, vma->pages, vma->node.start,
				cache_level, pte_flags);
	intel_runtime_pm_put(i915);

	/*
	 * Without aliasing PPGTT there's no difference between
	 * GLOBAL/LOCAL_BIND, it's all the same ptes. Hence unconditionally
	 * upgrade to both bound if we bind either to avoid double-binding.
	 */
	vma->flags |= I915_VMA_GLOBAL_BIND | I915_VMA_LOCAL_BIND;

	return 0;
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;

	intel_runtime_pm_get(i915);
	vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
	intel_runtime_pm_put(i915);
}

static int aliasing_gtt_bind_vma(struct i915_vma *vma,
				 enum i915_cache_level cache_level,
				 u32 flags)
{
	struct drm_i915_private *i915 = vma->vm->i915;
	u32 pte_flags;
	int ret;

	if (unlikely(!vma->pages)) {
		ret = i915_get_ggtt_vma_pages(vma);
		if (ret)
			return ret;
	}

	/* Currently applicable only to VLV */
	pte_flags = 0;
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	if (flags & I915_VMA_LOCAL_BIND) {
		struct i915_hw_ppgtt *appgtt = i915->mm.aliasing_ppgtt;

		if (appgtt->base.allocate_va_range) {
			ret = appgtt->base.allocate_va_range(&appgtt->base,
							     vma->node.start,
							     vma->node.size);
			if (ret)
				return ret;
		}

		appgtt->base.insert_entries(&appgtt->base,
					    vma->pages, vma->node.start,
					    cache_level, pte_flags);
	}

	if (flags & I915_VMA_GLOBAL_BIND) {
		intel_runtime_pm_get(i915);
		vma->vm->insert_entries(vma->vm,
					vma->pages, vma->node.start,
					cache_level, pte_flags);
		intel_runtime_pm_put(i915);
	}

	return 0;
}

static void aliasing_gtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_i915_private *i915 = vma->vm->i915;

	if (vma->flags & I915_VMA_GLOBAL_BIND) {
		intel_runtime_pm_get(i915);
		vma->vm->clear_range(vma->vm, vma->node.start, vma->size);
		intel_runtime_pm_put(i915);
	}

	if (vma->flags & I915_VMA_LOCAL_BIND) {
		struct i915_address_space *vm = &i915->mm.aliasing_ppgtt->base;

		vm->clear_range(vm, vma->node.start, vma->size);
	}
}

void i915_gem_gtt_finish_pages(struct drm_i915_gem_object *obj,
			       struct sg_table *pages)
{
	struct drm_i915_private *dev_priv = to_i915(obj->base.dev);
	struct device *kdev = &dev_priv->drm.pdev->dev;
	struct i915_ggtt *ggtt = &dev_priv->ggtt;

	if (unlikely(ggtt->do_idle_maps)) {
		if (i915_gem_wait_for_idle(dev_priv, I915_WAIT_LOCKED)) {
			DRM_ERROR("Failed to wait for idle; VT'd may hang.\n");
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	dma_unmap_sg(kdev, pages->sgl, pages->nents, PCI_DMA_BIDIRECTIONAL);
}

static void i915_gtt_color_adjust(const struct drm_mm_node *node,
				  unsigned long color,
				  u64 *start,
				  u64 *end)
{
	if (node->allocated && node->color != color)
		*start += I915_GTT_PAGE_SIZE;

	/* Also leave a space between the unallocated reserved node after the
	 * GTT and any objects within the GTT, i.e. we use the color adjustment
	 * to insert a guard page to prevent prefetches crossing over the
	 * GTT boundary.
	 */
	node = list_next_entry(node, node_list);
	if (node->color != color)
		*end -= I915_GTT_PAGE_SIZE;
}

int i915_gem_init_aliasing_ppgtt(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_hw_ppgtt *ppgtt;
	int err;

	ppgtt = i915_ppgtt_create(i915, NULL, "[alias]");
	if (IS_ERR(ppgtt))
		return PTR_ERR(ppgtt);

	if (ppgtt->base.allocate_va_range) {
		err = ppgtt->base.allocate_va_range(&ppgtt->base,
						    0, ppgtt->base.total);
		if (err)
			goto err_ppgtt;
	}

	i915->mm.aliasing_ppgtt = ppgtt;

	WARN_ON(ggtt->base.bind_vma != ggtt_bind_vma);
	ggtt->base.bind_vma = aliasing_gtt_bind_vma;

	WARN_ON(ggtt->base.unbind_vma != ggtt_unbind_vma);
	ggtt->base.unbind_vma = aliasing_gtt_unbind_vma;

	return 0;

err_ppgtt:
	i915_ppgtt_put(ppgtt);
	return err;
}

void i915_gem_fini_aliasing_ppgtt(struct drm_i915_private *i915)
{
	struct i915_ggtt *ggtt = &i915->ggtt;
	struct i915_hw_ppgtt *ppgtt;

	ppgtt = fetch_and_zero(&i915->mm.aliasing_ppgtt);
	if (!ppgtt)
		return;

	i915_ppgtt_put(ppgtt);

	ggtt->base.bind_vma = ggtt_bind_vma;
	ggtt->base.unbind_vma = ggtt_unbind_vma;
}

int i915_gem_init_ggtt(struct drm_i915_private *dev_priv)
{
	/* Let GEM Manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture.  One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	unsigned long hole_start, hole_end;
	struct drm_mm_node *entry;
	int ret;

	ret = intel_vgt_balloon(dev_priv);
	if (ret)
		return ret;

	/* Reserve a mappable slot for our lockless error capture */
	ret = drm_mm_insert_node_in_range(&ggtt->base.mm, &ggtt->error_capture,
					  PAGE_SIZE, 0, I915_COLOR_UNEVICTABLE,
					  0, ggtt->mappable_end,
					  DRM_MM_INSERT_LOW);
	if (ret)
		return ret;

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt->base.mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt->base.clear_range(&ggtt->base, hole_start,
				       hole_end - hole_start);
	}

	/* And finally clear the reserved guard page */
	ggtt->base.clear_range(&ggtt->base,
			       ggtt->base.total - PAGE_SIZE, PAGE_SIZE);

	if (USES_PPGTT(dev_priv) && !USES_FULL_PPGTT(dev_priv)) {
		ret = i915_gem_init_aliasing_ppgtt(dev_priv);
		if (ret)
			goto err;
	}

	return 0;

err:
	drm_mm_remove_node(&ggtt->error_capture);
	return ret;
}

/**
 * i915_ggtt_cleanup_hw - Clean up GGTT hardware initialization
 * @dev_priv: i915 device
 */
void i915_ggtt_cleanup_hw(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct i915_vma *vma, *vn;

	ggtt->base.closed = true;

	mutex_lock(&dev_priv->drm.struct_mutex);
	WARN_ON(!list_empty(&ggtt->base.active_list));
	list_for_each_entry_safe(vma, vn, &ggtt->base.inactive_list, vm_link)
		WARN_ON(i915_vma_unbind(vma));
	mutex_unlock(&dev_priv->drm.struct_mutex);

	i915_gem_cleanup_stolen(&dev_priv->drm);

	mutex_lock(&dev_priv->drm.struct_mutex);
	i915_gem_fini_aliasing_ppgtt(dev_priv);

	if (drm_mm_node_allocated(&ggtt->error_capture))
		drm_mm_remove_node(&ggtt->error_capture);

	if (drm_mm_initialized(&ggtt->base.mm)) {
		intel_vgt_deballoon(dev_priv);
		i915_address_space_fini(&ggtt->base);
	}

	ggtt->base.cleanup(&ggtt->base);
	mutex_unlock(&dev_priv->drm.struct_mutex);

	arch_phys_wc_del(ggtt->mtrr);
	io_mapping_fini(&ggtt->mappable);
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}

static size_t chv_get_stolen_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GMS_MASK;

	/*
	 * 0x0  to 0x10: 32MB increments starting at 0MB
	 * 0x11 to 0x16: 4MB increments starting at 8MB
	 * 0x17 to 0x1d: 4MB increments starting at 36MB
	 */
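	/* As a worked example of the decode above: gmch_ctrl == 0x11 yields
	 * (0x11 - 0x11 + 2) << 22 = 8MB, and gmch_ctrl == 0x17 yields
	 * (0x17 - 0x17 + 9) << 22 = 36MB, matching the table.
	 */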
	if (gmch_ctrl < 0x11)
		return gmch_ctrl << 25;
	else if (gmch_ctrl < 0x17)
		return (gmch_ctrl - 0x11 + 2) << 22;
	else
		return (gmch_ctrl - 0x17 + 9) << 22;
}

static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
{
	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;

	if (gen9_gmch_ctl < 0xf0)
		return gen9_gmch_ctl << 25; /* 32 MB units */
	else
		/* 4MB increments starting at 0xf0 for 4MB */
		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
}

static int ggtt_probe_common(struct i915_ggtt *ggtt, u64 size)
{
	struct drm_i915_private *dev_priv = ggtt->base.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	phys_addr_t phys_addr;
	int ret;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	phys_addr = pci_resource_start(pdev, 0) + pci_resource_len(pdev, 0) / 2;

	/*
	 * On BXT writes larger than 64 bit to the GTT pagetable range will be
	 * dropped. For WC mappings in general we have 64 byte burst writes
	 * when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_GEN9_LP(dev_priv))
		ggtt->gsm = ioremap_nocache(phys_addr, size);
	else
		ggtt->gsm = ioremap_wc(phys_addr, size);
	if (!ggtt->gsm) {
		DRM_ERROR("Failed to map the ggtt page table\n");
		return -ENOMEM;
	}

	ret = setup_scratch_page(&ggtt->base, GFP_DMA32);
	if (ret) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(ggtt->gsm);
		return ret;
	}

	return 0;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	if (!USES_PPGTT(dev_priv))
		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
		 * so RTL will always use the value corresponding to
		 * pat_sel = 000".
		 * So let's disable cache for GGTT to avoid screen corruptions.
		 * MOCS still can be used though.
		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
		 * before this patch, i.e. the same uncached + snooping access
		 * like on gen6/7 seems to be in effect.
		 * - So this just fixes blitter/render access. Again it looks
		 * like it's not just uncached access, but uncached + snooping.
		 * So we can still hold onto all our assumptions wrt cpu
		 * clflushing on LLC machines.
		 */
		pat = GEN8_PPAT(0, GEN8_PPAT_UC);

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
}

static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT_LO, pat);
	I915_WRITE(GEN8_PRIVATE_PAT_HI, pat >> 32);
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{
	struct i915_ggtt *ggtt = i915_vm_to_ggtt(vm);

	iounmap(ggtt->gsm);
	cleanup_scratch_page(vm);
}

static int gen8_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->base.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	ggtt->mappable_base = pci_resource_start(pdev, 2);
	ggtt->mappable_end = pci_resource_len(pdev, 2);

	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(39));

	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	if (INTEL_GEN(dev_priv) >= 9) {
		ggtt->stolen_size = gen9_get_stolen_size(snb_gmch_ctl);
		size = gen8_get_total_gtt_size(snb_gmch_ctl);
	} else if (IS_CHERRYVIEW(dev_priv)) {
		ggtt->stolen_size = chv_get_stolen_size(snb_gmch_ctl);
		size = chv_get_total_gtt_size(snb_gmch_ctl);
	} else {
		ggtt->stolen_size = gen8_get_stolen_size(snb_gmch_ctl);
		size = gen8_get_total_gtt_size(snb_gmch_ctl);
	}

	ggtt->base.total = (size / sizeof(gen8_pte_t)) << PAGE_SHIFT;

	if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);

	ggtt->base.cleanup = gen6_gmch_remove;
	ggtt->base.bind_vma = ggtt_bind_vma;
	ggtt->base.unbind_vma = ggtt_unbind_vma;
	ggtt->base.insert_page = gen8_ggtt_insert_page;
	ggtt->base.clear_range = nop_clear_range;
	if (!USES_FULL_PPGTT(dev_priv) || intel_scanout_needs_vtd_wa(dev_priv))
		ggtt->base.clear_range = gen8_ggtt_clear_range;

	ggtt->base.insert_entries = gen8_ggtt_insert_entries;
	if (IS_CHERRYVIEW(dev_priv))
		ggtt->base.insert_entries = gen8_ggtt_insert_entries__BKL;

	ggtt->invalidate = gen6_ggtt_invalidate;

	return ggtt_probe_common(ggtt, size);
}

static int gen6_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->base.i915;
	struct pci_dev *pdev = dev_priv->drm.pdev;
	unsigned int size;
	u16 snb_gmch_ctl;

	ggtt->mappable_base = pci_resource_start(pdev, 2);
	ggtt->mappable_end = pci_resource_len(pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if (ggtt->mappable_end < (64<<20) || ggtt->mappable_end > (512<<20)) {
		DRM_ERROR("Unknown GMADR size (%llx)\n", ggtt->mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(40));
	pci_read_config_word(pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	ggtt->stolen_size = gen6_get_stolen_size(snb_gmch_ctl);

	size = gen6_get_total_gtt_size(snb_gmch_ctl);
	ggtt->base.total = (size / sizeof(gen6_pte_t)) << PAGE_SHIFT;

	ggtt->base.clear_range = gen6_ggtt_clear_range;
	ggtt->base.insert_page = gen6_ggtt_insert_page;
	ggtt->base.insert_entries = gen6_ggtt_insert_entries;
	ggtt->base.bind_vma = ggtt_bind_vma;
	ggtt->base.unbind_vma = ggtt_unbind_vma;
	ggtt->base.cleanup = gen6_gmch_remove;

	ggtt->invalidate = gen6_ggtt_invalidate;

	if (HAS_EDRAM(dev_priv))
		ggtt->base.pte_encode = iris_pte_encode;
	else if (IS_HASWELL(dev_priv))
		ggtt->base.pte_encode = hsw_pte_encode;
	else if (IS_VALLEYVIEW(dev_priv))
		ggtt->base.pte_encode = byt_pte_encode;
	else if (INTEL_GEN(dev_priv) >= 7)
		ggtt->base.pte_encode = ivb_pte_encode;
	else
		ggtt->base.pte_encode = snb_pte_encode;

	return ggtt_probe_common(ggtt, size);
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

static int i915_gmch_probe(struct i915_ggtt *ggtt)
{
	struct drm_i915_private *dev_priv = ggtt->base.i915;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->drm.pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(&ggtt->base.total,
		      &ggtt->stolen_size,
		      &ggtt->mappable_base,
		      &ggtt->mappable_end);

	ggtt->do_idle_maps = needs_idle_maps(dev_priv);
	ggtt->base.insert_page = i915_ggtt_insert_page;
	ggtt->base.insert_entries = i915_ggtt_insert_entries;
	ggtt->base.clear_range = i915_ggtt_clear_range;
	ggtt->base.bind_vma = ggtt_bind_vma;
	ggtt->base.unbind_vma = ggtt_unbind_vma;
	ggtt->base.cleanup = i915_gmch_remove;

	ggtt->invalidate = gmch_ggtt_invalidate;

	if (unlikely(ggtt->do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

/**
 * i915_ggtt_probe_hw - Probe GGTT hardware location
 * @dev_priv: i915 device
 */
int i915_ggtt_probe_hw(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int ret;

	ggtt->base.i915 = dev_priv;
	ggtt->base.dma = &dev_priv->drm.pdev->dev;

	if (INTEL_GEN(dev_priv) <= 5)
		ret = i915_gmch_probe(ggtt);
	else if (INTEL_GEN(dev_priv) < 8)
		ret = gen6_gmch_probe(ggtt);
	else
		ret = gen8_gmch_probe(ggtt);
	if (ret)
		return ret;

	/* Trim the GGTT to fit the GuC mappable upper range (when enabled).
	 * This is easier than doing range restriction on the fly, as we
	 * currently don't have any bits spare to pass in this upper
	 * restriction!
	 */
	if (HAS_GUC(dev_priv) && i915.enable_guc_loading) {
		ggtt->base.total = min_t(u64, ggtt->base.total, GUC_GGTT_TOP);
		ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
	}

	if ((ggtt->base.total - 1) >> 32) {
		DRM_ERROR("We never expected a Global GTT with more than 32bits"
			  " of address space! Found %lldM!\n",
			  ggtt->base.total >> 20);
		ggtt->base.total = 1ULL << 32;
		ggtt->mappable_end = min(ggtt->mappable_end, ggtt->base.total);
	}

	if (ggtt->mappable_end > ggtt->base.total) {
		DRM_ERROR("mappable aperture extends past end of GGTT,"
			  " aperture=%llx, total=%llx\n",
			  ggtt->mappable_end, ggtt->base.total);
		ggtt->mappable_end = ggtt->base.total;
	}

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %lluM\n",
		 ggtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", ggtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %uM\n", ggtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif

	return 0;
}

/**
 * i915_ggtt_init_hw - Initialize GGTT hardware
 * @dev_priv: i915 device
 */
int i915_ggtt_init_hw(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	int ret;

	INIT_LIST_HEAD(&dev_priv->vm_list);

	/* Note that we use page colouring to enforce a guard page at the
	 * end of the address space. This is required as the CS may prefetch
	 * beyond the end of the batch buffer, across the page boundary,
	 * and beyond the end of the GTT if we do not provide a guard.
	 */
	mutex_lock(&dev_priv->drm.struct_mutex);
	i915_address_space_init(&ggtt->base, dev_priv, "[global]");
	if (!HAS_LLC(dev_priv) && !USES_PPGTT(dev_priv))
		ggtt->base.mm.color_adjust = i915_gtt_color_adjust;
	mutex_unlock(&dev_priv->drm.struct_mutex);

	if (!io_mapping_init_wc(&dev_priv->ggtt.mappable,
				dev_priv->ggtt.mappable_base,
				dev_priv->ggtt.mappable_end)) {
		ret = -EIO;
		goto out_gtt_cleanup;
	}

	ggtt->mtrr = arch_phys_wc_add(ggtt->mappable_base, ggtt->mappable_end);

	/*
	 * Initialise stolen early so that we may reserve preallocated
	 * objects for the BIOS to KMS transition.
	 */
	ret = i915_gem_init_stolen(dev_priv);
	if (ret)
		goto out_gtt_cleanup;

	return 0;

out_gtt_cleanup:
	ggtt->base.cleanup(&ggtt->base);
	return ret;
}

int i915_ggtt_enable_hw(struct drm_i915_private *dev_priv)
{
	if (INTEL_GEN(dev_priv) < 6 && !intel_enable_gtt())
		return -EIO;

	return 0;
}

void i915_ggtt_enable_guc(struct drm_i915_private *i915)
{
	i915->ggtt.invalidate = guc_ggtt_invalidate;
}

void i915_ggtt_disable_guc(struct drm_i915_private *i915)
{
	i915->ggtt.invalidate = gen6_ggtt_invalidate;
}

void i915_gem_restore_gtt_mappings(struct drm_i915_private *dev_priv)
{
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	struct drm_i915_gem_object *obj, *on;

	i915_check_and_clear_faults(dev_priv);

	/* First fill our portion of the GTT with scratch pages */
	ggtt->base.clear_range(&ggtt->base, ggtt->base.start, ggtt->base.total);

	ggtt->base.closed = true; /* skip rewriting PTE on VMA unbind */

	/* clflush objects bound into the GGTT and rebind them. */
	list_for_each_entry_safe(obj, on,
				 &dev_priv->mm.bound_list, global_link) {
		bool ggtt_bound = false;
		struct i915_vma *vma;

		list_for_each_entry(vma, &obj->vma_list, obj_link) {
			if (vma->vm != &ggtt->base)
				continue;

			if (!i915_vma_unbind(vma))
				continue;

			WARN_ON(i915_vma_bind(vma, obj->cache_level,
					      PIN_UPDATE));
			ggtt_bound = true;
		}

		if (ggtt_bound)
			WARN_ON(i915_gem_object_set_to_gtt_domain(obj, false));
	}

	ggtt->base.closed = false;

	if (INTEL_GEN(dev_priv) >= 8) {
		if (IS_CHERRYVIEW(dev_priv) || IS_GEN9_LP(dev_priv))
			chv_setup_private_ppat(dev_priv);
		else
			bdw_setup_private_ppat(dev_priv);

		return;
	}

	if (USES_PPGTT(dev_priv)) {
		struct i915_address_space *vm;

		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
			struct i915_hw_ppgtt *ppgtt;

			if (i915_is_ggtt(vm))
				ppgtt = dev_priv->mm.aliasing_ppgtt;
			else
				ppgtt = i915_vm_to_ppgtt(vm);

			gen6_write_page_range(ppgtt, 0, ppgtt->base.total);
		}
	}

	i915_ggtt_invalidate(dev_priv);
}

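/*
 * rotate_pages() below walks each plane column by column, starting from the
 * bottom row of a column and stepping up by @stride, so the resulting sg
 * list describes the rotated view. As a small illustrative example, a 2x2
 * plane (stride 2) with linear page indices
 *
 *	0 1
 *	2 3
 *
 * is emitted in the order 2, 0, 3, 1.
 */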
static struct scatterlist *
rotate_pages(const dma_addr_t *in, unsigned int offset,
	     unsigned int width, unsigned int height,
	     unsigned int stride,
	     struct sg_table *st, struct scatterlist *sg)
{
	unsigned int column, row;
	unsigned int src_idx;

	for (column = 0; column < width; column++) {
		src_idx = stride * (height - 1) + column;
		for (row = 0; row < height; row++) {
			st->nents++;
			/* We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * All we need are the DMA addresses.
			 */
			sg_set_page(sg, NULL, PAGE_SIZE, 0);
			sg_dma_address(sg) = in[offset + src_idx];
			sg_dma_len(sg) = PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= stride;
		}
	}

	return sg;
}

static noinline struct sg_table *
intel_rotate_pages(struct intel_rotation_info *rot_info,
		   struct drm_i915_gem_object *obj)
{
	const size_t n_pages = obj->base.size / PAGE_SIZE;
	unsigned int size = intel_rotation_info_size(rot_info);
	struct sgt_iter sgt_iter;
	dma_addr_t dma_addr;
	unsigned long i;
	dma_addr_t *page_addr_list;
	struct sg_table *st;
	struct scatterlist *sg;
	int ret = -ENOMEM;

	/* Allocate a temporary list of source pages for random access. */
	page_addr_list = drm_malloc_gfp(n_pages,
					sizeof(dma_addr_t),
					GFP_TEMPORARY);
	if (!page_addr_list)
		return ERR_PTR(ret);

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	/* Populate source page list from the object. */
	i = 0;
	for_each_sgt_dma(dma_addr, sgt_iter, obj->mm.pages)
		page_addr_list[i++] = dma_addr;

	GEM_BUG_ON(i != n_pages);
	st->nents = 0;
	sg = st->sgl;

	for (i = 0 ; i < ARRAY_SIZE(rot_info->plane); i++) {
		sg = rotate_pages(page_addr_list, rot_info->plane[i].offset,
				  rot_info->plane[i].width, rot_info->plane[i].height,
				  rot_info->plane[i].stride, st, sg);
	}

	DRM_DEBUG_KMS("Created rotated page mapping for object size %zu (%ux%u tiles, %u pages)\n",
		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);

	drm_free_large(page_addr_list);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	drm_free_large(page_addr_list);

	DRM_DEBUG_KMS("Failed to create rotated mapping for object size %zu! (%ux%u tiles, %u pages)\n",
		      obj->base.size, rot_info->plane[0].width, rot_info->plane[0].height, size);

	return ERR_PTR(ret);
}

static noinline struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg, *iter;
	unsigned int count = view->partial.size;
	unsigned int offset;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, count, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	iter = i915_gem_object_get_sg(obj, view->partial.offset, &offset);
	GEM_BUG_ON(!iter);

	sg = st->sgl;
	st->nents = 0;
	do {
		unsigned int len;

		len = min(iter->length - (offset << PAGE_SHIFT),
			  count << PAGE_SHIFT);
		sg_set_page(sg, NULL, len, 0);
		sg_dma_address(sg) =
			sg_dma_address(iter) + (offset << PAGE_SHIFT);
		sg_dma_len(sg) = len;

		st->nents++;
		count -= len >> PAGE_SHIFT;
		if (count == 0) {
			sg_mark_end(sg);
			return st;
		}

		sg = __sg_next(sg);
		iter = __sg_next(iter);
		offset = 0;
	} while (1);

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret;

	/* The vma->pages are only valid within the lifespan of the borrowed
	 * obj->mm.pages. When the obj->mm.pages sg_table is regenerated, so
	 * must be the vma->pages. A simple rule is that vma->pages must only
	 * be accessed when the obj->mm.pages are pinned.
	 */
	GEM_BUG_ON(!i915_gem_object_has_pinned_pages(vma->obj));

	switch (vma->ggtt_view.type) {
	case I915_GGTT_VIEW_NORMAL:
		vma->pages = vma->obj->mm.pages;
		return 0;

	case I915_GGTT_VIEW_ROTATED:
		vma->pages =
			intel_rotate_pages(&vma->ggtt_view.rotated, vma->obj);
		break;

	case I915_GGTT_VIEW_PARTIAL:
		vma->pages = intel_partial_pages(&vma->ggtt_view, vma->obj);
		break;

	default:
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
			  vma->ggtt_view.type);
		return -EINVAL;
	}

	ret = 0;
	if (unlikely(IS_ERR(vma->pages))) {
		ret = PTR_ERR(vma->pages);
		vma->pages = NULL;
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
			  vma->ggtt_view.type, ret);
	}
	return ret;
}

/**
 * i915_gem_gtt_reserve - reserve a node in an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.mode)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @offset: where to insert inside the GTT,
 *          must be #I915_GTT_MIN_ALIGNMENT aligned, and the node
 *          (@offset + @size) must fit within the address space
 * @color: color to apply to node, if this node is not from a VMA,
 *         color must be #I915_COLOR_UNEVICTABLE
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_reserve() tries to insert the @node at the exact @offset inside
 * the address space (using @size and @color). If the @node does not fit, it
 * tries to evict any overlapping nodes from the GTT, including any
 * neighbouring nodes if the colors do not match (to ensure guard pages between
 * differing domains). See i915_gem_evict_for_node() for the gory details
 * on the eviction algorithm. #PIN_NONBLOCK may be used to prevent waiting on
 * evicting active overlapping objects, and any overlapping node that is pinned
 * or marked as unevictable will also result in failure.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
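 *
 * A minimal usage sketch (illustrative only, with placeholder values; the
 * node must be zeroed and not already allocated):
 *
 *	err = i915_gem_gtt_reserve(&ggtt->base, &node, size, offset,
 *				   I915_COLOR_UNEVICTABLE, 0);
 *
 * On success, @node occupies exactly [@offset, @offset + @size) within @vm.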
 */
int i915_gem_gtt_reserve(struct i915_address_space *vm,
			 struct drm_mm_node *node,
			 u64 size, u64 offset, unsigned long color,
			 unsigned int flags)
{
	int err;

	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(!IS_ALIGNED(offset, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(range_overflows(offset, size, vm->total));
	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
	GEM_BUG_ON(drm_mm_node_allocated(node));

	node->size = size;
	node->start = offset;
	node->color = color;

	err = drm_mm_reserve_node(&vm->mm, node);
	if (err != -ENOSPC)
		return err;

	err = i915_gem_evict_for_node(vm, node, flags);
	if (err == 0)
		err = drm_mm_reserve_node(&vm->mm, node);

	return err;
}

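/*
 * Pick a random, suitably aligned offset within [start, end - len]. This is
 * used below as a single randomised placement attempt before falling back to
 * a full eviction search.
 */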
static u64 random_offset(u64 start, u64 end, u64 len, u64 align)
{
	u64 range, addr;

	GEM_BUG_ON(range_overflows(start, len, end));
	GEM_BUG_ON(round_up(start, align) > round_down(end - len, align));

	range = round_down(end - len, align) - round_up(start, align);
	if (range) {
		if (sizeof(unsigned long) == sizeof(u64)) {
			addr = get_random_long();
		} else {
			addr = get_random_int();
			if (range > U32_MAX) {
				addr <<= 32;
				addr |= get_random_int();
			}
		}
		div64_u64_rem(addr, range, &addr);
		start += addr;
	}

	return round_up(start, align);
}

/**
 * i915_gem_gtt_insert - insert a node into an address_space (GTT)
 * @vm: the &struct i915_address_space
 * @node: the &struct drm_mm_node (typically i915_vma.node)
 * @size: how much space to allocate inside the GTT,
 *        must be #I915_GTT_PAGE_SIZE aligned
 * @alignment: required alignment of starting offset, may be 0 but
 *             if specified, this must be a power-of-two and at least
 *             #I915_GTT_MIN_ALIGNMENT
 * @color: color to apply to node
 * @start: start of any range restriction inside GTT (0 for all),
 *         must be #I915_GTT_PAGE_SIZE aligned
 * @end: end of any range restriction inside GTT (U64_MAX for all),
 *       must be #I915_GTT_PAGE_SIZE aligned if not U64_MAX
 * @flags: control search and eviction behaviour
 *
 * i915_gem_gtt_insert() first searches for an available hole into which
 * it can insert the node. The hole address is aligned to @alignment and
 * its @size must then fit entirely within the [@start, @end] bounds. The
 * nodes on either side of the hole must match @color, or else a guard page
 * will be inserted between the two nodes (or the node evicted). If no
 * suitable hole is found, first a victim is randomly selected and tested
 * for eviction, otherwise the LRU list of objects within the GTT
 * is scanned to find the first set of replacement nodes to create the hole.
 * Those old overlapping nodes are evicted from the GTT (and so must be
 * rebound before any future use). Any node that is currently pinned cannot
 * be evicted (see i915_vma_pin()). Similar if the node's VMA is currently
 * active and #PIN_NONBLOCK is specified, that node is also skipped when
 * searching for an eviction candidate. See i915_gem_evict_something() for
 * the gory details on the eviction algorithm.
 *
 * Returns: 0 on success, -ENOSPC if no suitable hole is found, -EINTR if
 * asked to wait for eviction and interrupted.
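 *
 * A minimal usage sketch (illustrative only; the mappable-range restriction
 * and the flags are just example choices):
 *
 *	err = i915_gem_gtt_insert(&ggtt->base, &node, size, 0,
 *				  I915_COLOR_UNEVICTABLE,
 *				  0, ggtt->mappable_end, PIN_MAPPABLE);
 *
 * On success, @node has been placed somewhere within the requested range.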
 */
int i915_gem_gtt_insert(struct i915_address_space *vm,
			struct drm_mm_node *node,
			u64 size, u64 alignment, unsigned long color,
			u64 start, u64 end, unsigned int flags)
{
	enum drm_mm_insert_mode mode;
	u64 offset;
	int err;

	lockdep_assert_held(&vm->i915->drm.struct_mutex);
	GEM_BUG_ON(!size);
	GEM_BUG_ON(!IS_ALIGNED(size, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(alignment && !is_power_of_2(alignment));
	GEM_BUG_ON(alignment && !IS_ALIGNED(alignment, I915_GTT_MIN_ALIGNMENT));
	GEM_BUG_ON(start >= end);
	GEM_BUG_ON(start > 0  && !IS_ALIGNED(start, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(end < U64_MAX && !IS_ALIGNED(end, I915_GTT_PAGE_SIZE));
	GEM_BUG_ON(vm == &vm->i915->mm.aliasing_ppgtt->base);
	GEM_BUG_ON(drm_mm_node_allocated(node));

	if (unlikely(range_overflows(start, size, end)))
		return -ENOSPC;

	if (unlikely(round_up(start, alignment) > round_down(end - size, alignment)))
		return -ENOSPC;

	mode = DRM_MM_INSERT_BEST;
	if (flags & PIN_HIGH)
		mode = DRM_MM_INSERT_HIGH;
	if (flags & PIN_MAPPABLE)
		mode = DRM_MM_INSERT_LOW;

	/* We only allocate in PAGE_SIZE/GTT_PAGE_SIZE (4096) chunks,
	 * so we know that we always have a minimum alignment of 4096.
	 * The drm_mm range manager is optimised to return results
	 * with zero alignment, so where possible use the optimal
	 * path.
	 */
	BUILD_BUG_ON(I915_GTT_MIN_ALIGNMENT > I915_GTT_PAGE_SIZE);
	if (alignment <= I915_GTT_MIN_ALIGNMENT)
		alignment = 0;

	err = drm_mm_insert_node_in_range(&vm->mm, node,
					  size, alignment, color,
					  start, end, mode);
	if (err != -ENOSPC)
		return err;

	/* No free space, pick a slot at random.
	 *
	 * There is a pathological case here using a GTT shared between
	 * mmap and GPU (i.e. ggtt/aliasing_ppgtt but not full-ppgtt):
	 *
	 *    |<-- 256 MiB aperture -->||<-- 1792 MiB unmappable -->|
	 *         (64k objects)             (448k objects)
	 *
	 * Now imagine that the eviction LRU is ordered top-down (just because
	 * pathology meets real life), and that we need to evict an object to
	 * make room inside the aperture. The eviction scan then has to walk
	 * the 448k list before it finds one within range. And now imagine that
	 * it has to search for a new hole between every byte inside the memcpy,
	 * for several simultaneous clients.
	 *
	 * On a full-ppgtt system, if we have run out of available space, there
	 * will be lots and lots of objects in the eviction list! Again,
	 * searching that LRU list may be slow if we are also applying any
	 * range restrictions (e.g. restriction to low 4GiB) and so, for
 * simplicity and similarity between different GTTs, try the single
	 * random replacement first.
	 */
	offset = random_offset(start, end,
			       size, alignment ?: I915_GTT_MIN_ALIGNMENT);
	err = i915_gem_gtt_reserve(vm, node, size, offset, color, flags);
	if (err != -ENOSPC)
		return err;

	/* Randomly selected placement is pinned, do a search */
	err = i915_gem_evict_something(vm, size, alignment, color,
				       start, end, flags);
	if (err)
		return err;

	return drm_mm_insert_node_in_range(&vm->mm, node,
					   size, alignment, color,
					   start, end, DRM_MM_INSERT_EVICT);
}

#if IS_ENABLED(CONFIG_DRM_I915_SELFTEST)
#include "selftests/mock_gtt.c"
#include "selftests/i915_gem_gtt.c"
#endif