/*
 * Copyright © 2010 Daniel Vetter
 * Copyright © 2011-2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 */

#include <linux/seq_file.h>
#include <drm/drmP.h>
#include <drm/i915_drm.h>
#include "i915_drv.h"
#include "i915_vgpu.h"
#include "i915_trace.h"
#include "intel_drv.h"

/**
 * DOC: Global GTT views
 *
 * Background and previous state
 *
 * Historically objects could exist (be bound) in global GTT space only as
 * singular instances with a view representing all of the object's backing pages
 * in a linear fashion. This view will be called a normal view.
 *
 * To support multiple views of the same object, where the number of mapped
 * pages is not equal to the backing store, or where the layout of the pages
 * is not linear, the concept of a GGTT view was added.
 *
 * One example of an alternative view is a stereo display driven by a single
 * image. In this case we would have a framebuffer looking like this
 * (2x2 pages):
 *
 *    12
 *    34
 *
 * Above would represent a normal GGTT view as normally mapped for GPU or CPU
 * rendering. In contrast, fed to the display engine would be an alternative
 * view which could look something like this:
 *
 *   1212
 *   3434
 *
 * In this example both the size and layout of pages in the alternative view are
 * different from the normal view.
 *
 * Implementation and usage
 *
 * GGTT views are implemented using VMAs and are distinguished via enum
 * i915_ggtt_view_type and struct i915_ggtt_view.
 *
 * A new flavour of core GEM functions which work with GGTT bound objects was
 * added with the _ggtt_ infix, and sometimes with the _view postfix, to avoid
 * renaming in large amounts of code. They take the struct i915_ggtt_view
 * parameter encapsulating all metadata required to implement a view.
 *
 * As a helper for callers which are only interested in the normal view, a
 * globally const i915_ggtt_view_normal singleton instance exists. All old core
 * GEM API functions, the ones not taking the view parameter, operate on, or
 * with, the normal GGTT view.
 *
 * Code wanting to add or use a new GGTT view needs to:
 *
 * 1. Add a new enum with a suitable name.
 * 2. Extend the metadata in the i915_ggtt_view structure if required.
 * 3. Add support to i915_get_ggtt_vma_pages().
 *
 * New views are required to build a scatter-gather table from within the
 * i915_get_ggtt_vma_pages function. This table is stored in the vma.ggtt_view
 * and exists for the lifetime of a VMA.
 *
 * The core API is designed to have copy semantics, which means that the passed-in
 * struct i915_ggtt_view does not need to be persistent (left around after
 * calling the core API functions).
 *
 */
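
/*
 * Illustrative sketch only (not part of this file's build): thanks to the
 * copy semantics described above, a caller asking for the rotated view can
 * pass a short-lived view descriptor from the stack. The pin helper named
 * below is an assumption about the wider GEM API and may differ:
 *
 *	struct i915_ggtt_view view = { .type = I915_GGTT_VIEW_ROTATED };
 *	int ret = i915_gem_object_ggtt_pin(obj, &view, 0, PIN_MAPPABLE);
 *
 * After the call returns, 'view' may go out of scope; the VMA keeps its own
 * copy of the metadata.
 */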

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma);

const struct i915_ggtt_view i915_ggtt_view_normal;
const struct i915_ggtt_view i915_ggtt_view_rotated = {
        .type = I915_GGTT_VIEW_ROTATED
};

static int sanitize_enable_ppgtt(struct drm_device *dev, int enable_ppgtt)
{
	bool has_aliasing_ppgtt;
	bool has_full_ppgtt;

	has_aliasing_ppgtt = INTEL_INFO(dev)->gen >= 6;
	has_full_ppgtt = INTEL_INFO(dev)->gen >= 7;

	if (intel_vgpu_active(dev))
		has_full_ppgtt = false; /* emulation is too hard */

	/*
	 * We don't allow disabling PPGTT for gen9+ as it's a requirement for
	 * execlists, the sole mechanism available to submit work.
	 */
	if (INTEL_INFO(dev)->gen < 9 &&
	    (enable_ppgtt == 0 || !has_aliasing_ppgtt))
		return 0;

	if (enable_ppgtt == 1)
		return 1;

	if (enable_ppgtt == 2 && has_full_ppgtt)
		return 2;

#ifdef CONFIG_INTEL_IOMMU
	/* Disable ppgtt on SNB if VT-d is on. */
	if (INTEL_INFO(dev)->gen == 6 && intel_iommu_gfx_mapped) {
		DRM_INFO("Disabling PPGTT because VT-d is on\n");
		return 0;
	}
#endif

	/* Early VLV doesn't have this */
	if (IS_VALLEYVIEW(dev) && !IS_CHERRYVIEW(dev) &&
	    dev->pdev->revision < 0xb) {
		DRM_DEBUG_DRIVER("disabling PPGTT on pre-B3 step VLV\n");
		return 0;
	}

	if (INTEL_INFO(dev)->gen >= 8 && i915.enable_execlists)
		return 2;
	else
		return has_aliasing_ppgtt ? 1 : 0;
}

static int ppgtt_bind_vma(struct i915_vma *vma,
			  enum i915_cache_level cache_level,
			  u32 unused)
{
	u32 pte_flags = 0;

	/* Currently applicable only to VLV */
	if (vma->obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;

	vma->vm->insert_entries(vma->vm, vma->obj->pages, vma->node.start,
				cache_level, pte_flags);

	return 0;
}

static void ppgtt_unbind_vma(struct i915_vma *vma)
{
	vma->vm->clear_range(vma->vm,
			     vma->node.start,
			     vma->obj->base.size,
			     true);
}

static gen8_pte_t gen8_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid)
{
	gen8_pte_t pte = valid ? _PAGE_PRESENT | _PAGE_RW : 0;
	pte |= addr;

	switch (level) {
	case I915_CACHE_NONE:
		pte |= PPAT_UNCACHED_INDEX;
		break;
	case I915_CACHE_WT:
		pte |= PPAT_DISPLAY_ELLC_INDEX;
		break;
	default:
		pte |= PPAT_CACHED_INDEX;
		break;
	}

	return pte;
}

static gen8_pde_t gen8_pde_encode(const dma_addr_t addr,
				  const enum i915_cache_level level)
{
	gen8_pde_t pde = _PAGE_PRESENT | _PAGE_RW;
	pde |= addr;
	if (level != I915_CACHE_NONE)
		pde |= PPAT_CACHED_PDE_INDEX;
	else
		pde |= PPAT_UNCACHED_INDEX;
	return pde;
}

static gen6_pte_t snb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t ivb_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_L3_LLC:
		pte |= GEN7_PTE_CACHE_L3_LLC;
		break;
	case I915_CACHE_LLC:
		pte |= GEN6_PTE_CACHE_LLC;
		break;
	case I915_CACHE_NONE:
		pte |= GEN6_PTE_UNCACHED;
		break;
	default:
		MISSING_CASE(level);
	}

	return pte;
}

static gen6_pte_t byt_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 flags)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= GEN6_PTE_ADDR_ENCODE(addr);

	if (!(flags & PTE_READ_ONLY))
		pte |= BYT_PTE_WRITEABLE;

	if (level != I915_CACHE_NONE)
		pte |= BYT_PTE_SNOOPED_BY_CPU_CACHES;

	return pte;
}

static gen6_pte_t hsw_pte_encode(dma_addr_t addr,
				 enum i915_cache_level level,
				 bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	if (level != I915_CACHE_NONE)
		pte |= HSW_WB_LLC_AGE3;

	return pte;
}

static gen6_pte_t iris_pte_encode(dma_addr_t addr,
				  enum i915_cache_level level,
				  bool valid, u32 unused)
{
	gen6_pte_t pte = valid ? GEN6_PTE_VALID : 0;
	pte |= HSW_PTE_ADDR_ENCODE(addr);

	switch (level) {
	case I915_CACHE_NONE:
		break;
	case I915_CACHE_WT:
		pte |= HSW_WT_ELLC_LLC_AGE3;
		break;
	default:
		pte |= HSW_WB_ELLC_LLC_AGE3;
		break;
	}

	return pte;
}

static int __setup_page_dma(struct drm_device *dev,
			    struct i915_page_dma *p, gfp_t flags)
{
	struct device *device = &dev->pdev->dev;

	p->page = alloc_page(flags);
	if (!p->page)
		return -ENOMEM;

	p->daddr = dma_map_page(device,
				p->page, 0, 4096, PCI_DMA_BIDIRECTIONAL);

	if (dma_mapping_error(device, p->daddr)) {
		__free_page(p->page);
		return -EINVAL;
	}

	return 0;
}

static int setup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
{
	return __setup_page_dma(dev, p, GFP_KERNEL);
}

static void cleanup_page_dma(struct drm_device *dev, struct i915_page_dma *p)
{
	if (WARN_ON(!p->page))
		return;

	dma_unmap_page(&dev->pdev->dev, p->daddr, 4096, PCI_DMA_BIDIRECTIONAL);
	__free_page(p->page);
	memset(p, 0, sizeof(*p));
}

static void *kmap_page_dma(struct i915_page_dma *p)
{
	return kmap_atomic(p->page);
}

/* We use the flushing unmap only with ppgtt structures:
 * page directories, page tables and scratch pages.
 */
static void kunmap_page_dma(struct drm_device *dev, void *vaddr)
{
	/* There are only a few exceptions for gen >= 6: chv and bxt.
	 * And we are not sure about the latter so play safe for now.
	 */
	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
		drm_clflush_virt_range(vaddr, PAGE_SIZE);

	kunmap_atomic(vaddr);
}

#define kmap_px(px) kmap_page_dma(px_base(px))
#define kunmap_px(ppgtt, vaddr) kunmap_page_dma((ppgtt)->base.dev, (vaddr))

#define setup_px(dev, px) setup_page_dma((dev), px_base(px))
#define cleanup_px(dev, px) cleanup_page_dma((dev), px_base(px))
#define fill_px(dev, px, v) fill_page_dma((dev), px_base(px), (v))
#define fill32_px(dev, px, v) fill_page_dma_32((dev), px_base(px), (v))

static void fill_page_dma(struct drm_device *dev, struct i915_page_dma *p,
			  const uint64_t val)
{
	int i;
	uint64_t * const vaddr = kmap_page_dma(p);

	for (i = 0; i < 512; i++)
		vaddr[i] = val;

	kunmap_page_dma(dev, vaddr);
}

static void fill_page_dma_32(struct drm_device *dev, struct i915_page_dma *p,
			     const uint32_t val32)
{
	uint64_t v = val32;

	v = v << 32 | val32;

	fill_page_dma(dev, p, v);
}

static struct i915_page_scratch *alloc_scratch_page(struct drm_device *dev)
{
	struct i915_page_scratch *sp;
	int ret;

	sp = kzalloc(sizeof(*sp), GFP_KERNEL);
	if (sp == NULL)
		return ERR_PTR(-ENOMEM);

	ret = __setup_page_dma(dev, px_base(sp), GFP_DMA32 | __GFP_ZERO);
	if (ret) {
		kfree(sp);
		return ERR_PTR(ret);
	}

	set_pages_uc(px_page(sp), 1);

	return sp;
}

static void free_scratch_page(struct drm_device *dev,
			      struct i915_page_scratch *sp)
{
	set_pages_wb(px_page(sp), 1);

	cleanup_px(dev, sp);
	kfree(sp);
}

static struct i915_page_table *alloc_pt(struct drm_device *dev)
{
	struct i915_page_table *pt;
	const size_t count = INTEL_INFO(dev)->gen >= 8 ?
		GEN8_PTES : GEN6_PTES;
	int ret = -ENOMEM;

	pt = kzalloc(sizeof(*pt), GFP_KERNEL);
	if (!pt)
		return ERR_PTR(-ENOMEM);

	pt->used_ptes = kcalloc(BITS_TO_LONGS(count), sizeof(*pt->used_ptes),
				GFP_KERNEL);

	if (!pt->used_ptes)
		goto fail_bitmap;

	ret = setup_px(dev, pt);
	if (ret)
		goto fail_page_m;

	return pt;

fail_page_m:
	kfree(pt->used_ptes);
fail_bitmap:
	kfree(pt);

	return ERR_PTR(ret);
}

static void free_pt(struct drm_device *dev, struct i915_page_table *pt)
{
	cleanup_px(dev, pt);
	kfree(pt->used_ptes);
	kfree(pt);
}

static void gen8_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	gen8_pte_t scratch_pte;

	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
				      I915_CACHE_LLC, true);

	fill_px(vm->dev, pt, scratch_pte);
}

static void gen6_initialize_pt(struct i915_address_space *vm,
			       struct i915_page_table *pt)
{
	gen6_pte_t scratch_pte;

	WARN_ON(px_dma(vm->scratch_page) == 0);

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, true, 0);

	fill32_px(vm->dev, pt, scratch_pte);
}

static struct i915_page_directory *alloc_pd(struct drm_device *dev)
{
	struct i915_page_directory *pd;
	int ret = -ENOMEM;

	pd = kzalloc(sizeof(*pd), GFP_KERNEL);
	if (!pd)
		return ERR_PTR(-ENOMEM);

	pd->used_pdes = kcalloc(BITS_TO_LONGS(I915_PDES),
				sizeof(*pd->used_pdes), GFP_KERNEL);
	if (!pd->used_pdes)
		goto fail_bitmap;

	ret = setup_px(dev, pd);
	if (ret)
		goto fail_page_m;

	return pd;

fail_page_m:
	kfree(pd->used_pdes);
fail_bitmap:
	kfree(pd);

	return ERR_PTR(ret);
}

static void free_pd(struct drm_device *dev, struct i915_page_directory *pd)
{
	if (px_page(pd)) {
		cleanup_px(dev, pd);
		kfree(pd->used_pdes);
		kfree(pd);
	}
}

static void gen8_initialize_pd(struct i915_address_space *vm,
			       struct i915_page_directory *pd)
{
	gen8_pde_t scratch_pde;

	scratch_pde = gen8_pde_encode(px_dma(vm->scratch_pt), I915_CACHE_LLC);

	fill_px(vm->dev, pd, scratch_pde);
}

/* Broadwell Page Directory Pointer Descriptors */
static int gen8_write_pdp(struct drm_i915_gem_request *req,
			  unsigned entry,
			  dma_addr_t addr)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	BUG_ON(entry >= 4);

	ret = intel_ring_begin(req, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_UDW(ring, entry));
	intel_ring_emit(ring, upper_32_bits(addr));
	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
	intel_ring_emit(ring, GEN8_RING_PDP_LDW(ring, entry));
	intel_ring_emit(ring, lower_32_bits(addr));
	intel_ring_advance(ring);

	return 0;
}

static int gen8_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	int i, ret;

	for (i = GEN8_LEGACY_PDPES - 1; i >= 0; i--) {
		const dma_addr_t pd_daddr = i915_page_dir_dma_addr(ppgtt, i);

		ret = gen8_write_pdp(req, i, pd_daddr);
		if (ret)
			return ret;
	}

	return 0;
}

static void gen8_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pte_t *pt_vaddr, scratch_pte;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned last_pte, i;

	scratch_pte = gen8_pte_encode(px_dma(ppgtt->base.scratch_page),
				      I915_CACHE_LLC, use_scratch);

	while (num_entries) {
		struct i915_page_directory *pd;
		struct i915_page_table *pt;

		if (WARN_ON(!ppgtt->pdp.page_directory[pdpe]))
			continue;

		pd = ppgtt->pdp.page_directory[pdpe];

		if (WARN_ON(!pd->page_table[pde]))
			continue;

		pt = pd->page_table[pde];

		if (WARN_ON(!px_page(pt)))
			continue;

		last_pte = pte + num_entries;
		if (last_pte > GEN8_PTES)
			last_pte = GEN8_PTES;

		pt_vaddr = kmap_px(pt);

		for (i = pte; i < last_pte; i++) {
			pt_vaddr[i] = scratch_pte;
			num_entries--;
		}

		kunmap_px(ppgtt, pt);

		pte = 0;
		if (++pde == I915_PDES) {
			pdpe++;
			pde = 0;
		}
	}
}

static void gen8_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 unused)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen8_pte_t *pt_vaddr;
	unsigned pdpe = start >> GEN8_PDPE_SHIFT & GEN8_PDPE_MASK;
	unsigned pde = start >> GEN8_PDE_SHIFT & GEN8_PDE_MASK;
	unsigned pte = start >> GEN8_PTE_SHIFT & GEN8_PTE_MASK;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;

	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (WARN_ON(pdpe >= GEN8_LEGACY_PDPES))
			break;

		if (pt_vaddr == NULL) {
			struct i915_page_directory *pd = ppgtt->pdp.page_directory[pdpe];
			struct i915_page_table *pt = pd->page_table[pde];
			pt_vaddr = kmap_px(pt);
		}

		pt_vaddr[pte] =
			gen8_pte_encode(sg_page_iter_dma_address(&sg_iter),
					cache_level, true);
		if (++pte == GEN8_PTES) {
			kunmap_px(ppgtt, pt_vaddr);
			pt_vaddr = NULL;
			if (++pde == I915_PDES) {
				pdpe++;
				pde = 0;
			}
			pte = 0;
		}
	}

	if (pt_vaddr)
		kunmap_px(ppgtt, pt_vaddr);
}

static void gen8_free_page_tables(struct drm_device *dev,
				  struct i915_page_directory *pd)
{
	int i;

	if (!px_page(pd))
		return;

	for_each_set_bit(i, pd->used_pdes, I915_PDES) {
		if (WARN_ON(!pd->page_table[i]))
			continue;

		free_pt(dev, pd->page_table[i]);
		pd->page_table[i] = NULL;
	}
}

static void gen8_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	int i;

	for_each_set_bit(i, ppgtt->pdp.used_pdpes, GEN8_LEGACY_PDPES) {
		if (WARN_ON(!ppgtt->pdp.page_directory[i]))
			continue;

		gen8_free_page_tables(ppgtt->base.dev,
				      ppgtt->pdp.page_directory[i]);
		free_pd(ppgtt->base.dev, ppgtt->pdp.page_directory[i]);
	}

	free_pd(vm->dev, vm->scratch_pd);
	free_pt(vm->dev, vm->scratch_pt);
}

/**
 * gen8_ppgtt_alloc_pagetabs() - Allocate page tables for VA range.
 * @ppgtt:	Master ppgtt structure.
 * @pd:		Page directory for this address range.
 * @start:	Starting virtual address to begin allocations.
 * @length:	Size of the allocations.
 * @new_pts:	Bitmap set by function with new allocations. Likely used by the
 *		caller to free on error.
 *
 * Allocate the required number of page tables. Extremely similar to
 * gen8_ppgtt_alloc_page_directories(). The main difference is here we are limited by
 * the page directory boundary (instead of the page directory pointer). That
 * boundary is 1GB virtual. Therefore, unlike gen8_ppgtt_alloc_page_directories(), it is
 * possible, and likely that the caller will need to use multiple calls of this
 * function to achieve the appropriate allocation.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int gen8_ppgtt_alloc_pagetabs(struct i915_hw_ppgtt *ppgtt,
				     struct i915_page_directory *pd,
				     uint64_t start,
				     uint64_t length,
				     unsigned long *new_pts)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct i915_page_table *pt;
	uint64_t temp;
	uint32_t pde;

	gen8_for_each_pde(pt, pd, start, length, temp, pde) {
		/* Don't reallocate page tables */
		if (pt) {
			/* Scratch is never allocated this way */
			WARN_ON(pt == ppgtt->base.scratch_pt);
			continue;
		}

		pt = alloc_pt(dev);
		if (IS_ERR(pt))
			goto unwind_out;

		gen8_initialize_pt(&ppgtt->base, pt);
		pd->page_table[pde] = pt;
		__set_bit(pde, new_pts);
	}

	return 0;

unwind_out:
	for_each_set_bit(pde, new_pts, I915_PDES)
		free_pt(dev, pd->page_table[pde]);

	return -ENOMEM;
}

/**
 * gen8_ppgtt_alloc_page_directories() - Allocate page directories for VA range.
 * @ppgtt:	Master ppgtt structure.
 * @pdp:	Page directory pointer for this address range.
 * @start:	Starting virtual address to begin allocations.
 * @length:	Size of the allocations.
 * @new_pds:	Bitmap set by function with new allocations. Likely used by the
 *		caller to free on error.
 *
 * Allocate the required number of page directories starting at the pde index of
 * @start, and ending at the pde index @start + @length. This function will skip
 * over already allocated page directories within the range, and only allocate
 * new ones, setting the appropriate pointer within the pdp as well as the
 * correct position in the bitmap @new_pds.
 *
 * The function will only allocate the pages within the range for a given page
 * directory pointer. In other words, if @start + @length straddles a virtually
 * addressed PDP boundary (512GB for 4k pages), there will be more allocations
 * required by the caller. This is not currently possible, and the BUG in the
 * code will prevent it.
 *
 * Return: 0 if success; negative error code otherwise.
 */
static int gen8_ppgtt_alloc_page_directories(struct i915_hw_ppgtt *ppgtt,
				     struct i915_page_directory_pointer *pdp,
				     uint64_t start,
				     uint64_t length,
				     unsigned long *new_pds)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct i915_page_directory *pd;
	uint64_t temp;
	uint32_t pdpe;

	WARN_ON(!bitmap_empty(new_pds, GEN8_LEGACY_PDPES));

	gen8_for_each_pdpe(pd, pdp, start, length, temp, pdpe) {
		if (pd)
			continue;

		pd = alloc_pd(dev);
		if (IS_ERR(pd))
			goto unwind_out;

		gen8_initialize_pd(&ppgtt->base, pd);
		pdp->page_directory[pdpe] = pd;
		__set_bit(pdpe, new_pds);
	}

	return 0;

unwind_out:
	for_each_set_bit(pdpe, new_pds, GEN8_LEGACY_PDPES)
		free_pd(dev, pdp->page_directory[pdpe]);

	return -ENOMEM;
}

static void
free_gen8_temp_bitmaps(unsigned long *new_pds, unsigned long **new_pts)
{
	int i;

	for (i = 0; i < GEN8_LEGACY_PDPES; i++)
		kfree(new_pts[i]);
	kfree(new_pts);
	kfree(new_pds);
}

/* Fills in the page directory bitmap, and the array of page tables bitmap. Both
 * of these are based on the number of PDPEs in the system.
 */
static
int __must_check alloc_gen8_temp_bitmaps(unsigned long **new_pds,
					 unsigned long ***new_pts)
{
	int i;
	unsigned long *pds;
	unsigned long **pts;

	pds = kcalloc(BITS_TO_LONGS(GEN8_LEGACY_PDPES), sizeof(unsigned long), GFP_KERNEL);
	if (!pds)
		return -ENOMEM;

	pts = kcalloc(GEN8_LEGACY_PDPES, sizeof(unsigned long *), GFP_KERNEL);
	if (!pts) {
		kfree(pds);
		return -ENOMEM;
	}

	for (i = 0; i < GEN8_LEGACY_PDPES; i++) {
		pts[i] = kcalloc(BITS_TO_LONGS(I915_PDES),
				 sizeof(unsigned long), GFP_KERNEL);
		if (!pts[i])
			goto err_out;
	}

	*new_pds = pds;
	*new_pts = pts;

	return 0;

err_out:
	free_gen8_temp_bitmaps(pds, pts);
	return -ENOMEM;
}

/* PDE TLBs are a pain to invalidate on GEN8+. When we modify
 * the page table structures, we mark them dirty so that
 * context switching/execlist queuing code takes extra steps
 * to ensure that tlbs are flushed.
 */
static void mark_tlbs_dirty(struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->pd_dirty_rings = INTEL_INFO(ppgtt->base.dev)->ring_mask;
}

static int gen8_alloc_va_range(struct i915_address_space *vm,
			       uint64_t start,
			       uint64_t length)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	unsigned long *new_page_dirs, **new_page_tables;
	struct i915_page_directory *pd;
	const uint64_t orig_start = start;
	const uint64_t orig_length = length;
	uint64_t temp;
	uint32_t pdpe;
	int ret;

	/* Wrap is never okay since we can only represent 48b, and we don't
	 * actually use the other side of the canonical address space.
	 */
	if (WARN_ON(start + length < start))
		return -ENODEV;

	if (WARN_ON(start + length > ppgtt->base.total))
		return -ENODEV;

	ret = alloc_gen8_temp_bitmaps(&new_page_dirs, &new_page_tables);
	if (ret)
		return ret;

	/* Do the allocations first so we can easily bail out */
	ret = gen8_ppgtt_alloc_page_directories(ppgtt, &ppgtt->pdp, start, length,
					new_page_dirs);
	if (ret) {
		free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
		return ret;
	}

	/* For every page directory referenced, allocate page tables */
	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
		ret = gen8_ppgtt_alloc_pagetabs(ppgtt, pd, start, length,
						new_page_tables[pdpe]);
		if (ret)
			goto err_out;
	}

	start = orig_start;
	length = orig_length;

	/* Allocations have completed successfully, so set the bitmaps, and do
	 * the mappings. */
	gen8_for_each_pdpe(pd, &ppgtt->pdp, start, length, temp, pdpe) {
		gen8_pde_t *const page_directory = kmap_px(pd);
		struct i915_page_table *pt;
		uint64_t pd_len = gen8_clamp_pd(start, length);
		uint64_t pd_start = start;
		uint32_t pde;

		/* Every pd should be allocated, we just did that above. */
		WARN_ON(!pd);

		gen8_for_each_pde(pt, pd, pd_start, pd_len, temp, pde) {
			/* Same reasoning as pd */
			WARN_ON(!pt);
			WARN_ON(!pd_len);
			WARN_ON(!gen8_pte_count(pd_start, pd_len));

			/* Set our used ptes within the page table */
			bitmap_set(pt->used_ptes,
				   gen8_pte_index(pd_start),
				   gen8_pte_count(pd_start, pd_len));

			/* Our pde is now pointing to the pagetable, pt */
			__set_bit(pde, pd->used_pdes);

			/* Map the PDE to the page table */
			page_directory[pde] = gen8_pde_encode(px_dma(pt),
							      I915_CACHE_LLC);

			/* NB: We haven't yet mapped ptes to pages. At this
			 * point we're still relying on insert_entries() */
		}

		kunmap_px(ppgtt, page_directory);

		__set_bit(pdpe, ppgtt->pdp.used_pdpes);
	}

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
	mark_tlbs_dirty(ppgtt);
	return 0;

err_out:
	while (pdpe--) {
		for_each_set_bit(temp, new_page_tables[pdpe], I915_PDES)
			free_pt(vm->dev, ppgtt->pdp.page_directory[pdpe]->page_table[temp]);
	}

	for_each_set_bit(pdpe, new_page_dirs, GEN8_LEGACY_PDPES)
		free_pd(vm->dev, ppgtt->pdp.page_directory[pdpe]);

	free_gen8_temp_bitmaps(new_page_dirs, new_page_tables);
	mark_tlbs_dirty(ppgtt);
	return ret;
}

/*
 * GEN8 legacy ppgtt programming is accomplished through a max of 4 PDP
 * registers with a net effect resembling a 2-level page table in normal x86
 * terms. Each PDP represents 1GB of memory
 * (4 * 512 * 512 * 4096 = 4GB of legacy 32b address space).
 *
 */
static int gen8_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	ppgtt->base.scratch_pt = alloc_pt(ppgtt->base.dev);
	if (IS_ERR(ppgtt->base.scratch_pt))
		return PTR_ERR(ppgtt->base.scratch_pt);

	ppgtt->base.scratch_pd = alloc_pd(ppgtt->base.dev);
	if (IS_ERR(ppgtt->base.scratch_pd))
		return PTR_ERR(ppgtt->base.scratch_pd);

	gen8_initialize_pt(&ppgtt->base, ppgtt->base.scratch_pt);
	gen8_initialize_pd(&ppgtt->base, ppgtt->base.scratch_pd);

	ppgtt->base.start = 0;
	ppgtt->base.total = 1ULL << 32;
	if (IS_ENABLED(CONFIG_X86_32))
		/* While we have a proliferation of size_t variables
		 * we cannot represent the full ppgtt size on 32bit,
		 * so limit it to the same size as the GGTT (currently
		 * 2GiB).
		 */
		ppgtt->base.total = to_i915(ppgtt->base.dev)->gtt.base.total;
	ppgtt->base.cleanup = gen8_ppgtt_cleanup;
	ppgtt->base.allocate_va_range = gen8_alloc_va_range;
	ppgtt->base.insert_entries = gen8_ppgtt_insert_entries;
	ppgtt->base.clear_range = gen8_ppgtt_clear_range;
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;

	ppgtt->switch_mm = gen8_mm_switch;

	return 0;
}

static void gen6_dump_ppgtt(struct i915_hw_ppgtt *ppgtt, struct seq_file *m)
{
	struct i915_address_space *vm = &ppgtt->base;
	struct i915_page_table *unused;
	gen6_pte_t scratch_pte;
	uint32_t pd_entry;
	uint32_t  pte, pde, temp;
	uint32_t start = ppgtt->base.start, length = ppgtt->base.total;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, true, 0);

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde) {
		u32 expected;
		gen6_pte_t *pt_vaddr;
		const dma_addr_t pt_addr = px_dma(ppgtt->pd.page_table[pde]);
		pd_entry = readl(ppgtt->pd_addr + pde);
		expected = (GEN6_PDE_ADDR_ENCODE(pt_addr) | GEN6_PDE_VALID);

		if (pd_entry != expected)
			seq_printf(m, "\tPDE #%d mismatch: Actual PDE: %x Expected PDE: %x\n",
				   pde,
				   pd_entry,
				   expected);
		seq_printf(m, "\tPDE: %x\n", pd_entry);

		pt_vaddr = kmap_px(ppgtt->pd.page_table[pde]);

		for (pte = 0; pte < GEN6_PTES; pte+=4) {
			unsigned long va =
				(pde * PAGE_SIZE * GEN6_PTES) +
				(pte * PAGE_SIZE);
			int i;
			bool found = false;
			for (i = 0; i < 4; i++)
				if (pt_vaddr[pte + i] != scratch_pte)
					found = true;
			if (!found)
				continue;

			seq_printf(m, "\t\t0x%lx [%03d,%04d]: =", va, pde, pte);
			for (i = 0; i < 4; i++) {
				if (pt_vaddr[pte + i] != scratch_pte)
					seq_printf(m, " %08x", pt_vaddr[pte + i]);
				else
					seq_puts(m, "  SCRATCH ");
			}
			seq_puts(m, "\n");
		}
		kunmap_px(ppgtt, pt_vaddr);
	}
}

/* Write pde (index) from the page directory @pd to the page table @pt */
static void gen6_write_pde(struct i915_page_directory *pd,
			    const int pde, struct i915_page_table *pt)
{
	/* Caller needs to make sure the write completes if necessary */
	struct i915_hw_ppgtt *ppgtt =
		container_of(pd, struct i915_hw_ppgtt, pd);
	u32 pd_entry;

	pd_entry = GEN6_PDE_ADDR_ENCODE(px_dma(pt));
	pd_entry |= GEN6_PDE_VALID;

	writel(pd_entry, ppgtt->pd_addr + pde);
}

/* Write all the page tables found in the ppgtt structure to incrementing page
 * directories. */
static void gen6_write_page_range(struct drm_i915_private *dev_priv,
				  struct i915_page_directory *pd,
				  uint32_t start, uint32_t length)
{
	struct i915_page_table *pt;
	uint32_t pde, temp;

	gen6_for_each_pde(pt, pd, start, length, temp, pde)
		gen6_write_pde(pd, pde, pt);

	/* Make sure write is complete before other code can use this page
	 * table. Also required for WC mapped PTEs */
	readl(dev_priv->gtt.gsm);
}

static uint32_t get_pd_offset(struct i915_hw_ppgtt *ppgtt)
{
	BUG_ON(ppgtt->pd.base.ggtt_offset & 0x3f);

	return (ppgtt->pd.base.ggtt_offset / 64) << 16;
}

static int hsw_mm_switch(struct i915_hw_ppgtt *ppgtt,
			 struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(req, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	return 0;
}

static int vgpu_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	struct drm_i915_private *dev_priv = to_i915(ppgtt->base.dev);

	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));
	return 0;
}

static int gen7_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	int ret;

	/* NB: TLBs must be flushed and invalidated before a switch */
	ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
	if (ret)
		return ret;

	ret = intel_ring_begin(req, 6);
	if (ret)
		return ret;

	intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(2));
	intel_ring_emit(ring, RING_PP_DIR_DCLV(ring));
	intel_ring_emit(ring, PP_DIR_DCLV_2G);
	intel_ring_emit(ring, RING_PP_DIR_BASE(ring));
	intel_ring_emit(ring, get_pd_offset(ppgtt));
	intel_ring_emit(ring, MI_NOOP);
	intel_ring_advance(ring);

	/* XXX: RCS is the only one to auto invalidate the TLBs? */
	if (ring->id != RCS) {
		ret = ring->flush(req, I915_GEM_GPU_DOMAINS, I915_GEM_GPU_DOMAINS);
		if (ret)
			return ret;
	}

	return 0;
}

static int gen6_mm_switch(struct i915_hw_ppgtt *ppgtt,
			  struct drm_i915_gem_request *req)
{
	struct intel_engine_cs *ring = req->ring;
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;


	I915_WRITE(RING_PP_DIR_DCLV(ring), PP_DIR_DCLV_2G);
	I915_WRITE(RING_PP_DIR_BASE(ring), get_pd_offset(ppgtt));

	POSTING_READ(RING_PP_DIR_DCLV(ring));

	return 0;
}

static void gen8_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int j;

	for_each_ring(ring, dev_priv, j) {
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen7_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	uint32_t ecochk, ecobits;
	int i;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_PPGTT_CACHE64B);

	ecochk = I915_READ(GAM_ECOCHK);
	if (IS_HASWELL(dev)) {
		ecochk |= ECOCHK_PPGTT_WB_HSW;
	} else {
		ecochk |= ECOCHK_PPGTT_LLC_IVB;
		ecochk &= ~ECOCHK_PPGTT_GFDT_IVB;
	}
	I915_WRITE(GAM_ECOCHK, ecochk);

	for_each_ring(ring, dev_priv, i) {
		/* GFX_MODE is per-ring on gen7+ */
		I915_WRITE(RING_MODE_GEN7(ring),
			   _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
	}
}

static void gen6_ppgtt_enable(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t ecochk, gab_ctl, ecobits;

	ecobits = I915_READ(GAC_ECO_BITS);
	I915_WRITE(GAC_ECO_BITS, ecobits | ECOBITS_SNB_BIT |
		   ECOBITS_PPGTT_CACHE64B);

	gab_ctl = I915_READ(GAB_CTL);
	I915_WRITE(GAB_CTL, gab_ctl | GAB_CTL_CONT_AFTER_PAGEFAULT);

	ecochk = I915_READ(GAM_ECOCHK);
	I915_WRITE(GAM_ECOCHK, ecochk | ECOCHK_SNB_BIT | ECOCHK_PPGTT_CACHE64B);

	I915_WRITE(GFX_MODE, _MASKED_BIT_ENABLE(GFX_PPGTT_ENABLE));
}

/* PPGTT support for Sandybridge/Gen6 and later */
static void gen6_ppgtt_clear_range(struct i915_address_space *vm,
				   uint64_t start,
				   uint64_t length,
				   bool use_scratch)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_pte_t *pt_vaddr, scratch_pte;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned first_pte = first_entry % GEN6_PTES;
	unsigned last_pte, i;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, true, 0);

	while (num_entries) {
		last_pte = first_pte + num_entries;
		if (last_pte > GEN6_PTES)
			last_pte = GEN6_PTES;

		pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);

		for (i = first_pte; i < last_pte; i++)
			pt_vaddr[i] = scratch_pte;

		kunmap_px(ppgtt, pt_vaddr);

		num_entries -= last_pte - first_pte;
		first_pte = 0;
		act_pt++;
	}
}

static void gen6_ppgtt_insert_entries(struct i915_address_space *vm,
				      struct sg_table *pages,
				      uint64_t start,
				      enum i915_cache_level cache_level, u32 flags)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	gen6_pte_t *pt_vaddr;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned act_pt = first_entry / GEN6_PTES;
	unsigned act_pte = first_entry % GEN6_PTES;
	struct sg_page_iter sg_iter;

	pt_vaddr = NULL;
	for_each_sg_page(pages->sgl, &sg_iter, pages->nents, 0) {
		if (pt_vaddr == NULL)
			pt_vaddr = kmap_px(ppgtt->pd.page_table[act_pt]);

		pt_vaddr[act_pte] =
			vm->pte_encode(sg_page_iter_dma_address(&sg_iter),
				       cache_level, true, flags);

		if (++act_pte == GEN6_PTES) {
			kunmap_px(ppgtt, pt_vaddr);
			pt_vaddr = NULL;
			act_pt++;
			act_pte = 0;
		}
	}
	if (pt_vaddr)
		kunmap_px(ppgtt, pt_vaddr);
}

static int gen6_alloc_va_range(struct i915_address_space *vm,
			       uint64_t start_in, uint64_t length_in)
{
	DECLARE_BITMAP(new_page_tables, I915_PDES);
	struct drm_device *dev = vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_hw_ppgtt *ppgtt =
				container_of(vm, struct i915_hw_ppgtt, base);
	struct i915_page_table *pt;
	uint32_t start, length, start_save, length_save;
	uint32_t pde, temp;
	int ret;

	if (WARN_ON(start_in + length_in > ppgtt->base.total))
		return -ENODEV;

	start = start_save = start_in;
	length = length_save = length_in;

	bitmap_zero(new_page_tables, I915_PDES);

	/* The allocation is done in two stages so that we can bail out with
	 * minimal amount of pain. The first stage finds new page tables that
	 * need allocation. The second stage marks the used ptes within the page
	 * tables.
	 */
	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
		if (pt != vm->scratch_pt) {
			WARN_ON(bitmap_empty(pt->used_ptes, GEN6_PTES));
			continue;
		}

		/* We've already allocated a page table */
		WARN_ON(!bitmap_empty(pt->used_ptes, GEN6_PTES));

		pt = alloc_pt(dev);
		if (IS_ERR(pt)) {
			ret = PTR_ERR(pt);
			goto unwind_out;
		}

		gen6_initialize_pt(vm, pt);

		ppgtt->pd.page_table[pde] = pt;
		__set_bit(pde, new_page_tables);
		trace_i915_page_table_entry_alloc(vm, pde, start, GEN6_PDE_SHIFT);
	}

	start = start_save;
	length = length_save;

	gen6_for_each_pde(pt, &ppgtt->pd, start, length, temp, pde) {
		DECLARE_BITMAP(tmp_bitmap, GEN6_PTES);

		bitmap_zero(tmp_bitmap, GEN6_PTES);
		bitmap_set(tmp_bitmap, gen6_pte_index(start),
			   gen6_pte_count(start, length));

		if (__test_and_clear_bit(pde, new_page_tables))
			gen6_write_pde(&ppgtt->pd, pde, pt);

		trace_i915_page_table_entry_map(vm, pde, pt,
					 gen6_pte_index(start),
					 gen6_pte_count(start, length),
					 GEN6_PTES);
		bitmap_or(pt->used_ptes, tmp_bitmap, pt->used_ptes,
				GEN6_PTES);
	}

	WARN_ON(!bitmap_empty(new_page_tables, I915_PDES));

	/* Make sure write is complete before other code can use this page
	 * table. Also required for WC mapped PTEs */
	readl(dev_priv->gtt.gsm);

	mark_tlbs_dirty(ppgtt);
	return 0;

unwind_out:
	for_each_set_bit(pde, new_page_tables, I915_PDES) {
		struct i915_page_table *pt = ppgtt->pd.page_table[pde];

		ppgtt->pd.page_table[pde] = vm->scratch_pt;
		free_pt(vm->dev, pt);
	}

	mark_tlbs_dirty(ppgtt);
	return ret;
}

static void gen6_ppgtt_cleanup(struct i915_address_space *vm)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(vm, struct i915_hw_ppgtt, base);
	struct i915_page_table *pt;
	uint32_t pde;

	drm_mm_remove_node(&ppgtt->node);

	gen6_for_all_pdes(pt, ppgtt, pde) {
		if (pt != vm->scratch_pt)
			free_pt(ppgtt->base.dev, pt);
	}

	free_pt(vm->dev, vm->scratch_pt);
}

static int gen6_ppgtt_allocate_page_directories(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool retried = false;
	int ret;

	/* PPGTT PDEs reside in the GGTT and consist of 512 entries. The
	 * allocator works in address space sizes, so it's multiplied by page
	 * size. We allocate at the top of the GTT to avoid fragmentation.
	 */
	BUG_ON(!drm_mm_initialized(&dev_priv->gtt.base.mm));
	ppgtt->base.scratch_pt = alloc_pt(ppgtt->base.dev);
	if (IS_ERR(ppgtt->base.scratch_pt))
		return PTR_ERR(ppgtt->base.scratch_pt);

	gen6_initialize_pt(&ppgtt->base, ppgtt->base.scratch_pt);

alloc:
	ret = drm_mm_insert_node_in_range_generic(&dev_priv->gtt.base.mm,
						  &ppgtt->node, GEN6_PD_SIZE,
						  GEN6_PD_ALIGN, 0,
						  0, dev_priv->gtt.base.total,
						  DRM_MM_TOPDOWN);
	if (ret == -ENOSPC && !retried) {
		ret = i915_gem_evict_something(dev, &dev_priv->gtt.base,
					       GEN6_PD_SIZE, GEN6_PD_ALIGN,
					       I915_CACHE_NONE,
					       0, dev_priv->gtt.base.total,
					       0);
		if (ret)
			goto err_out;

		retried = true;
		goto alloc;
	}

	if (ret)
		goto err_out;


	if (ppgtt->node.start < dev_priv->gtt.mappable_end)
		DRM_DEBUG("Forced to use aperture for PDEs\n");

	return 0;

err_out:
	free_pt(ppgtt->base.dev, ppgtt->base.scratch_pt);
	return ret;
}

static int gen6_ppgtt_alloc(struct i915_hw_ppgtt *ppgtt)
{
	return gen6_ppgtt_allocate_page_directories(ppgtt);
}

static void gen6_scratch_va_range(struct i915_hw_ppgtt *ppgtt,
				  uint64_t start, uint64_t length)
{
	struct i915_page_table *unused;
	uint32_t pde, temp;

	gen6_for_each_pde(unused, &ppgtt->pd, start, length, temp, pde)
		ppgtt->pd.page_table[pde] = ppgtt->base.scratch_pt;
}

static int gen6_ppgtt_init(struct i915_hw_ppgtt *ppgtt)
{
	struct drm_device *dev = ppgtt->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ppgtt->base.pte_encode = dev_priv->gtt.base.pte_encode;
	if (IS_GEN6(dev)) {
		ppgtt->switch_mm = gen6_mm_switch;
	} else if (IS_HASWELL(dev)) {
		ppgtt->switch_mm = hsw_mm_switch;
	} else if (IS_GEN7(dev)) {
		ppgtt->switch_mm = gen7_mm_switch;
	} else
		BUG();

	if (intel_vgpu_active(dev))
		ppgtt->switch_mm = vgpu_mm_switch;

	ret = gen6_ppgtt_alloc(ppgtt);
	if (ret)
		return ret;

	ppgtt->base.allocate_va_range = gen6_alloc_va_range;
	ppgtt->base.clear_range = gen6_ppgtt_clear_range;
	ppgtt->base.insert_entries = gen6_ppgtt_insert_entries;
	ppgtt->base.unbind_vma = ppgtt_unbind_vma;
	ppgtt->base.bind_vma = ppgtt_bind_vma;
	ppgtt->base.cleanup = gen6_ppgtt_cleanup;
	ppgtt->base.start = 0;
	ppgtt->base.total = I915_PDES * GEN6_PTES * PAGE_SIZE;
	ppgtt->debug_dump = gen6_dump_ppgtt;

	ppgtt->pd.base.ggtt_offset =
		ppgtt->node.start / PAGE_SIZE * sizeof(gen6_pte_t);

	ppgtt->pd_addr = (gen6_pte_t __iomem *)dev_priv->gtt.gsm +
		ppgtt->pd.base.ggtt_offset / sizeof(gen6_pte_t);

	gen6_scratch_va_range(ppgtt, 0, ppgtt->base.total);

	gen6_write_page_range(dev_priv, &ppgtt->pd, 0, ppgtt->base.total);

	DRM_DEBUG_DRIVER("Allocated pde space (%lldM) at GTT entry: %llx\n",
			 ppgtt->node.size >> 20,
			 ppgtt->node.start / PAGE_SIZE);

	DRM_DEBUG("Adding PPGTT at offset %x\n",
		  ppgtt->pd.base.ggtt_offset << 10);

	return 0;
}

static int __hw_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	ppgtt->base.dev = dev;
	ppgtt->base.scratch_page = dev_priv->gtt.base.scratch_page;

	if (INTEL_INFO(dev)->gen < 8)
		return gen6_ppgtt_init(ppgtt);
	else
		return gen8_ppgtt_init(ppgtt);
}

int i915_ppgtt_init(struct drm_device *dev, struct i915_hw_ppgtt *ppgtt)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret = 0;

	ret = __hw_ppgtt_init(dev, ppgtt);
	if (ret == 0) {
		kref_init(&ppgtt->ref);
		drm_mm_init(&ppgtt->base.mm, ppgtt->base.start,
			    ppgtt->base.total);
		i915_init_vm(dev_priv, &ppgtt->base);
	}

	return ret;
}

int i915_ppgtt_init_hw(struct drm_device *dev)
{
	/* In the case of execlists, PPGTT is enabled by the context descriptor
	 * and the PDPs are contained within the context itself.  We don't
	 * need to do anything here. */
	if (i915.enable_execlists)
		return 0;

	if (!USES_PPGTT(dev))
		return 0;

	if (IS_GEN6(dev))
		gen6_ppgtt_enable(dev);
	else if (IS_GEN7(dev))
		gen7_ppgtt_enable(dev);
	else if (INTEL_INFO(dev)->gen >= 8)
		gen8_ppgtt_enable(dev);
	else
1592
		MISSING_CASE(INTEL_INFO(dev)->gen);
1593

1594 1595
	return 0;
}
1596

1597
int i915_ppgtt_init_ring(struct drm_i915_gem_request *req)
1598
{
1599
	struct drm_i915_private *dev_priv = req->ring->dev->dev_private;
1600 1601 1602 1603 1604 1605 1606 1607
	struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

	if (i915.enable_execlists)
		return 0;

	if (!ppgtt)
		return 0;

	return ppgtt->switch_mm(ppgtt, req);
}

struct i915_hw_ppgtt *
i915_ppgtt_create(struct drm_device *dev, struct drm_i915_file_private *fpriv)
{
	struct i915_hw_ppgtt *ppgtt;
	int ret;

	ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
	if (!ppgtt)
		return ERR_PTR(-ENOMEM);

	ret = i915_ppgtt_init(dev, ppgtt);
	if (ret) {
		kfree(ppgtt);
		return ERR_PTR(ret);
	}

	ppgtt->file_priv = fpriv;

	trace_i915_ppgtt_create(&ppgtt->base);

	return ppgtt;
}

void  i915_ppgtt_release(struct kref *kref)
{
	struct i915_hw_ppgtt *ppgtt =
		container_of(kref, struct i915_hw_ppgtt, ref);

	trace_i915_ppgtt_release(&ppgtt->base);

	/* vmas should already be unbound */
	WARN_ON(!list_empty(&ppgtt->base.active_list));
	WARN_ON(!list_empty(&ppgtt->base.inactive_list));

	list_del(&ppgtt->base.global_link);
	drm_mm_takedown(&ppgtt->base.mm);

	ppgtt->base.cleanup(&ppgtt->base);
	kfree(ppgtt);
}
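
/*
 * Note (usage assumption; the callers live outside this function): the
 * release function above is written as a kref release callback, so owners
 * are expected to drop their reference with something like:
 *
 *	kref_put(&ppgtt->ref, i915_ppgtt_release);
 */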

extern int intel_iommu_gfx_mapped;
/* Certain Gen5 chipsets require idling the GPU before
 * unmapping anything from the GTT when VT-d is enabled.
 */
static bool needs_idle_maps(struct drm_device *dev)
{
#ifdef CONFIG_INTEL_IOMMU
	/* Query intel_iommu to see if we need the workaround. Presumably that
	 * was loaded first.
	 */
	if (IS_GEN5(dev) && IS_MOBILE(dev) && intel_iommu_gfx_mapped)
		return true;
#endif
	return false;
}

B
{
	bool ret = dev_priv->mm.interruptible;

1672
	if (unlikely(dev_priv->gtt.do_idle_maps)) {
B
1674
		if (i915_gpu_idle(dev_priv->dev)) {
B
			/* Wait a bit, in hopes it avoids the hang */
			udelay(10);
		}
	}

	return ret;
}

static void undo_idling(struct drm_i915_private *dev_priv, bool interruptible)
{
	if (unlikely(dev_priv->gtt.do_idle_maps))
		dev_priv->mm.interruptible = interruptible;
}
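
/*
 * Usage sketch (an assumption about the callers, which appear elsewhere in
 * the driver): the two helpers above are meant to bracket a GTT unmap on the
 * affected Gen5 chipsets, e.g.
 *
 *	bool interruptible = do_idling(dev_priv);
 *	... unmap / clear the relevant GTT range ...
 *	undo_idling(dev_priv, interruptible);
 */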

void i915_check_and_clear_faults(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct intel_engine_cs *ring;
	int i;

	if (INTEL_INFO(dev)->gen < 6)
		return;

	for_each_ring(ring, dev_priv, i) {
		u32 fault_reg;
		fault_reg = I915_READ(RING_FAULT_REG(ring));
		if (fault_reg & RING_FAULT_VALID) {
			DRM_DEBUG_DRIVER("Unexpected fault\n"
					 "\tAddr: 0x%08lx\n"
					 "\tAddress space: %s\n"
					 "\tSource ID: %d\n"
					 "\tType: %d\n",
					 fault_reg & PAGE_MASK,
					 fault_reg & RING_FAULT_GTTSEL_MASK ? "GGTT" : "PPGTT",
					 RING_FAULT_SRCID(fault_reg),
					 RING_FAULT_FAULT_TYPE(fault_reg));
			I915_WRITE(RING_FAULT_REG(ring),
				   fault_reg & ~RING_FAULT_VALID);
		}
	}
	POSTING_READ(RING_FAULT_REG(&dev_priv->ring[RCS]));
}

static void i915_ggtt_flush(struct drm_i915_private *dev_priv)
{
	if (INTEL_INFO(dev_priv->dev)->gen < 6) {
		intel_gtt_chipset_flush();
	} else {
		I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
		POSTING_READ(GFX_FLSH_CNTL_GEN6);
	}
}

void i915_gem_suspend_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;

	/* Don't bother messing with faults pre GEN6 as we have little
	 * documentation supporting that it's a good idea.
	 */
	if (INTEL_INFO(dev)->gen < 6)
		return;

	i915_check_and_clear_faults(dev);

	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	i915_ggtt_flush(dev_priv);
}

int i915_gem_gtt_prepare_object(struct drm_i915_gem_object *obj)
{
	if (obj->has_dma_mapping)
		return 0;

	if (!dma_map_sg(&obj->base.dev->pdev->dev,
			obj->pages->sgl, obj->pages->nents,
			PCI_DMA_BIDIRECTIONAL))
		return -ENOSPC;

	return 0;
}

static void gen8_set_pte(void __iomem *addr, gen8_pte_t pte)
{
#ifdef writeq
	writeq(pte, addr);
#else
	iowrite32((u32)pte, addr);
	iowrite32(pte >> 32, addr + 4);
#endif
}

static void gen8_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 unused)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen8_pte_t __iomem *gtt_entries =
		(gen8_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0; /* shut up gcc */

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_dma_address(sg_iter.sg) +
			(sg_iter.sg_pgoffset << PAGE_SHIFT);
		gen8_set_pte(&gtt_entries[i],
			     gen8_pte_encode(addr, level, true));
		i++;
	}

	/*
	 * XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR that they are potentially
	 * of NUMA access patterns. Therefore, even with the way we assume
	 * hardware should work, we must keep this posting read for paranoia.
	 */
	if (i != 0)
		WARN_ON(readq(&gtt_entries[i-1])
			!= gen8_pte_encode(addr, level, true));

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

/*
 * Binds an object into the global gtt with the specified cache level. The object
 * will be accessible to the GPU via commands whose operands reference offsets
 * within the global GTT as well as accessible by the GPU through the GMADR
 * mapped BAR (dev_priv->mm.gtt->gtt).
 */
static void gen6_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *st,
				     uint64_t start,
				     enum i915_cache_level level, u32 flags)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	gen6_pte_t __iomem *gtt_entries =
		(gen6_pte_t __iomem *)dev_priv->gtt.gsm + first_entry;
	int i = 0;
	struct sg_page_iter sg_iter;
	dma_addr_t addr = 0;

	for_each_sg_page(st->sgl, &sg_iter, st->nents, 0) {
		addr = sg_page_iter_dma_address(&sg_iter);
		iowrite32(vm->pte_encode(addr, level, true, flags), &gtt_entries[i]);
		i++;
	}

	/* XXX: This serves as a posting read to make sure that the PTE has
	 * actually been updated. There is some concern that even though
	 * registers and PTEs are within the same BAR, they may be subject to
	 * different NUMA access patterns. Therefore, even with the way we
	 * assume the hardware should work, we must keep this posting read for
	 * paranoia.
	 */
	if (i != 0) {
		unsigned long gtt = readl(&gtt_entries[i-1]);
		WARN_ON(gtt != vm->pte_encode(addr, level, true, flags));
	}

	/* This next bit makes the above posting read even more important. We
	 * want to flush the TLBs only after we're certain all the PTE updates
	 * have finished.
	 */
	I915_WRITE(GFX_FLSH_CNTL_GEN6, GFX_FLSH_CNTL_EN);
	POSTING_READ(GFX_FLSH_CNTL_GEN6);
}

static void gen8_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen8_pte_t scratch_pte, __iomem *gtt_base =
		(gen8_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = gen8_pte_encode(px_dma(vm->scratch_page),
				      I915_CACHE_LLC,
				      use_scratch);
	for (i = 0; i < num_entries; i++)
		gen8_set_pte(&gtt_base[i], scratch_pte);
	readl(gtt_base);
}

static void gen6_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool use_scratch)
{
	struct drm_i915_private *dev_priv = vm->dev->dev_private;
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	gen6_pte_t scratch_pte, __iomem *gtt_base =
		(gen6_pte_t __iomem *) dev_priv->gtt.gsm + first_entry;
	const int max_entries = gtt_total_entries(dev_priv->gtt) - first_entry;
	int i;

	if (WARN(num_entries > max_entries,
		 "First entry = %d; Num entries = %d (max=%d)\n",
		 first_entry, num_entries, max_entries))
		num_entries = max_entries;

	scratch_pte = vm->pte_encode(px_dma(vm->scratch_page),
				     I915_CACHE_LLC, use_scratch, 0);

	for (i = 0; i < num_entries; i++)
		iowrite32(scratch_pte, &gtt_base[i]);
	readl(gtt_base);
}

static void i915_ggtt_insert_entries(struct i915_address_space *vm,
				     struct sg_table *pages,
				     uint64_t start,
				     enum i915_cache_level cache_level, u32 unused)
{
	unsigned int flags = (cache_level == I915_CACHE_NONE) ?
		AGP_USER_MEMORY : AGP_USER_CACHED_MEMORY;

	intel_gtt_insert_sg_entries(pages, start >> PAGE_SHIFT, flags);

}

static void i915_ggtt_clear_range(struct i915_address_space *vm,
				  uint64_t start,
				  uint64_t length,
				  bool unused)
{
	unsigned first_entry = start >> PAGE_SHIFT;
	unsigned num_entries = length >> PAGE_SHIFT;
	intel_gtt_clear_range(first_entry, num_entries);
}

static int ggtt_bind_vma(struct i915_vma *vma,
			 enum i915_cache_level cache_level,
			 u32 flags)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;
	struct sg_table *pages = obj->pages;
	u32 pte_flags = 0;
	int ret;

	ret = i915_get_ggtt_vma_pages(vma);
	if (ret)
		return ret;
	pages = vma->ggtt_view.pages;

	/* Currently applicable only to VLV */
	if (obj->gt_ro)
		pte_flags |= PTE_READ_ONLY;


	if (!dev_priv->mm.aliasing_ppgtt || flags & GLOBAL_BIND) {
		vma->vm->insert_entries(vma->vm, pages,
					vma->node.start,
					cache_level, pte_flags);
	}

	if (dev_priv->mm.aliasing_ppgtt && flags & LOCAL_BIND) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;
		appgtt->base.insert_entries(&appgtt->base, pages,
					    vma->node.start,
					    cache_level, pte_flags);
	}

	return 0;
}

static void ggtt_unbind_vma(struct i915_vma *vma)
{
	struct drm_device *dev = vma->vm->dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj = vma->obj;
	const uint64_t size = min_t(uint64_t,
				    obj->base.size,
				    vma->node.size);

	if (vma->bound & GLOBAL_BIND) {
		vma->vm->clear_range(vma->vm,
				     vma->node.start,
				     size,
				     true);
	}

	if (dev_priv->mm.aliasing_ppgtt && vma->bound & LOCAL_BIND) {
		struct i915_hw_ppgtt *appgtt = dev_priv->mm.aliasing_ppgtt;

		appgtt->base.clear_range(&appgtt->base,
					 vma->node.start,
					 size,
					 true);
	}
}

void i915_gem_gtt_finish_object(struct drm_i915_gem_object *obj)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	bool interruptible;

	interruptible = do_idling(dev_priv);

	if (!obj->has_dma_mapping)
		dma_unmap_sg(&dev->pdev->dev,
			     obj->pages->sgl, obj->pages->nents,
			     PCI_DMA_BIDIRECTIONAL);

	undo_idling(dev_priv, interruptible);
}

static void i915_gtt_color_adjust(struct drm_mm_node *node,
				  unsigned long color,
				  u64 *start,
				  u64 *end)
{
	if (node->color != color)
		*start += 4096;

	if (!list_empty(&node->node_list)) {
		node = list_entry(node->node_list.next,
				  struct drm_mm_node,
				  node_list);
		if (node->allocated && node->color != color)
			*end -= 4096;
	}
}
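
/*
 * Roughly (for illustration): drm_mm invokes this callback with the node
 * preceding the hole under consideration. If that node's cache "color"
 * differs from the color being allocated, one 4KiB page is trimmed from the
 * start of the hole; if the following allocated node's color differs, one
 * page is trimmed from the end. The net effect is a guard page between
 * objects with conflicting cacheability.
 */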

static int i915_gem_setup_global_gtt(struct drm_device *dev,
				     unsigned long start,
				     unsigned long mappable_end,
				     unsigned long end)
{
	/* Let GEM manage all of the aperture.
	 *
	 * However, leave one page at the end still bound to the scratch page.
	 * There are a number of places where the hardware apparently prefetches
	 * past the end of the object, and we've seen multiple hangs with the
	 * GPU head pointer stuck in a batchbuffer bound at the last page of the
	 * aperture.  One page should be enough to keep any prefetching inside
	 * of the aperture.
	 */
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *ggtt_vm = &dev_priv->gtt.base;
	struct drm_mm_node *entry;
	struct drm_i915_gem_object *obj;
	unsigned long hole_start, hole_end;
	int ret;

	BUG_ON(mappable_end > end);

	/* Subtract the guard page ... */
	drm_mm_init(&ggtt_vm->mm, start, end - start - PAGE_SIZE);

	dev_priv->gtt.base.start = start;
	dev_priv->gtt.base.total = end - start;

	if (intel_vgpu_active(dev)) {
		ret = intel_vgt_balloon(dev);
		if (ret)
			return ret;
	}

	if (!HAS_LLC(dev))
		dev_priv->gtt.base.mm.color_adjust = i915_gtt_color_adjust;

	/* Mark any preallocated objects as occupied */
	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj, ggtt_vm);

		DRM_DEBUG_KMS("reserving preallocated space: %lx + %zx\n",
			      i915_gem_obj_ggtt_offset(obj), obj->base.size);

		WARN_ON(i915_gem_obj_ggtt_bound(obj));
		ret = drm_mm_reserve_node(&ggtt_vm->mm, &vma->node);
		if (ret) {
			DRM_DEBUG_KMS("Reservation failed: %i\n", ret);
			return ret;
		}
		vma->bound |= GLOBAL_BIND;
	}

	/* Clear any non-preallocated blocks */
	drm_mm_for_each_hole(entry, &ggtt_vm->mm, hole_start, hole_end) {
		DRM_DEBUG_KMS("clearing unused GTT space: [%lx, %lx]\n",
			      hole_start, hole_end);
		ggtt_vm->clear_range(ggtt_vm, hole_start,
				     hole_end - hole_start, true);
	}

	/* And finally clear the reserved guard page */
	ggtt_vm->clear_range(ggtt_vm, end - PAGE_SIZE, PAGE_SIZE, true);

	if (USES_PPGTT(dev) && !USES_FULL_PPGTT(dev)) {
		struct i915_hw_ppgtt *ppgtt;

		ppgtt = kzalloc(sizeof(*ppgtt), GFP_KERNEL);
		if (!ppgtt)
			return -ENOMEM;

		ret = __hw_ppgtt_init(dev, ppgtt);
		if (ret) {
			ppgtt->base.cleanup(&ppgtt->base);
			kfree(ppgtt);
			return ret;
		}

		if (ppgtt->base.allocate_va_range)
			ret = ppgtt->base.allocate_va_range(&ppgtt->base, 0,
							    ppgtt->base.total);
		if (ret) {
			ppgtt->base.cleanup(&ppgtt->base);
			kfree(ppgtt);
			return ret;
		}

		ppgtt->base.clear_range(&ppgtt->base,
					ppgtt->base.start,
					ppgtt->base.total,
					true);

		dev_priv->mm.aliasing_ppgtt = ppgtt;
	}

	return 0;
}

void i915_gem_init_global_gtt(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 gtt_size, mappable_size;

	gtt_size = dev_priv->gtt.base.total;
	mappable_size = dev_priv->gtt.mappable_end;

	i915_gem_setup_global_gtt(dev, 0, mappable_size, gtt_size);
}

void i915_global_gtt_cleanup(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_address_space *vm = &dev_priv->gtt.base;

	if (dev_priv->mm.aliasing_ppgtt) {
		struct i915_hw_ppgtt *ppgtt = dev_priv->mm.aliasing_ppgtt;

		ppgtt->base.cleanup(&ppgtt->base);
	}

	if (drm_mm_initialized(&vm->mm)) {
		if (intel_vgpu_active(dev))
			intel_vgt_deballoon();

		drm_mm_takedown(&vm->mm);
		list_del(&vm->global_link);
	}

	vm->cleanup(vm);
}

static unsigned int gen6_get_total_gtt_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GGMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GGMS_MASK;
	return snb_gmch_ctl << 20;
}
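
/*
 * Example decode (for illustration): a GGMS field value of 2 gives
 * 2 << 20 = 2MB of GTT page-table space; at sizeof(gen6_pte_t) == 4 bytes
 * per entry that maps 2MB / 4 * 4KiB = 2GB of GGTT address space (see the
 * gtt_total calculation in gen6_gmch_probe() below).
 */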

static unsigned int gen8_get_total_gtt_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GGMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GGMS_MASK;
	if (bdw_gmch_ctl)
		bdw_gmch_ctl = 1 << bdw_gmch_ctl;

#ifdef CONFIG_X86_32
	/* Limit 32b platforms to a 2GB GGTT: 4 << 20 / pte size * PAGE_SIZE */
	if (bdw_gmch_ctl > 4)
		bdw_gmch_ctl = 4;
#endif

	return bdw_gmch_ctl << 20;
}
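
/*
 * Example decode (for illustration): a field value of 3 becomes 1 << 3 = 8,
 * i.e. 8MB of PTEs; at sizeof(gen8_pte_t) == 8 bytes per entry that covers
 * 8MB / 8 * 4KiB = 4GB of GGTT. The CONFIG_X86_32 clamp to 4 (4MB of PTEs)
 * corresponds to the 2GB limit mentioned above.
 */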

static unsigned int chv_get_total_gtt_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GGMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GGMS_MASK;

	if (gmch_ctrl)
		return 1 << (20 + gmch_ctrl);

	return 0;
}

static size_t gen6_get_stolen_size(u16 snb_gmch_ctl)
{
	snb_gmch_ctl >>= SNB_GMCH_GMS_SHIFT;
	snb_gmch_ctl &= SNB_GMCH_GMS_MASK;
	return snb_gmch_ctl << 25; /* 32 MB units */
}

static size_t gen8_get_stolen_size(u16 bdw_gmch_ctl)
{
	bdw_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	bdw_gmch_ctl &= BDW_GMCH_GMS_MASK;
	return bdw_gmch_ctl << 25; /* 32 MB units */
}

static size_t chv_get_stolen_size(u16 gmch_ctrl)
{
	gmch_ctrl >>= SNB_GMCH_GMS_SHIFT;
	gmch_ctrl &= SNB_GMCH_GMS_MASK;

	/*
	 * 0x0  to 0x10: 32MB increments starting at 0MB
	 * 0x11 to 0x16: 4MB increments starting at 8MB
	 * 0x17 to 0x1d: 4MB increments starting at 36MB
	 */
	if (gmch_ctrl < 0x11)
		return gmch_ctrl << 25;
	else if (gmch_ctrl < 0x17)
		return (gmch_ctrl - 0x11 + 2) << 22;
	else
		return (gmch_ctrl - 0x17 + 9) << 22;
}
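
/*
 * Example decodes (for illustration, following the ranges above):
 * gmch_ctrl = 0x10 -> 0x10 << 25 = 512MB, 0x11 -> 2 << 22 = 8MB,
 * 0x16 -> 7 << 22 = 28MB, 0x17 -> 9 << 22 = 36MB.
 */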

static size_t gen9_get_stolen_size(u16 gen9_gmch_ctl)
{
	gen9_gmch_ctl >>= BDW_GMCH_GMS_SHIFT;
	gen9_gmch_ctl &= BDW_GMCH_GMS_MASK;

	if (gen9_gmch_ctl < 0xf0)
		return gen9_gmch_ctl << 25; /* 32 MB units */
	else
		/* 4MB increments, starting at 4MB for a field value of 0xf0 */
		return (gen9_gmch_ctl - 0xf0 + 1) << 22;
}
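
/*
 * Example decodes (for illustration): values below 0xf0 are 32MB units,
 * e.g. 0x02 -> 64MB; from 0xf0 upwards the size is in 4MB units starting at
 * 4MB, e.g. 0xf0 -> 4MB, 0xf1 -> 8MB.
 */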

static int ggtt_probe_common(struct drm_device *dev,
			     size_t gtt_size)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_page_scratch *scratch_page;
	phys_addr_t gtt_phys_addr;

	/* For Modern GENs the PTEs and register space are split in the BAR */
	gtt_phys_addr = pci_resource_start(dev->pdev, 0) +
		(pci_resource_len(dev->pdev, 0) / 2);

	/*
	 * On BXT writes larger than 64 bit to the GTT pagetable range will be
	 * dropped. For WC mappings in general we have 64 byte burst writes
	 * when the WC buffer is flushed, so we can't use it, but have to
	 * resort to an uncached mapping. The WC issue is easily caught by the
	 * readback check when writing GTT PTE entries.
	 */
	if (IS_BROXTON(dev))
		dev_priv->gtt.gsm = ioremap_nocache(gtt_phys_addr, gtt_size);
	else
		dev_priv->gtt.gsm = ioremap_wc(gtt_phys_addr, gtt_size);
	if (!dev_priv->gtt.gsm) {
		DRM_ERROR("Failed to map the gtt page table\n");
		return -ENOMEM;
	}

	scratch_page = alloc_scratch_page(dev);
	if (IS_ERR(scratch_page)) {
		DRM_ERROR("Scratch setup failed\n");
		/* iounmap will also get called at remove, but meh */
		iounmap(dev_priv->gtt.gsm);
		return PTR_ERR(scratch_page);
	}

	dev_priv->gtt.base.scratch_page = scratch_page;

	return 0;
}

/* The GGTT and PPGTT need a private PPAT setup in order to handle cacheability
 * bits. When using advanced contexts each context stores its own PAT, but
 * writing this data shouldn't be harmful even in those cases. */
static void bdw_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	pat = GEN8_PPAT(0, GEN8_PPAT_WB | GEN8_PPAT_LLC)     | /* for normal objects, no eLLC */
	      GEN8_PPAT(1, GEN8_PPAT_WC | GEN8_PPAT_LLCELLC) | /* for something pointing to ptes? */
	      GEN8_PPAT(2, GEN8_PPAT_WT | GEN8_PPAT_LLCELLC) | /* for scanout with eLLC */
	      GEN8_PPAT(3, GEN8_PPAT_UC)                     | /* Uncached objects, mostly for scanout */
	      GEN8_PPAT(4, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(0)) |
	      GEN8_PPAT(5, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(1)) |
	      GEN8_PPAT(6, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(2)) |
	      GEN8_PPAT(7, GEN8_PPAT_WB | GEN8_PPAT_LLCELLC | GEN8_PPAT_AGE(3));

	if (!USES_PPGTT(dev_priv->dev))
		/* Spec: "For GGTT, there is NO pat_sel[2:0] from the entry,
		 * so RTL will always use the value corresponding to
		 * pat_sel = 000".
		 * So let's disable cache for GGTT to avoid screen corruptions.
		 * MOCS still can be used though.
		 * - System agent ggtt writes (i.e. cpu gtt mmaps) already work
		 * before this patch, i.e. the same uncached + snooping access
		 * like on gen6/7 seems to be in effect.
		 * - So this just fixes blitter/render access. Again it looks
		 * like it's not just uncached access, but uncached + snooping.
		 * So we can still hold onto all our assumptions wrt cpu
		 * clflushing on LLC machines.
		 */
		pat = GEN8_PPAT(0, GEN8_PPAT_UC);

	/* XXX: spec defines this as 2 distinct registers. It's unclear if a 64b
	 * write would work. */
	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}
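
/*
 * For illustration only: assuming GEN8_PPAT(i, x) packs the 8-bit attribute
 * x into byte i of the 64-bit PAT value (consistent with the 32-bit low/high
 * dword writes above), entry 2 (WT | LLCELLC) for example lands in bits
 * 23:16, and a PTE whose pat_sel selects index 2 gets write-through caching
 * on LLC/eLLC.
 */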

static void chv_setup_private_ppat(struct drm_i915_private *dev_priv)
{
	uint64_t pat;

	/*
	 * Map WB on BDW to snooped on CHV.
	 *
	 * Only the snoop bit has meaning for CHV, the rest is
	 * ignored.
	 *
	 * The hardware will never snoop for certain types of accesses:
	 * - CPU GTT (GMADR->GGTT->no snoop->memory)
	 * - PPGTT page tables
	 * - some other special cycles
	 *
	 * As with BDW, we also need to consider the following for GT accesses:
	 * "For GGTT, there is NO pat_sel[2:0] from the entry,
	 * so RTL will always use the value corresponding to
	 * pat_sel = 000".
	 * Which means we must set the snoop bit in PAT entry 0
	 * in order to keep the global status page working.
	 */
	pat = GEN8_PPAT(0, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(1, 0) |
	      GEN8_PPAT(2, 0) |
	      GEN8_PPAT(3, 0) |
	      GEN8_PPAT(4, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(5, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(6, CHV_PPAT_SNOOP) |
	      GEN8_PPAT(7, CHV_PPAT_SNOOP);

	I915_WRITE(GEN8_PRIVATE_PAT, pat);
	I915_WRITE(GEN8_PRIVATE_PAT + 4, pat >> 32);
}

static int gen8_gmch_probe(struct drm_device *dev,
			   u64 *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   u64 *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	u64 gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	/* TODO: We're not aware of mappable constraints on gen8 yet */
	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(39)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(39));

	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	if (INTEL_INFO(dev)->gen >= 9) {
		*stolen = gen9_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	} else if (IS_CHERRYVIEW(dev)) {
		*stolen = chv_get_stolen_size(snb_gmch_ctl);
		gtt_size = chv_get_total_gtt_size(snb_gmch_ctl);
	} else {
		*stolen = gen8_get_stolen_size(snb_gmch_ctl);
		gtt_size = gen8_get_total_gtt_size(snb_gmch_ctl);
	}

	*gtt_total = (gtt_size / sizeof(gen8_pte_t)) << PAGE_SHIFT;

	if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
		chv_setup_private_ppat(dev_priv);
	else
		bdw_setup_private_ppat(dev_priv);

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen8_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen8_ggtt_insert_entries;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	return ret;
}

static int gen6_gmch_probe(struct drm_device *dev,
			   u64 *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   u64 *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	unsigned int gtt_size;
	u16 snb_gmch_ctl;
	int ret;

	*mappable_base = pci_resource_start(dev->pdev, 2);
	*mappable_end = pci_resource_len(dev->pdev, 2);

	/* 64/512MB is the current min/max we actually know of, but this is just
	 * a coarse sanity check.
	 */
	if ((*mappable_end < (64<<20) || (*mappable_end > (512<<20)))) {
		DRM_ERROR("Unknown GMADR size (%llx)\n",
			  dev_priv->gtt.mappable_end);
		return -ENXIO;
	}

	if (!pci_set_dma_mask(dev->pdev, DMA_BIT_MASK(40)))
		pci_set_consistent_dma_mask(dev->pdev, DMA_BIT_MASK(40));
	pci_read_config_word(dev->pdev, SNB_GMCH_CTRL, &snb_gmch_ctl);

	*stolen = gen6_get_stolen_size(snb_gmch_ctl);

	gtt_size = gen6_get_total_gtt_size(snb_gmch_ctl);
	*gtt_total = (gtt_size / sizeof(gen6_pte_t)) << PAGE_SHIFT;

	ret = ggtt_probe_common(dev, gtt_size);

	dev_priv->gtt.base.clear_range = gen6_ggtt_clear_range;
	dev_priv->gtt.base.insert_entries = gen6_ggtt_insert_entries;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	return ret;
}

static void gen6_gmch_remove(struct i915_address_space *vm)
{

	struct i915_gtt *gtt = container_of(vm, struct i915_gtt, base);

	iounmap(gtt->gsm);
	free_scratch_page(vm->dev, vm->scratch_page);
}

static int i915_gmch_probe(struct drm_device *dev,
			   u64 *gtt_total,
			   size_t *stolen,
			   phys_addr_t *mappable_base,
			   u64 *mappable_end)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	int ret;

	ret = intel_gmch_probe(dev_priv->bridge_dev, dev_priv->dev->pdev, NULL);
	if (!ret) {
		DRM_ERROR("failed to set up gmch\n");
		return -EIO;
	}

	intel_gtt_get(gtt_total, stolen, mappable_base, mappable_end);

	dev_priv->gtt.do_idle_maps = needs_idle_maps(dev_priv->dev);
	dev_priv->gtt.base.insert_entries = i915_ggtt_insert_entries;
	dev_priv->gtt.base.clear_range = i915_ggtt_clear_range;
	dev_priv->gtt.base.bind_vma = ggtt_bind_vma;
	dev_priv->gtt.base.unbind_vma = ggtt_unbind_vma;

	if (unlikely(dev_priv->gtt.do_idle_maps))
		DRM_INFO("applying Ironlake quirks for intel_iommu\n");

	return 0;
}

static void i915_gmch_remove(struct i915_address_space *vm)
{
	intel_gmch_remove();
}

int i915_gem_gtt_init(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct i915_gtt *gtt = &dev_priv->gtt;
	int ret;

	if (INTEL_INFO(dev)->gen <= 5) {
		gtt->gtt_probe = i915_gmch_probe;
		gtt->base.cleanup = i915_gmch_remove;
	} else if (INTEL_INFO(dev)->gen < 8) {
		gtt->gtt_probe = gen6_gmch_probe;
		gtt->base.cleanup = gen6_gmch_remove;
		if (IS_HASWELL(dev) && dev_priv->ellc_size)
			gtt->base.pte_encode = iris_pte_encode;
		else if (IS_HASWELL(dev))
			gtt->base.pte_encode = hsw_pte_encode;
		else if (IS_VALLEYVIEW(dev))
			gtt->base.pte_encode = byt_pte_encode;
		else if (INTEL_INFO(dev)->gen >= 7)
			gtt->base.pte_encode = ivb_pte_encode;
		else
			gtt->base.pte_encode = snb_pte_encode;
	} else {
		dev_priv->gtt.gtt_probe = gen8_gmch_probe;
		dev_priv->gtt.base.cleanup = gen6_gmch_remove;
	}

	gtt->base.dev = dev;

	ret = gtt->gtt_probe(dev, &gtt->base.total, &gtt->stolen_size,
			     &gtt->mappable_base, &gtt->mappable_end);
	if (ret)
		return ret;

	/* GMADR is the PCI mmio aperture into the global GTT. */
	DRM_INFO("Memory usable by graphics device = %lluM\n",
		 gtt->base.total >> 20);
	DRM_DEBUG_DRIVER("GMADR size = %lldM\n", gtt->mappable_end >> 20);
	DRM_DEBUG_DRIVER("GTT stolen size = %zdM\n", gtt->stolen_size >> 20);
#ifdef CONFIG_INTEL_IOMMU
	if (intel_iommu_gfx_mapped)
		DRM_INFO("VT-d active for gfx access\n");
#endif
	/*
	 * i915.enable_ppgtt is read-only, so do an early pass to validate the
	 * user's requested state against the hardware/driver capabilities.  We
	 * do this now so that we can print out any log messages once rather
	 * than every time we check intel_enable_ppgtt().
	 */
	i915.enable_ppgtt = sanitize_enable_ppgtt(dev, i915.enable_ppgtt);
	DRM_DEBUG_DRIVER("ppgtt mode: %i\n", i915.enable_ppgtt);

	return 0;
}

void i915_gem_restore_gtt_mappings(struct drm_device *dev)
{
	struct drm_i915_private *dev_priv = dev->dev_private;
	struct drm_i915_gem_object *obj;
	struct i915_address_space *vm;

	i915_check_and_clear_faults(dev);

	/* First fill our portion of the GTT with scratch pages */
	dev_priv->gtt.base.clear_range(&dev_priv->gtt.base,
				       dev_priv->gtt.base.start,
				       dev_priv->gtt.base.total,
				       true);

	list_for_each_entry(obj, &dev_priv->mm.bound_list, global_list) {
		struct i915_vma *vma = i915_gem_obj_to_vma(obj,
							   &dev_priv->gtt.base);
		if (!vma)
			continue;

		i915_gem_clflush_object(obj, obj->pin_display);
		WARN_ON(i915_vma_bind(vma, obj->cache_level, PIN_UPDATE));
	}


	if (INTEL_INFO(dev)->gen >= 8) {
		if (IS_CHERRYVIEW(dev) || IS_BROXTON(dev))
			chv_setup_private_ppat(dev_priv);
		else
			bdw_setup_private_ppat(dev_priv);

		return;
	}

	if (USES_PPGTT(dev)) {
		list_for_each_entry(vm, &dev_priv->vm_list, global_link) {
			/* TODO: Perhaps it shouldn't be gen6 specific */

			struct i915_hw_ppgtt *ppgtt =
					container_of(vm, struct i915_hw_ppgtt,
						     base);

			if (i915_is_ggtt(vm))
				ppgtt = dev_priv->mm.aliasing_ppgtt;

			gen6_write_page_range(dev_priv, &ppgtt->pd,
					      0, ppgtt->base.total);
		}
	}

	i915_ggtt_flush(dev_priv);
}

static struct i915_vma *
__i915_gem_vma_create(struct drm_i915_gem_object *obj,
		      struct i915_address_space *vm,
		      const struct i915_ggtt_view *ggtt_view)
{
	struct i915_vma *vma;

	if (WARN_ON(i915_is_ggtt(vm) != !!ggtt_view))
		return ERR_PTR(-EINVAL);

	vma = kmem_cache_zalloc(to_i915(obj->base.dev)->vmas, GFP_KERNEL);
	if (vma == NULL)
		return ERR_PTR(-ENOMEM);

	INIT_LIST_HEAD(&vma->vma_link);
	INIT_LIST_HEAD(&vma->mm_list);
	INIT_LIST_HEAD(&vma->exec_list);
	vma->vm = vm;
	vma->obj = obj;

	if (i915_is_ggtt(vm))
		vma->ggtt_view = *ggtt_view;

	list_add_tail(&vma->vma_link, &obj->vma_list);
	if (!i915_is_ggtt(vm))
		i915_ppgtt_get(i915_vm_to_ppgtt(vm));

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_vma(struct drm_i915_gem_object *obj,
				  struct i915_address_space *vm)
{
	struct i915_vma *vma;

	vma = i915_gem_obj_to_vma(obj, vm);
	if (!vma)
		vma = __i915_gem_vma_create(obj, vm,
					    i915_is_ggtt(vm) ? &i915_ggtt_view_normal : NULL);

	return vma;
}

struct i915_vma *
i915_gem_obj_lookup_or_create_ggtt_vma(struct drm_i915_gem_object *obj,
				       const struct i915_ggtt_view *view)
{
	struct i915_address_space *ggtt = i915_obj_to_ggtt(obj);
	struct i915_vma *vma;

	if (WARN_ON(!view))
		return ERR_PTR(-EINVAL);

	vma = i915_gem_obj_to_ggtt_view(obj, view);

	if (IS_ERR(vma))
		return vma;

	if (!vma)
		vma = __i915_gem_vma_create(obj, ggtt, view);

	return vma;

}

static void
rotate_pages(dma_addr_t *in, unsigned int width, unsigned int height,
	     struct sg_table *st)
{
	unsigned int column, row;
	unsigned int src_idx;
	struct scatterlist *sg = st->sgl;

	st->nents = 0;

	for (column = 0; column < width; column++) {
		src_idx = width * (height - 1) + column;
		for (row = 0; row < height; row++) {
			st->nents++;
			/* We don't need the pages, but need to initialize
			 * the entries so the sg list can be happily traversed.
			 * All we need are the DMA addresses.
			 */
			sg_set_page(sg, NULL, PAGE_SIZE, 0);
			sg_dma_address(sg) = in[src_idx];
			sg_dma_len(sg) = PAGE_SIZE;
			sg = sg_next(sg);
			src_idx -= width;
		}
	}
}
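
/*
 * Worked example (for illustration): for a 2 x 2 (width x height) view,
 * source pages laid out row-major as
 *
 *	0 1
 *	2 3
 *
 * are emitted column by column starting from the bottom row, i.e. in the
 * order 2, 0, 3, 1 - a 90 degree rotation of the page layout.
 */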

static struct sg_table *
intel_rotate_fb_obj_pages(struct i915_ggtt_view *ggtt_view,
			  struct drm_i915_gem_object *obj)
{
	struct intel_rotation_info *rot_info = &ggtt_view->rotation_info;
	unsigned int size_pages = rot_info->size >> PAGE_SHIFT;
	struct sg_page_iter sg_iter;
	unsigned long i;
	dma_addr_t *page_addr_list;
	struct sg_table *st;
	int ret = -ENOMEM;

	/* Allocate a temporary list of source pages for random access. */
	page_addr_list = drm_malloc_ab(obj->base.size / PAGE_SIZE,
				       sizeof(dma_addr_t));
	if (!page_addr_list)
		return ERR_PTR(ret);

	/* Allocate target SG list. */
	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, size_pages, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	/* Populate source page list from the object. */
	i = 0;
	for_each_sg_page(obj->pages->sgl, &sg_iter, obj->pages->nents, 0) {
		page_addr_list[i] = sg_page_iter_dma_address(&sg_iter);
		i++;
	}

	/* Rotate the pages. */
	rotate_pages(page_addr_list,
		     rot_info->width_pages, rot_info->height_pages,
		     st);

	DRM_DEBUG_KMS(
		      "Created rotated page mapping for object size %zu (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages).\n",
		      obj->base.size, rot_info->pitch, rot_info->height,
		      rot_info->pixel_format, rot_info->width_pages,
		      rot_info->height_pages, size_pages);

	drm_free_large(page_addr_list);

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	drm_free_large(page_addr_list);

	DRM_DEBUG_KMS(
		      "Failed to create rotated mapping for object size %zu! (%d) (pitch=%u, height=%u, pixel_format=0x%x, %ux%u tiles, %u pages)\n",
		      obj->base.size, ret, rot_info->pitch, rot_info->height,
		      rot_info->pixel_format, rot_info->width_pages,
		      rot_info->height_pages, size_pages);
	return ERR_PTR(ret);
}

static struct sg_table *
intel_partial_pages(const struct i915_ggtt_view *view,
		    struct drm_i915_gem_object *obj)
{
	struct sg_table *st;
	struct scatterlist *sg;
	struct sg_page_iter obj_sg_iter;
	int ret = -ENOMEM;

	st = kmalloc(sizeof(*st), GFP_KERNEL);
	if (!st)
		goto err_st_alloc;

	ret = sg_alloc_table(st, view->params.partial.size, GFP_KERNEL);
	if (ret)
		goto err_sg_alloc;

	sg = st->sgl;
	st->nents = 0;
	for_each_sg_page(obj->pages->sgl, &obj_sg_iter, obj->pages->nents,
		view->params.partial.offset)
	{
		if (st->nents >= view->params.partial.size)
			break;

		sg_set_page(sg, NULL, PAGE_SIZE, 0);
		sg_dma_address(sg) = sg_page_iter_dma_address(&obj_sg_iter);
		sg_dma_len(sg) = PAGE_SIZE;

		sg = sg_next(sg);
		st->nents++;
	}

	return st;

err_sg_alloc:
	kfree(st);
err_st_alloc:
	return ERR_PTR(ret);
}

static int
i915_get_ggtt_vma_pages(struct i915_vma *vma)
{
	int ret = 0;

	if (vma->ggtt_view.pages)
		return 0;

	if (vma->ggtt_view.type == I915_GGTT_VIEW_NORMAL)
		vma->ggtt_view.pages = vma->obj->pages;
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_ROTATED)
		vma->ggtt_view.pages =
			intel_rotate_fb_obj_pages(&vma->ggtt_view, vma->obj);
	else if (vma->ggtt_view.type == I915_GGTT_VIEW_PARTIAL)
		vma->ggtt_view.pages =
			intel_partial_pages(&vma->ggtt_view, vma->obj);
	else
		WARN_ONCE(1, "GGTT view %u not implemented!\n",
			  vma->ggtt_view.type);

	if (!vma->ggtt_view.pages) {
		DRM_ERROR("Failed to get pages for GGTT view type %u!\n",
			  vma->ggtt_view.type);
		ret = -EINVAL;
	} else if (IS_ERR(vma->ggtt_view.pages)) {
		ret = PTR_ERR(vma->ggtt_view.pages);
		vma->ggtt_view.pages = NULL;
		DRM_ERROR("Failed to get pages for VMA view type %u (%d)!\n",
			  vma->ggtt_view.type, ret);
	}

	return ret;
}

/**
 * i915_vma_bind - Sets up PTEs for a VMA in its corresponding address space.
 * @vma: VMA to map
 * @cache_level: mapping cache level
 * @flags: flags like global or local mapping
 *
 * DMA addresses are taken from the scatter-gather table of this object (or of
 * this VMA in case of non-default GGTT views) and PTE entries set up.
 * Note that DMA addresses are also the only part of the SG table we care about.
 */
int i915_vma_bind(struct i915_vma *vma, enum i915_cache_level cache_level,
		  u32 flags)
{
	int ret;
	u32 bind_flags;

	if (WARN_ON(flags == 0))
		return -EINVAL;

	bind_flags = 0;
	if (flags & PIN_GLOBAL)
		bind_flags |= GLOBAL_BIND;
	if (flags & PIN_USER)
		bind_flags |= LOCAL_BIND;

	if (flags & PIN_UPDATE)
		bind_flags |= vma->bound;
	else
		bind_flags &= ~vma->bound;

	if (bind_flags == 0)
		return 0;

	if (vma->bound == 0 && vma->vm->allocate_va_range) {
		trace_i915_va_alloc(vma->vm,
				    vma->node.start,
				    vma->node.size,
				    VM_TO_TRACE_NAME(vma->vm));

		/* XXX: i915_vma_pin() will fix this +- hack */
		vma->pin_count++;
		ret = vma->vm->allocate_va_range(vma->vm,
						 vma->node.start,
						 vma->node.size);
		vma->pin_count--;
		if (ret)
			return ret;
	}

	ret = vma->vm->bind_vma(vma, cache_level, bind_flags);
	if (ret)
		return ret;

	vma->bound |= bind_flags;

	return 0;
}
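
/*
 * Usage sketch (for illustration): the PIN_UPDATE rebind performed in
 * i915_gem_restore_gtt_mappings() above boils down to
 *
 *	ret = i915_vma_bind(vma, obj->cache_level, PIN_UPDATE);
 *
 * which rewrites the PTEs for every address space the VMA is already bound
 * in, leaving vma->bound unchanged.
 */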

/**
 * i915_ggtt_view_size - Get the size of a GGTT view.
 * @obj: Object the view is of.
 * @view: The view in question.
 *
 * @return The size of the GGTT view in bytes.
 */
size_t
i915_ggtt_view_size(struct drm_i915_gem_object *obj,
		    const struct i915_ggtt_view *view)
{
	if (view->type == I915_GGTT_VIEW_NORMAL) {
		return obj->base.size;
	} else if (view->type == I915_GGTT_VIEW_ROTATED) {
		return view->rotation_info.size;
	} else if (view->type == I915_GGTT_VIEW_PARTIAL) {
		return view->params.partial.size << PAGE_SHIFT;
	} else {
		WARN_ONCE(1, "GGTT view %u not implemented!\n", view->type);
		return obj->base.size;
	}
}
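
/*
 * Example (for illustration): with 4KiB pages, a partial view whose
 * params.partial.size is 16 pages reports 16 << PAGE_SHIFT = 64KiB here,
 * whereas a normal view of the same object reports the full obj->base.size.
 */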