i915_gem_execbuffer.c 49.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

29 30
#include <drm/drmP.h>
#include <drm/i915_drm.h>
31 32 33
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
34
#include <linux/dma_remapping.h>
35
#include <linux/uaccess.h>
36

37 38 39 40 41
/*
 * Driver-private bits OR-ed into an exec object's flags while an execbuffer
 * is being processed:
 *   HAS_PIN    - set after a successful pin, cleared when unreserving
 *   HAS_FENCE  - set when a fence was pinned for the object
 *   NEEDS_MAP  - object must be placed so that it is mappable
 *   NEEDS_BIAS - object must be placed at or above BATCH_OFFSET_BIAS
 *
 * The constants are unsigned: shifting a signed 1 into bit 31 (or 0xf into
 * bits 28-31) overflows int, and the resulting negative value would
 * sign-extend into the upper bits whenever it is widened to a 64-bit type.
 */
#define  __EXEC_OBJECT_HAS_PIN		(1U<<31)
#define  __EXEC_OBJECT_HAS_FENCE	(1U<<30)
#define  __EXEC_OBJECT_NEEDS_MAP	(1U<<29)
#define  __EXEC_OBJECT_NEEDS_BIAS	(1U<<28)
#define  __EXEC_OBJECT_INTERNAL_FLAGS (0xfU<<28) /* all of the above */
42 43

/*
 * Lowest address (256 KiB) accepted for a vma flagged with
 * __EXEC_OBJECT_NEEDS_BIAS; also OR-ed into the pin flags together with
 * PIN_OFFSET_BIAS when such a vma is reserved.
 */
#define BATCH_OFFSET_BIAS (256 << 10)
44

45 46 47 48 49 50 51 52 53 54 55 56
/*
 * Bundle of state describing a single execbuffer submission.
 *
 * NOTE(review): no user of this structure is visible in this part of the
 * file, so the field meanings below are inferred from names and types only
 * and should be confirmed against the submission code:
 *   dev/file                 - presumably the DRM device and client file
 *   dispatch_flags           - presumably flags for the batch dispatch
 *   args_batch_start_offset  - presumably the user's batch_start_offset
 *   batch_obj_vm_offset      - presumably the batch address in its VM
 *   engine/batch_obj/ctx/request - engine, batch object, context and request
 */
struct i915_execbuffer_params {
	struct drm_device               *dev;
	struct drm_file                 *file;
	u32				 dispatch_flags;
	u32				 args_batch_start_offset;
	u32				 batch_obj_vm_offset;
	struct intel_engine_cs          *engine;
	struct drm_i915_gem_object      *batch_obj;
	struct i915_gem_context         *ctx;
	struct drm_i915_gem_request     *request;
};

57 58
struct eb_vmas {
	struct list_head vmas;
59
	int and;
60
	union {
61
		struct i915_vma *lut[0];
62 63
		struct hlist_head buckets[0];
	};
64 65
};

66
static struct eb_vmas *
B
Ben Widawsky 已提交
67
eb_create(struct drm_i915_gem_execbuffer2 *args)
68
{
69
	struct eb_vmas *eb = NULL;
70 71

	if (args->flags & I915_EXEC_HANDLE_LUT) {
72
		unsigned size = args->buffer_count;
73 74
		size *= sizeof(struct i915_vma *);
		size += sizeof(struct eb_vmas);
75 76 77 78
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	}

	if (eb == NULL) {
79 80
		unsigned size = args->buffer_count;
		unsigned count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
L
Lauri Kasanen 已提交
81
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
82 83 84
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
85
			     sizeof(struct eb_vmas),
86 87 88 89 90 91 92 93
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

94
	INIT_LIST_HEAD(&eb->vmas);
95 96 97 98
	return eb;
}

static void
99
eb_reset(struct eb_vmas *eb)
100
{
101 102
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
103 104
}

105
static int
106 107 108 109 110
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
111
{
112 113
	struct drm_i915_gem_object *obj;
	struct list_head objects;
114
	int i, ret;
115

116
	INIT_LIST_HEAD(&objects);
117
	spin_lock(&file->table_lock);
118 119
	/* Grab a reference to the object and release the lock so we can lookup
	 * or create the VMA without using GFP_ATOMIC */
120
	for (i = 0; i < args->buffer_count; i++) {
121 122 123 124 125
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
126
			ret = -ENOENT;
127
			goto err;
128 129
		}

130
		if (!list_empty(&obj->obj_exec_link)) {
131 132 133
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
134
			ret = -EINVAL;
135
			goto err;
136 137
		}

138
		i915_gem_object_get(obj);
139 140 141
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	spin_unlock(&file->table_lock);
142

143
	i = 0;
144
	while (!list_empty(&objects)) {
145
		struct i915_vma *vma;
146

147 148 149 150
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);

151 152 153 154 155 156 157 158
		/*
		 * NOTE: We can leak any vmas created here when something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
159
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
160 161 162
		if (IS_ERR(vma)) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
163
			goto err;
164 165
		}

166
		/* Transfer ownership from the objects list to the vmas list. */
167
		list_add_tail(&vma->exec_list, &eb->vmas);
168
		list_del_init(&obj->obj_exec_link);
169 170

		vma->exec_entry = &exec[i];
171
		if (eb->and < 0) {
172
			eb->lut[i] = vma;
173 174
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
175 176
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
177 178
				       &eb->buckets[handle & eb->and]);
		}
179
		++i;
180 181
	}

182
	return 0;
183 184


185
err:
186 187 188 189 190
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
191
		i915_gem_object_put(obj);
192
	}
193 194 195 196 197
	/*
	 * Objects already transfered to the vmas list will be unreferenced by
	 * eb_destroy.
	 */

198
	return ret;
199 200
}

D
Dave Gordon 已提交
201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229
static inline struct i915_vma *
eb_get_batch_vma(struct eb_vmas *eb)
{
	/* The batch is always the LAST item in the VMA list */
	struct i915_vma *vma = list_last_entry(&eb->vmas, typeof(*vma), exec_list);

	return vma;
}

static struct drm_i915_gem_object *
eb_get_batch(struct eb_vmas *eb)
{
	struct i915_vma *vma = eb_get_batch_vma(eb);

	/*
	 * SNA is doing fancy tricks with compressing batch buffers, which leads
	 * to negative relocation deltas. Usually that works out ok since the
	 * relocate address is still positive, except when the batch is placed
	 * very low in the GTT. Ensure this doesn't happen.
	 *
	 * Note that actual hangs have only been observed on gen7, but for
	 * paranoia do it everywhere.
	 */
	if ((vma->exec_entry->flags & EXEC_OBJECT_PINNED) == 0)
		vma->exec_entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

	return vma->obj;
}

230
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
231
{
232 233 234 235 236 237
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
238
		struct i915_vma *vma;
239

240
		head = &eb->buckets[handle & eb->and];
241
		hlist_for_each_entry(vma, head, exec_node) {
242 243
			if (vma->exec_handle == handle)
				return vma;
244 245 246
		}
		return NULL;
	}
247 248
}

249 250 251 252 253 254 255 256 257 258 259 260 261 262 263
/*
 * Undo what reserving @vma recorded in its exec entry: unpin the fence if
 * __EXEC_OBJECT_HAS_FENCE is set, drop one pin if __EXEC_OBJECT_HAS_PIN is
 * set, then clear both bits. Does nothing for a vma without an allocated
 * node.
 */
static void
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry;
	struct drm_i915_gem_object *obj = vma->obj;

	if (!drm_mm_node_allocated(&vma->node))
		return;

	entry = vma->exec_entry;

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		vma->pin_count--;

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
}

static void eb_destroy(struct eb_vmas *eb)
{
271 272
	while (!list_empty(&eb->vmas)) {
		struct i915_vma *vma;
273

274 275
		vma = list_first_entry(&eb->vmas,
				       struct i915_vma,
276
				       exec_list);
277
		list_del_init(&vma->exec_list);
278
		i915_gem_execbuffer_unreserve_vma(vma);
279
		i915_gem_object_put(vma->obj);
280
	}
281 282 283
	kfree(eb);
}

284 285
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
286 287
	return (HAS_LLC(obj->base.dev) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
288 289 290
		obj->cache_level != I915_CACHE_NONE);
}

291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315
/*
 * Convert an address to canonical form.
 *
 * From gen8 on, several commands (STATE_BASE_ADDRESS, MI_LOAD_REGISTER_MEM
 * and others, see Broadwell PRM Vol2a) expect canonical addresses:
 * "GraphicsAddress[63:48] are ignored by the HW and assumed to be in correct
 * canonical form [63:48] == [47]."
 *
 * The conversion sign-extends the address from bit GEN8_HIGH_ADDRESS_BIT.
 */
#define GEN8_HIGH_ADDRESS_BIT 47
static inline uint64_t gen8_canonical_addr(uint64_t address)
{
	const uint64_t canonical = sign_extend64(address, GEN8_HIGH_ADDRESS_BIT);

	return canonical;
}

/*
 * Strip the sign extension from a canonical address, keeping only bits
 * [GEN8_HIGH_ADDRESS_BIT:0].
 */
static inline uint64_t gen8_noncanonical_addr(uint64_t address)
{
	const uint64_t low_bits = (1ULL << (GEN8_HIGH_ADDRESS_BIT + 1)) - 1;

	return address & low_bits;
}

/*
 * Compute the value to write for a relocation: the target's offset plus the
 * relocation delta (treated as a signed int, so negative deltas work),
 * returned in canonical form.
 */
static inline uint64_t
relocation_target(struct drm_i915_gem_relocation_entry *reloc,
		  uint64_t target_offset)
{
	const int signed_delta = (int)reloc->delta;

	return gen8_canonical_addr(signed_delta + target_offset);
}

316 317
/*
 * Write a relocation into @obj through an atomic CPU mapping of its backing
 * page.
 *
 * The object is first moved to the CPU domain for writing. The low 32 bits
 * of the relocated address are stored at reloc->offset; on gen8+ the high
 * 32 bits are stored in the following dword, remapping if that dword starts
 * on the next page.
 *
 * Returns 0 on success or the error from the domain change.
 */
static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = relocation_target(reloc, target_offset);
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_cpu_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
				reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = lower_32_bits(delta);

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		/* Wrapped to 0: the upper dword lives on the next page. */
		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		*(uint32_t *)(vaddr + page_offset) = upper_32_bits(delta);
	}

	kunmap_atomic(vaddr);

	return 0;
}

/*
 * Write a relocation into @obj through a write-combining mapping of the
 * mappable GGTT aperture.
 *
 * The object is moved to the GTT domain for writing and its fence is
 * released before the page holding the relocation is mapped. The low 32 bits
 * are written at reloc->offset; on gen8+ the high 32 bits follow in the next
 * dword, remapping if that dword starts on the next page.
 *
 * Returns 0 on success or the error from the domain change / fence release.
 */
static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc,
		   uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
	uint64_t delta = relocation_target(reloc, target_offset);
	uint64_t offset;
	void __iomem *reloc_page;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	/* Map the page containing the relocation we're going to perform.  */
	offset = i915_gem_obj_ggtt_offset(obj);
	offset += reloc->offset;
	reloc_page = io_mapping_map_atomic_wc(ggtt->mappable,
					      offset & PAGE_MASK);
	iowrite32(lower_32_bits(delta), reloc_page + offset_in_page(offset));

	if (INTEL_INFO(dev)->gen >= 8) {
		offset += sizeof(uint32_t);

		/*
		 * Crossed into the next page: offset is page aligned here, so
		 * masking is a no-op kept for symmetry with the first mapping.
		 */
		if (offset_in_page(offset) == 0) {
			io_mapping_unmap_atomic(reloc_page);
			reloc_page =
				io_mapping_map_atomic_wc(ggtt->mappable,
							 offset & PAGE_MASK);
		}

		iowrite32(upper_32_bits(delta),
			  reloc_page + offset_in_page(offset));
	}

	io_mapping_unmap_atomic(reloc_page);

	return 0;
}

399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414
/*
 * Store a 32-bit value at @addr, flushing the covering cache range both
 * before and after the store.
 */
static void
clflush_write32(void *addr, uint32_t value)
{
	uint32_t *slot = addr;

	/* Not a fast path, so keep it simple: flush, store, flush. */
	drm_clflush_virt_range(slot, sizeof(*slot));
	*slot = value;
	drm_clflush_virt_range(slot, sizeof(*slot));
}

/*
 * Write a relocation into @obj through an atomic CPU mapping, flushing the
 * written dwords with clflush_write32().
 *
 * The object is moved to the GTT domain for writing first. The low 32 bits
 * are written at reloc->offset; on gen8+ the high 32 bits follow in the next
 * dword, remapping if that dword starts on the next page.
 *
 * Returns 0 on success or the error from the domain change.
 */
static int
relocate_entry_clflush(struct drm_i915_gem_object *obj,
		       struct drm_i915_gem_relocation_entry *reloc,
		       uint64_t target_offset)
{
	struct drm_device *dev = obj->base.dev;
	uint32_t page_offset = offset_in_page(reloc->offset);
	uint64_t delta = relocation_target(reloc, target_offset);
	char *vaddr;
	int ret;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
				reloc->offset >> PAGE_SHIFT));
	clflush_write32(vaddr + page_offset, lower_32_bits(delta));

	if (INTEL_INFO(dev)->gen >= 8) {
		page_offset = offset_in_page(page_offset + sizeof(uint32_t));

		/* Wrapped to 0: the upper dword lives on the next page. */
		if (page_offset == 0) {
			kunmap_atomic(vaddr);
			vaddr = kmap_atomic(i915_gem_object_get_dirty_page(obj,
			    (reloc->offset + sizeof(uint32_t)) >> PAGE_SHIFT));
		}

		clflush_write32(vaddr + page_offset, upper_32_bits(delta));
	}

	kunmap_atomic(vaddr);

	return 0;
}

444 445
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
446
				   struct eb_vmas *eb,
447
				   struct drm_i915_gem_relocation_entry *reloc)
448 449 450
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
451
	struct drm_i915_gem_object *target_i915_obj;
452
	struct i915_vma *target_vma;
B
Ben Widawsky 已提交
453
	uint64_t target_offset;
454
	int ret;
455

456
	/* we've already hold a reference to all valid objects */
457 458
	target_vma = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(target_vma == NULL))
459
		return -ENOENT;
460 461
	target_i915_obj = target_vma->obj;
	target_obj = &target_vma->obj->base;
462

463
	target_offset = gen8_canonical_addr(target_vma->node.start);
464

465 466 467 468
	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
469
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION)) {
470
		ret = i915_vma_bind(target_vma, target_i915_obj->cache_level,
471
				    PIN_GLOBAL);
472 473 474
		if (WARN_ONCE(ret, "Unexpected failure to bind target VMA!"))
			return ret;
	}
475

476
	/* Validate that the target is in a valid r/w GPU domain */
477
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
478
		DRM_DEBUG("reloc with multiple write domains: "
479 480 481 482 483 484
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
485
		return -EINVAL;
486
	}
487 488
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
489
		DRM_DEBUG("reloc with read/write non-GPU domains: "
490 491 492 493 494 495
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
496
		return -EINVAL;
497 498 499 500 501 502 503 504 505
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
506
		return 0;
507 508

	/* Check that the relocation address is valid... */
509 510
	if (unlikely(reloc->offset >
		obj->base.size - (INTEL_INFO(dev)->gen >= 8 ? 8 : 4))) {
511
		DRM_DEBUG("Relocation beyond object bounds: "
512 513 514 515
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
516
		return -EINVAL;
517
	}
518
	if (unlikely(reloc->offset & 3)) {
519
		DRM_DEBUG("Relocation not 4-byte aligned: "
520 521 522
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
523
		return -EINVAL;
524 525
	}

526
	/* We can't wait for rendering with pagefaults disabled */
527
	if (obj->active && pagefault_disabled())
528 529
		return -EFAULT;

530
	if (use_cpu_reloc(obj))
B
Ben Widawsky 已提交
531
		ret = relocate_entry_cpu(obj, reloc, target_offset);
532
	else if (obj->map_and_fenceable)
B
Ben Widawsky 已提交
533
		ret = relocate_entry_gtt(obj, reloc, target_offset);
534
	else if (static_cpu_has(X86_FEATURE_CLFLUSH))
535 536 537 538 539
		ret = relocate_entry_clflush(obj, reloc, target_offset);
	else {
		WARN_ONCE(1, "Impossible case in relocation handling\n");
		ret = -ENODEV;
	}
540

541 542 543
	if (ret)
		return ret;

544 545 546
	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

547
	return 0;
548 549 550
}

static int
551 552
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
553
{
554 555
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
556
	struct drm_i915_gem_relocation_entry __user *user_relocs;
557
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
558
	int remain, ret;
559

560
	user_relocs = u64_to_user_ptr(entry->relocs_ptr);
561

562 563 564 565 566 567 568 569 570
	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
571 572
			return -EFAULT;

573 574
		do {
			u64 offset = r->presumed_offset;
575

576
			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r);
577 578 579 580
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
581
			    __put_user(r->presumed_offset, &user_relocs->presumed_offset)) {
582 583 584 585 586 587
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
588 589 590
	}

	return 0;
591
#undef N_RELOC
592 593 594
}

static int
595 596 597
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
				      struct eb_vmas *eb,
				      struct drm_i915_gem_relocation_entry *relocs)
598
{
599
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
600 601 602
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
603
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i]);
604 605 606 607 608 609 610 611
		if (ret)
			return ret;
	}

	return 0;
}

static int
B
Ben Widawsky 已提交
612
i915_gem_execbuffer_relocate(struct eb_vmas *eb)
613
{
614
	struct i915_vma *vma;
615 616 617 618 619 620 621 622 623 624
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case we, the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
	pagefault_disable();
625 626
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
627
		if (ret)
628
			break;
629
	}
630
	pagefault_enable();
631

632
	return ret;
633 634
}

635 636 637 638 639 640
/*
 * True when the entry needs a mappable placement (__EXEC_OBJECT_NEEDS_MAP)
 * but did not ask for a fence (EXEC_OBJECT_NEEDS_FENCE clear).
 */
static bool only_mappable_for_reloc(unsigned int flags)
{
	const unsigned int mask =
		EXEC_OBJECT_NEEDS_FENCE | __EXEC_OBJECT_NEEDS_MAP;

	return (flags & mask) == __EXEC_OBJECT_NEEDS_MAP;
}

641
static int
642
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
643
				struct intel_engine_cs *engine,
644
				bool *need_reloc)
645
{
646
	struct drm_i915_gem_object *obj = vma->obj;
647
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
648
	uint64_t flags;
649 650
	int ret;

651
	flags = PIN_USER;
652 653 654
	if (entry->flags & EXEC_OBJECT_NEEDS_GTT)
		flags |= PIN_GLOBAL;

655
	if (!drm_mm_node_allocated(&vma->node)) {
656 657 658 659 660
		/* Wa32bitGeneralStateOffset & Wa32bitInstructionBaseOffset,
		 * limit address to the first 4GBs for unflagged objects.
		 */
		if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0)
			flags |= PIN_ZONE_4G;
661 662 663 664
		if (entry->flags & __EXEC_OBJECT_NEEDS_MAP)
			flags |= PIN_GLOBAL | PIN_MAPPABLE;
		if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS)
			flags |= BATCH_OFFSET_BIAS | PIN_OFFSET_BIAS;
665 666
		if (entry->flags & EXEC_OBJECT_PINNED)
			flags |= entry->offset | PIN_OFFSET_FIXED;
667 668
		if ((flags & PIN_MAPPABLE) == 0)
			flags |= PIN_HIGH;
669
	}
670 671

	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, flags);
672 673 674 675
	if ((ret == -ENOSPC  || ret == -E2BIG) &&
	    only_mappable_for_reloc(entry->flags))
		ret = i915_gem_object_pin(obj, vma->vm,
					  entry->alignment,
676
					  flags & ~PIN_MAPPABLE);
677 678 679
	if (ret)
		return ret;

680 681
	entry->flags |= __EXEC_OBJECT_HAS_PIN;

682 683 684 685
	if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
		ret = i915_gem_object_get_fence(obj);
		if (ret)
			return ret;
686

687 688
		if (i915_gem_object_pin_fence(obj))
			entry->flags |= __EXEC_OBJECT_HAS_FENCE;
689 690
	}

691 692
	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
693 694 695 696 697 698 699 700
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

701
	return 0;
702
}
703

704
static bool
705
need_reloc_mappable(struct i915_vma *vma)
706 707 708
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;

709 710 711
	if (entry->relocation_count == 0)
		return false;

712
	if (!vma->is_ggtt)
713 714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729
		return false;

	/* See also use_cpu_reloc() */
	if (HAS_LLC(vma->obj->base.dev))
		return false;

	if (vma->obj->base.write_domain == I915_GEM_DOMAIN_CPU)
		return false;

	return true;
}

static bool
eb_vma_misplaced(struct i915_vma *vma)
{
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	struct drm_i915_gem_object *obj = vma->obj;
730

731
	WARN_ON(entry->flags & __EXEC_OBJECT_NEEDS_MAP && !vma->is_ggtt);
732 733 734 735 736

	if (entry->alignment &&
	    vma->node.start & (entry->alignment - 1))
		return true;

737 738 739 740
	if (entry->flags & EXEC_OBJECT_PINNED &&
	    vma->node.start != entry->offset)
		return true;

741 742 743 744
	if (entry->flags & __EXEC_OBJECT_NEEDS_BIAS &&
	    vma->node.start < BATCH_OFFSET_BIAS)
		return true;

745 746 747 748
	/* avoid costly ping-pong once a batch bo ended up non-mappable */
	if (entry->flags & __EXEC_OBJECT_NEEDS_MAP && !obj->map_and_fenceable)
		return !only_mappable_for_reloc(entry->flags);

749 750 751 752
	if ((entry->flags & EXEC_OBJECT_SUPPORTS_48B_ADDRESS) == 0 &&
	    (vma->node.start + vma->node.size - 1) >> 32)
		return true;

753 754 755
	return false;
}

756
static int
757
i915_gem_execbuffer_reserve(struct intel_engine_cs *engine,
758
			    struct list_head *vmas,
759
			    struct i915_gem_context *ctx,
760
			    bool *need_relocs)
761
{
762
	struct drm_i915_gem_object *obj;
763
	struct i915_vma *vma;
764
	struct i915_address_space *vm;
765
	struct list_head ordered_vmas;
766
	struct list_head pinned_vmas;
767
	bool has_fenced_gpu_access = INTEL_GEN(engine->i915) < 4;
768
	int retry;
769

770
	i915_gem_retire_requests_ring(engine);
771

772 773
	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

774
	INIT_LIST_HEAD(&ordered_vmas);
775
	INIT_LIST_HEAD(&pinned_vmas);
776
	while (!list_empty(vmas)) {
777 778 779
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

780 781 782
		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;
783

784 785 786
		if (ctx->flags & CONTEXT_NO_ZEROMAP)
			entry->flags |= __EXEC_OBJECT_NEEDS_BIAS;

787 788
		if (!has_fenced_gpu_access)
			entry->flags &= ~EXEC_OBJECT_NEEDS_FENCE;
789 790 791
		need_fence =
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
792
		need_mappable = need_fence || need_reloc_mappable(vma);
793

794 795 796
		if (entry->flags & EXEC_OBJECT_PINNED)
			list_move_tail(&vma->exec_list, &pinned_vmas);
		else if (need_mappable) {
797
			entry->flags |= __EXEC_OBJECT_NEEDS_MAP;
798
			list_move(&vma->exec_list, &ordered_vmas);
799
		} else
800
			list_move_tail(&vma->exec_list, &ordered_vmas);
801

802
		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
803
		obj->base.pending_write_domain = 0;
804
	}
805
	list_splice(&ordered_vmas, vmas);
806
	list_splice(&pinned_vmas, vmas);
807 808 809 810 811 812 813 814 815 816

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
817
	 * This avoid unnecessary unbinding of later objects in order to make
818 819 820 821
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
822
		int ret = 0;
823 824

		/* Unbind any ill-fitting objects or pin. */
825 826
		list_for_each_entry(vma, vmas, exec_list) {
			if (!drm_mm_node_allocated(&vma->node))
827 828
				continue;

829
			if (eb_vma_misplaced(vma))
830
				ret = i915_vma_unbind(vma);
831
			else
832 833 834
				ret = i915_gem_execbuffer_reserve_vma(vma,
								      engine,
								      need_relocs);
835
			if (ret)
836 837 838 839
				goto err;
		}

		/* Bind fresh objects */
840 841
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
842
				continue;
843

844 845
			ret = i915_gem_execbuffer_reserve_vma(vma, engine,
							      need_relocs);
846 847
			if (ret)
				goto err;
848 849
		}

850
err:
C
Chris Wilson 已提交
851
		if (ret != -ENOSPC || retry++)
852 853
			return ret;

854 855 856 857
		/* Decrement pin count for bound objects */
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);

858
		ret = i915_gem_evict_vm(vm, true);
859 860 861 862 863 864 865
		if (ret)
			return ret;
	} while (1);
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
866
				  struct drm_i915_gem_execbuffer2 *args,
867
				  struct drm_file *file,
868
				  struct intel_engine_cs *engine,
869
				  struct eb_vmas *eb,
870
				  struct drm_i915_gem_exec_object2 *exec,
871
				  struct i915_gem_context *ctx)
872 873
{
	struct drm_i915_gem_relocation_entry *reloc;
874 875
	struct i915_address_space *vm;
	struct i915_vma *vma;
876
	bool need_relocs;
877
	int *reloc_offset;
878
	int i, total, ret;
879
	unsigned count = args->buffer_count;
880

881 882
	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

883
	/* We may process another execbuffer during the unlock... */
884 885 886
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
887
		i915_gem_execbuffer_unreserve_vma(vma);
888
		i915_gem_object_put(vma->obj);
889 890
	}

891 892 893 894
	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
895
		total += exec[i].relocation_count;
896

897
	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
898
	reloc = drm_malloc_ab(total, sizeof(*reloc));
899 900 901
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
902 903 904 905 906 907 908
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
909 910
		u64 invalid_offset = (u64)-1;
		int j;
911

912
		user_relocs = u64_to_user_ptr(exec[i].relocs_ptr);
913 914

		if (copy_from_user(reloc+total, user_relocs,
915
				   exec[i].relocation_count * sizeof(*reloc))) {
916 917 918 919 920
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

921 922 923 924 925 926 927 928 929 930
		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
931 932 933
			if (__copy_to_user(&user_relocs[j].presumed_offset,
					   &invalid_offset,
					   sizeof(invalid_offset))) {
934 935 936 937 938 939
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

940
		reloc_offset[i] = total;
941
		total += exec[i].relocation_count;
942 943 944 945 946 947 948 949
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

950 951
	/* reacquire the objects */
	eb_reset(eb);
952
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
953 954
	if (ret)
		goto err;
955

956
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
957 958
	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
					  &need_relocs);
959 960 961
	if (ret)
		goto err;

962 963 964 965
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
966 967 968 969 970 971 972 973 974 975 976 977
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
978
	drm_free_large(reloc_offset);
979 980 981 982
	return ret;
}

static int
983
i915_gem_execbuffer_move_to_gpu(struct drm_i915_gem_request *req,
984
				struct list_head *vmas)
985
{
986
	const unsigned other_rings = ~intel_engine_flag(req->engine);
987
	struct i915_vma *vma;
988
	uint32_t flush_domains = 0;
989
	bool flush_chipset = false;
990
	int ret;
991

992 993
	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
994 995

		if (obj->active & other_rings) {
996
			ret = i915_gem_object_sync(obj, req);
997 998 999
			if (ret)
				return ret;
		}
1000 1001

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
1002
			flush_chipset |= i915_gem_clflush_object(obj, false);
1003 1004

		flush_domains |= obj->base.write_domain;
1005 1006
	}

1007
	if (flush_chipset)
1008
		i915_gem_chipset_flush(req->engine->i915);
1009 1010 1011 1012

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

1013
	/* Unconditionally invalidate GPU caches and TLBs. */
1014
	return req->engine->emit_flush(req, EMIT_INVALIDATE);
1015 1016
}

1017 1018
static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
1019
{
1020 1021 1022
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

C
Chris Wilson 已提交
1023 1024 1025 1026 1027 1028 1029 1030 1031 1032 1033 1034 1035 1036 1037
	/* Kernel clipping was a DRI1 misfeature */
	if (exec->num_cliprects || exec->cliprects_ptr)
		return false;

	if (exec->DR4 == 0xffffffff) {
		DRM_DEBUG("UXA submitting garbage DR4, fixing up\n");
		exec->DR4 = 0;
	}
	if (exec->DR1 || exec->DR4)
		return false;

	if ((exec->batch_start_offset | exec->batch_len) & 0x7)
		return false;

	return true;
1038 1039 1040
}

static int
1041 1042
validate_exec_list(struct drm_device *dev,
		   struct drm_i915_gem_exec_object2 *exec,
1043 1044
		   int count)
{
1045 1046
	unsigned relocs_total = 0;
	unsigned relocs_max = UINT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
1047 1048 1049
	unsigned invalid_flags;
	int i;

1050 1051 1052
	/* INTERNAL flags must not overlap with external ones */
	BUILD_BUG_ON(__EXEC_OBJECT_INTERNAL_FLAGS & ~__EXEC_OBJECT_UNKNOWN_FLAGS);

1053 1054 1055
	invalid_flags = __EXEC_OBJECT_UNKNOWN_FLAGS;
	if (USES_FULL_PPGTT(dev))
		invalid_flags |= EXEC_OBJECT_NEEDS_GTT;
1056 1057

	for (i = 0; i < count; i++) {
1058
		char __user *ptr = u64_to_user_ptr(exec[i].relocs_ptr);
1059 1060
		int length; /* limited by fault_in_pages_readable() */

1061
		if (exec[i].flags & invalid_flags)
1062 1063
			return -EINVAL;

1064 1065 1066 1067 1068 1069 1070 1071 1072 1073 1074 1075 1076 1077 1078
		/* Offset can be used as input (EXEC_OBJECT_PINNED), reject
		 * any non-page-aligned or non-canonical addresses.
		 */
		if (exec[i].flags & EXEC_OBJECT_PINNED) {
			if (exec[i].offset !=
			    gen8_canonical_addr(exec[i].offset & PAGE_MASK))
				return -EINVAL;

			/* From drm_mm perspective address space is continuous,
			 * so from this point we're always using non-canonical
			 * form internally.
			 */
			exec[i].offset = gen8_noncanonical_addr(exec[i].offset);
		}

1079 1080 1081
		if (exec[i].alignment && !is_power_of_2(exec[i].alignment))
			return -EINVAL;

1082 1083 1084 1085 1086
		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
1087
			return -EINVAL;
1088
		relocs_total += exec[i].relocation_count;
1089 1090 1091

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
1092 1093 1094 1095 1096
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
1097 1098 1099
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

1100
		if (likely(!i915.prefault_disable)) {
1101 1102 1103
			if (fault_in_multipages_readable(ptr, length))
				return -EFAULT;
		}
1104 1105 1106 1107 1108
	}

	return 0;
}

1109
static struct i915_gem_context *
1110
i915_gem_validate_context(struct drm_device *dev, struct drm_file *file,
1111
			  struct intel_engine_cs *engine, const u32 ctx_id)
1112
{
1113
	struct i915_gem_context *ctx = NULL;
1114 1115
	struct i915_ctx_hang_stats *hs;

1116
	if (engine->id != RCS && ctx_id != DEFAULT_CONTEXT_HANDLE)
1117 1118
		return ERR_PTR(-EINVAL);

1119
	ctx = i915_gem_context_lookup(file->driver_priv, ctx_id);
1120
	if (IS_ERR(ctx))
1121
		return ctx;
1122

1123
	hs = &ctx->hang_stats;
1124 1125
	if (hs->banned) {
		DRM_DEBUG("Context %u tried to submit while banned\n", ctx_id);
1126
		return ERR_PTR(-EIO);
1127 1128
	}

1129
	return ctx;
1130 1131
}

1132
static void
1133
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
1134
				   struct drm_i915_gem_request *req)
1135
{
1136
	struct intel_engine_cs *engine = i915_gem_request_get_engine(req);
1137
	struct i915_vma *vma;
1138

1139
	list_for_each_entry(vma, vmas, exec_list) {
1140
		struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
1141
		struct drm_i915_gem_object *obj = vma->obj;
1142 1143
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;
C
Chris Wilson 已提交
1144

1145
		obj->dirty = 1; /* be paranoid  */
1146
		obj->base.write_domain = obj->base.pending_write_domain;
1147 1148 1149
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
1150

1151
		i915_vma_move_to_active(vma, req);
1152
		if (obj->base.write_domain) {
1153
			i915_gem_request_assign(&obj->last_write_req, req);
1154

1155
			intel_fb_obj_invalidate(obj, ORIGIN_CS);
1156 1157 1158

			/* update for the implicit flush after a batch */
			obj->base.write_domain &= ~I915_GEM_GPU_DOMAINS;
1159
		}
1160
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
1161
			i915_gem_request_assign(&obj->last_fenced_req, req);
1162
			if (entry->flags & __EXEC_OBJECT_HAS_FENCE) {
1163
				struct drm_i915_private *dev_priv = engine->i915;
1164 1165 1166 1167
				list_move_tail(&dev_priv->fence_regs[obj->fence_reg].lru_list,
					       &dev_priv->mm.fence_list);
			}
		}
1168

C
Chris Wilson 已提交
1169
		trace_i915_gem_object_change_domain(obj, old_read, old_write);
1170 1171 1172
	}
}

1173
static void
1174
i915_gem_execbuffer_retire_commands(struct i915_execbuffer_params *params)
1175
{
1176
	/* Add a breadcrumb for the completion of the batch buffer */
1177
	__i915_add_request(params->request, params->batch_obj, true);
1178
}
1179

1180
static int
1181
i915_reset_gen7_sol_offsets(struct drm_i915_gem_request *req)
1182
{
1183
	struct intel_ring *ring = req->ring;
1184 1185
	int ret, i;

1186
	if (!IS_GEN7(req->i915) || req->engine->id != RCS) {
1187 1188 1189
		DRM_DEBUG("sol reset is gen7/rcs only\n");
		return -EINVAL;
	}
1190

1191
	ret = intel_ring_begin(req, 4 * 3);
1192 1193 1194 1195
	if (ret)
		return ret;

	for (i = 0; i < 4; i++) {
1196 1197 1198
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit_reg(ring, GEN7_SO_WRITE_OFFSET(i));
		intel_ring_emit(ring, 0);
1199 1200
	}

1201
	intel_ring_advance(ring);
1202 1203 1204 1205

	return 0;
}

1206
static struct drm_i915_gem_object*
1207
i915_gem_execbuffer_parse(struct intel_engine_cs *engine,
1208 1209 1210 1211 1212
			  struct drm_i915_gem_exec_object2 *shadow_exec_entry,
			  struct eb_vmas *eb,
			  struct drm_i915_gem_object *batch_obj,
			  u32 batch_start_offset,
			  u32 batch_len,
1213
			  bool is_master)
1214 1215
{
	struct drm_i915_gem_object *shadow_batch_obj;
1216
	struct i915_vma *vma;
1217 1218
	int ret;

1219
	shadow_batch_obj = i915_gem_batch_pool_get(&engine->batch_pool,
1220
						   PAGE_ALIGN(batch_len));
1221 1222 1223
	if (IS_ERR(shadow_batch_obj))
		return shadow_batch_obj;

1224 1225 1226 1227 1228 1229
	ret = intel_engine_cmd_parser(engine,
				      batch_obj,
				      shadow_batch_obj,
				      batch_start_offset,
				      batch_len,
				      is_master);
1230 1231
	if (ret)
		goto err;
1232

1233 1234 1235
	ret = i915_gem_obj_ggtt_pin(shadow_batch_obj, 0, 0);
	if (ret)
		goto err;
1236

C
Chris Wilson 已提交
1237 1238
	i915_gem_object_unpin_pages(shadow_batch_obj);

1239
	memset(shadow_exec_entry, 0, sizeof(*shadow_exec_entry));
1240

1241 1242
	vma = i915_gem_obj_to_ggtt(shadow_batch_obj);
	vma->exec_entry = shadow_exec_entry;
C
Chris Wilson 已提交
1243
	vma->exec_entry->flags = __EXEC_OBJECT_HAS_PIN;
1244
	i915_gem_object_get(shadow_batch_obj);
1245
	list_add_tail(&vma->exec_list, &eb->vmas);
1246

1247 1248 1249
	shadow_batch_obj->base.pending_read_domains = I915_GEM_DOMAIN_COMMAND;

	return shadow_batch_obj;
1250

1251
err:
C
Chris Wilson 已提交
1252
	i915_gem_object_unpin_pages(shadow_batch_obj);
1253 1254 1255 1256
	if (ret == -EACCES) /* unhandled chained batch */
		return batch_obj;
	else
		return ERR_PTR(ret);
1257
}
1258

1259 1260 1261 1262
static int
execbuf_submit(struct i915_execbuffer_params *params,
	       struct drm_i915_gem_execbuffer2 *args,
	       struct list_head *vmas)
1263
{
1264
	struct drm_i915_private *dev_priv = params->request->i915;
1265
	u64 exec_start, exec_len;
1266 1267
	int instp_mode;
	u32 instp_mask;
C
Chris Wilson 已提交
1268
	int ret;
1269

1270
	ret = i915_gem_execbuffer_move_to_gpu(params->request, vmas);
1271
	if (ret)
C
Chris Wilson 已提交
1272
		return ret;
1273

1274
	ret = i915_switch_context(params->request);
1275
	if (ret)
C
Chris Wilson 已提交
1276
		return ret;
1277 1278 1279 1280 1281 1282 1283

	instp_mode = args->flags & I915_EXEC_CONSTANTS_MASK;
	instp_mask = I915_EXEC_CONSTANTS_MASK;
	switch (instp_mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
1284
		if (instp_mode != 0 && params->engine->id != RCS) {
1285
			DRM_DEBUG("non-0 rel constants mode on non-RCS\n");
C
Chris Wilson 已提交
1286
			return -EINVAL;
1287 1288 1289
		}

		if (instp_mode != dev_priv->relative_constants_mode) {
1290
			if (INTEL_INFO(dev_priv)->gen < 4) {
1291
				DRM_DEBUG("no rel constants on pre-gen4\n");
C
Chris Wilson 已提交
1292
				return -EINVAL;
1293 1294
			}

1295
			if (INTEL_INFO(dev_priv)->gen > 5 &&
1296 1297
			    instp_mode == I915_EXEC_CONSTANTS_REL_SURFACE) {
				DRM_DEBUG("rel surface constants mode invalid on gen5+\n");
C
Chris Wilson 已提交
1298
				return -EINVAL;
1299 1300 1301
			}

			/* The HW changed the meaning on this bit on gen6 */
1302
			if (INTEL_INFO(dev_priv)->gen >= 6)
1303 1304 1305 1306 1307
				instp_mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
		}
		break;
	default:
		DRM_DEBUG("execbuf with unknown constants: %d\n", instp_mode);
C
Chris Wilson 已提交
1308
		return -EINVAL;
1309 1310
	}

1311
	if (params->engine->id == RCS &&
C
Chris Wilson 已提交
1312
	    instp_mode != dev_priv->relative_constants_mode) {
1313
		struct intel_ring *ring = params->request->ring;
1314

1315
		ret = intel_ring_begin(params->request, 4);
1316
		if (ret)
C
Chris Wilson 已提交
1317
			return ret;
1318

1319 1320 1321 1322 1323
		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit_reg(ring, INSTPM);
		intel_ring_emit(ring, instp_mask << 16 | instp_mode);
		intel_ring_advance(ring);
1324 1325 1326 1327 1328

		dev_priv->relative_constants_mode = instp_mode;
	}

	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
1329
		ret = i915_reset_gen7_sol_offsets(params->request);
1330
		if (ret)
C
Chris Wilson 已提交
1331
			return ret;
1332 1333
	}

1334 1335 1336 1337
	exec_len   = args->batch_len;
	exec_start = params->batch_obj_vm_offset +
		     params->args_batch_start_offset;

1338 1339 1340
	if (exec_len == 0)
		exec_len = params->batch_obj->base.size;

1341 1342 1343
	ret = params->engine->emit_bb_start(params->request,
					    exec_start, exec_len,
					    params->dispatch_flags);
C
Chris Wilson 已提交
1344 1345
	if (ret)
		return ret;
1346

1347
	trace_i915_gem_ring_dispatch(params->request, params->dispatch_flags);
1348

1349
	i915_gem_execbuffer_move_to_active(vmas, params->request);
1350

C
Chris Wilson 已提交
1351
	return 0;
1352 1353
}

1354 1355
/**
 * Find one BSD ring to dispatch the corresponding BSD command.
1356
 * The engine index is returned.
1357
 */
1358
static unsigned int
1359 1360
gen8_dispatch_bsd_engine(struct drm_i915_private *dev_priv,
			 struct drm_file *file)
1361 1362 1363
{
	struct drm_i915_file_private *file_priv = file->driver_priv;

1364
	/* Check whether the file_priv has already selected one ring. */
1365
	if ((int)file_priv->bsd_engine < 0) {
1366
		/* If not, use the ping-pong mechanism to select one. */
1367
		mutex_lock(&dev_priv->drm.struct_mutex);
1368 1369
		file_priv->bsd_engine = dev_priv->mm.bsd_engine_dispatch_index;
		dev_priv->mm.bsd_engine_dispatch_index ^= 1;
1370
		mutex_unlock(&dev_priv->drm.struct_mutex);
1371
	}
1372

1373
	return file_priv->bsd_engine;
1374 1375
}

1376 1377
#define I915_USER_RINGS (4)

1378
static const enum intel_engine_id user_ring_map[I915_USER_RINGS + 1] = {
1379 1380 1381 1382 1383 1384 1385
	[I915_EXEC_DEFAULT]	= RCS,
	[I915_EXEC_RENDER]	= RCS,
	[I915_EXEC_BLT]		= BCS,
	[I915_EXEC_BSD]		= VCS,
	[I915_EXEC_VEBOX]	= VECS
};

1386 1387 1388 1389
static struct intel_engine_cs *
eb_select_engine(struct drm_i915_private *dev_priv,
		 struct drm_file *file,
		 struct drm_i915_gem_execbuffer2 *args)
1390 1391
{
	unsigned int user_ring_id = args->flags & I915_EXEC_RING_MASK;
1392
	struct intel_engine_cs *engine;
1393 1394 1395

	if (user_ring_id > I915_USER_RINGS) {
		DRM_DEBUG("execbuf with unknown ring: %u\n", user_ring_id);
1396
		return NULL;
1397 1398 1399 1400 1401 1402
	}

	if ((user_ring_id != I915_EXEC_BSD) &&
	    ((args->flags & I915_EXEC_BSD_MASK) != 0)) {
		DRM_DEBUG("execbuf with non bsd ring but with invalid "
			  "bsd dispatch flags: %d\n", (int)(args->flags));
1403
		return NULL;
1404 1405 1406 1407 1408 1409
	}

	if (user_ring_id == I915_EXEC_BSD && HAS_BSD2(dev_priv)) {
		unsigned int bsd_idx = args->flags & I915_EXEC_BSD_MASK;

		if (bsd_idx == I915_EXEC_BSD_DEFAULT) {
1410
			bsd_idx = gen8_dispatch_bsd_engine(dev_priv, file);
1411 1412
		} else if (bsd_idx >= I915_EXEC_BSD_RING1 &&
			   bsd_idx <= I915_EXEC_BSD_RING2) {
1413
			bsd_idx >>= I915_EXEC_BSD_SHIFT;
1414 1415 1416 1417
			bsd_idx--;
		} else {
			DRM_DEBUG("execbuf with unknown bsd ring: %u\n",
				  bsd_idx);
1418
			return NULL;
1419 1420
		}

1421
		engine = &dev_priv->engine[_VCS(bsd_idx)];
1422
	} else {
1423
		engine = &dev_priv->engine[user_ring_map[user_ring_id]];
1424 1425
	}

1426
	if (!intel_engine_initialized(engine)) {
1427
		DRM_DEBUG("execbuf with invalid ring: %u\n", user_ring_id);
1428
		return NULL;
1429 1430
	}

1431
	return engine;
1432 1433
}

1434 1435 1436 1437
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
1438
		       struct drm_i915_gem_exec_object2 *exec)
1439
{
1440 1441
	struct drm_i915_private *dev_priv = to_i915(dev);
	struct i915_ggtt *ggtt = &dev_priv->ggtt;
1442
	struct eb_vmas *eb;
1443
	struct drm_i915_gem_object *batch_obj;
1444
	struct drm_i915_gem_exec_object2 shadow_exec_entry;
1445
	struct intel_engine_cs *engine;
1446
	struct i915_gem_context *ctx;
1447
	struct i915_address_space *vm;
1448 1449
	struct i915_execbuffer_params params_master; /* XXX: will be removed later */
	struct i915_execbuffer_params *params = &params_master;
1450
	const u32 ctx_id = i915_execbuffer2_get_context_id(*args);
1451
	u32 dispatch_flags;
1452
	int ret;
1453
	bool need_relocs;
1454

1455
	if (!i915_gem_check_execbuffer(args))
1456 1457
		return -EINVAL;

1458
	ret = validate_exec_list(dev, exec, args->buffer_count);
1459 1460 1461
	if (ret)
		return ret;

1462
	dispatch_flags = 0;
1463
	if (args->flags & I915_EXEC_SECURE) {
1464
		if (!drm_is_current_master(file) || !capable(CAP_SYS_ADMIN))
1465 1466
		    return -EPERM;

1467
		dispatch_flags |= I915_DISPATCH_SECURE;
1468
	}
1469
	if (args->flags & I915_EXEC_IS_PINNED)
1470
		dispatch_flags |= I915_DISPATCH_PINNED;
1471

1472 1473 1474
	engine = eb_select_engine(dev_priv, file, args);
	if (!engine)
		return -EINVAL;
1475 1476

	if (args->buffer_count < 1) {
1477
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1478 1479 1480
		return -EINVAL;
	}

1481 1482 1483 1484 1485
	if (args->flags & I915_EXEC_RESOURCE_STREAMER) {
		if (!HAS_RESOURCE_STREAMER(dev)) {
			DRM_DEBUG("RS is only allowed for Haswell, Gen8 and above\n");
			return -EINVAL;
		}
1486
		if (engine->id != RCS) {
1487
			DRM_DEBUG("RS is not available on %s\n",
1488
				 engine->name);
1489 1490 1491 1492 1493 1494
			return -EINVAL;
		}

		dispatch_flags |= I915_DISPATCH_RS;
	}

1495 1496 1497 1498 1499 1500
	/* Take a local wakeref for preparing to dispatch the execbuf as
	 * we expect to access the hardware fairly frequently in the
	 * process. Upon first dispatch, we acquire another prolonged
	 * wakeref that we hold until the GPU has been idle for at least
	 * 100ms.
	 */
1501 1502
	intel_runtime_pm_get(dev_priv);

1503 1504 1505 1506
	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

1507
	ctx = i915_gem_validate_context(dev, file, engine, ctx_id);
1508
	if (IS_ERR(ctx)) {
1509
		mutex_unlock(&dev->struct_mutex);
1510
		ret = PTR_ERR(ctx);
1511
		goto pre_mutex_err;
1512
	}
1513

1514
	i915_gem_context_get(ctx);
1515

1516 1517 1518
	if (ctx->ppgtt)
		vm = &ctx->ppgtt->base;
	else
1519
		vm = &ggtt->base;
1520

1521 1522
	memset(&params_master, 0x00, sizeof(params_master));

B
Ben Widawsky 已提交
1523
	eb = eb_create(args);
1524
	if (eb == NULL) {
1525
		i915_gem_context_put(ctx);
1526 1527 1528 1529 1530
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

1531
	/* Look up object handles */
1532
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1533 1534
	if (ret)
		goto err;
1535

1536
	/* take note of the batch buffer before we might reorder the lists */
1537
	batch_obj = eb_get_batch(eb);
1538

1539
	/* Move the objects en-masse into the GTT, evicting if necessary. */
1540
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1541 1542
	ret = i915_gem_execbuffer_reserve(engine, &eb->vmas, ctx,
					  &need_relocs);
1543 1544 1545 1546
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
1547
	if (need_relocs)
B
Ben Widawsky 已提交
1548
		ret = i915_gem_execbuffer_relocate(eb);
1549 1550
	if (ret) {
		if (ret == -EFAULT) {
1551 1552
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file,
								engine,
1553
								eb, exec, ctx);
1554 1555 1556 1557 1558 1559 1560 1561
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
1562
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1563 1564 1565 1566
		ret = -EINVAL;
		goto err;
	}

1567
	params->args_batch_start_offset = args->batch_start_offset;
1568
	if (intel_engine_needs_cmd_parser(engine) && args->batch_len) {
1569 1570
		struct drm_i915_gem_object *parsed_batch_obj;

1571 1572 1573 1574 1575 1576
		parsed_batch_obj = i915_gem_execbuffer_parse(engine,
							     &shadow_exec_entry,
							     eb,
							     batch_obj,
							     args->batch_start_offset,
							     args->batch_len,
1577
							     drm_is_current_master(file));
1578 1579
		if (IS_ERR(parsed_batch_obj)) {
			ret = PTR_ERR(parsed_batch_obj);
1580 1581
			goto err;
		}
1582 1583

		/*
1584 1585
		 * parsed_batch_obj == batch_obj means batch not fully parsed:
		 * Accept, but don't promote to secure.
1586 1587
		 */

1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598
		if (parsed_batch_obj != batch_obj) {
			/*
			 * Batch parsed and accepted:
			 *
			 * Set the DISPATCH_SECURE bit to remove the NON_SECURE
			 * bit from MI_BATCH_BUFFER_START commands issued in
			 * the dispatch_execbuffer implementations. We
			 * specifically don't want that set on batches the
			 * command parser has accepted.
			 */
			dispatch_flags |= I915_DISPATCH_SECURE;
1599
			params->args_batch_start_offset = 0;
1600 1601
			batch_obj = parsed_batch_obj;
		}
1602 1603
	}

1604 1605
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

1606 1607
	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
B
Ben Widawsky 已提交
1608
	 * hsw should have this fixed, but bdw mucks it up again. */
1609
	if (dispatch_flags & I915_DISPATCH_SECURE) {
1610 1611 1612 1613 1614 1615
		/*
		 * So on first glance it looks freaky that we pin the batch here
		 * outside of the reservation loop. But:
		 * - The batch is already pinned into the relevant ppgtt, so we
		 *   already have the backing storage fully allocated.
		 * - No other BO uses the global gtt (well contexts, but meh),
1616
		 *   so we don't really have issues with multiple objects not
1617 1618 1619 1620 1621 1622
		 *   fitting due to fragmentation.
		 * So this is actually safe.
		 */
		ret = i915_gem_obj_ggtt_pin(batch_obj, 0, 0);
		if (ret)
			goto err;
1623

1624
		params->batch_obj_vm_offset = i915_gem_obj_ggtt_offset(batch_obj);
1625
	} else
1626
		params->batch_obj_vm_offset = i915_gem_obj_offset(batch_obj, vm);
1627

1628
	/* Allocate a request for this batch buffer nice and early. */
1629 1630 1631
	params->request = i915_gem_request_alloc(engine, ctx);
	if (IS_ERR(params->request)) {
		ret = PTR_ERR(params->request);
1632
		goto err_batch_unpin;
1633
	}
1634

1635
	ret = i915_gem_request_add_to_client(params->request, file);
1636
	if (ret)
1637
		goto err_request;
1638

1639 1640 1641 1642 1643 1644 1645 1646
	/*
	 * Save assorted stuff away to pass through to *_submission().
	 * NB: This data should be 'persistent' and not local as it will
	 * kept around beyond the duration of the IOCTL once the GPU
	 * scheduler arrives.
	 */
	params->dev                     = dev;
	params->file                    = file;
1647
	params->engine                    = engine;
1648 1649 1650 1651
	params->dispatch_flags          = dispatch_flags;
	params->batch_obj               = batch_obj;
	params->ctx                     = ctx;

1652
	ret = execbuf_submit(params, args, &eb->vmas);
1653 1654
err_request:
	i915_gem_execbuffer_retire_commands(params);
1655

1656
err_batch_unpin:
1657 1658 1659 1660 1661 1662
	/*
	 * FIXME: We crucially rely upon the active tracking for the (ppgtt)
	 * batch vma for correctness. For less ugly and less fragility this
	 * needs to be adjusted to also track the ggtt batch vma properly as
	 * active.
	 */
1663
	if (dispatch_flags & I915_DISPATCH_SECURE)
1664
		i915_gem_object_ggtt_unpin(batch_obj);
1665

1666
err:
1667
	/* the request owns the ref now */
1668
	i915_gem_context_put(ctx);
1669
	eb_destroy(eb);
1670 1671 1672 1673

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
1674 1675 1676
	/* intel_gpu_busy should also get a ref, so it will free when the device
	 * is really idle. */
	intel_runtime_pm_put(dev_priv);
1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
1695
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1696 1697 1698 1699 1700 1701 1702
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
1703
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1704 1705 1706 1707 1708 1709
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
1710
			     u64_to_user_ptr(args->buffers_ptr),
1711 1712
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
1713
		DRM_DEBUG("copy %d exec entries failed %d\n",
1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737 1738 1739 1740
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
1741
	i915_execbuffer2_set_context_id(exec2, 0);
1742

1743
	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list);
1744
	if (!ret) {
1745
		struct drm_i915_gem_exec_object __user *user_exec_list =
1746
			u64_to_user_ptr(args->buffers_ptr);
1747

1748
		/* Copy the new buffer offsets back to the user's exec list. */
1749
		for (i = 0; i < args->buffer_count; i++) {
1750 1751
			exec2_list[i].offset =
				gen8_canonical_addr(exec2_list[i].offset);
1752 1753 1754 1755 1756 1757 1758 1759 1760 1761
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user (%d)\n",
					  args->buffer_count, ret);
				break;
			}
1762 1763 1764 1765 1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

1778 1779
	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1780
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1781 1782 1783
		return -EINVAL;
	}

1784 1785 1786 1787 1788
	if (args->rsvd2 != 0) {
		DRM_DEBUG("dirty rvsd2 field\n");
		return -EINVAL;
	}

1789 1790 1791
	exec2_list = drm_malloc_gfp(args->buffer_count,
				    sizeof(*exec2_list),
				    GFP_TEMPORARY);
1792
	if (exec2_list == NULL) {
1793
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1794 1795 1796 1797
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
1798
			     u64_to_user_ptr(args->buffers_ptr),
1799 1800
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
1801
		DRM_DEBUG("copy %d exec entries failed %d\n",
1802 1803 1804 1805 1806
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

1807
	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list);
1808 1809
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
1810
		struct drm_i915_gem_exec_object2 __user *user_exec_list =
1811
				   u64_to_user_ptr(args->buffers_ptr);
1812 1813 1814
		int i;

		for (i = 0; i < args->buffer_count; i++) {
1815 1816
			exec2_list[i].offset =
				gen8_canonical_addr(exec2_list[i].offset);
1817 1818 1819 1820 1821 1822 1823 1824 1825 1826
			ret = __copy_to_user(&user_exec_list[i].offset,
					     &exec2_list[i].offset,
					     sizeof(user_exec_list[i].offset));
			if (ret) {
				ret = -EFAULT;
				DRM_DEBUG("failed to copy %d exec entries "
					  "back to user\n",
					  args->buffer_count);
				break;
			}
1827 1828 1829 1830 1831 1832
		}
	}

	drm_free_large(exec2_list);
	return ret;
}