i915_gem_execbuffer.c 35.4 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28
/*
 * Copyright © 2008,2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Eric Anholt <eric@anholt.net>
 *    Chris Wilson <chris@chris-wilson.co.uk>
 *
 */

29 30
#include <drm/drmP.h>
#include <drm/i915_drm.h>
31 32 33
#include "i915_drv.h"
#include "i915_trace.h"
#include "intel_drv.h"
34
#include <linux/dma_remapping.h>
35

36 37
struct eb_vmas {
	struct list_head vmas;
38
	int and;
39
	union {
40
		struct i915_vma *lut[0];
41 42
		struct hlist_head buckets[0];
	};
43 44
};

45 46
static struct eb_vmas *
eb_create(struct drm_i915_gem_execbuffer2 *args, struct i915_address_space *vm)
47
{
48
	struct eb_vmas *eb = NULL;
49 50 51

	if (args->flags & I915_EXEC_HANDLE_LUT) {
		int size = args->buffer_count;
52 53
		size *= sizeof(struct i915_vma *);
		size += sizeof(struct eb_vmas);
54 55 56 57 58 59
		eb = kmalloc(size, GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
	}

	if (eb == NULL) {
		int size = args->buffer_count;
		int count = PAGE_SIZE / sizeof(struct hlist_head) / 2;
L
Lauri Kasanen 已提交
60
		BUILD_BUG_ON_NOT_POWER_OF_2(PAGE_SIZE / sizeof(struct hlist_head));
61 62 63
		while (count > 2*size)
			count >>= 1;
		eb = kzalloc(count*sizeof(struct hlist_head) +
64
			     sizeof(struct eb_vmas),
65 66 67 68 69 70 71 72
			     GFP_TEMPORARY);
		if (eb == NULL)
			return eb;

		eb->and = count - 1;
	} else
		eb->and = -args->buffer_count;

73
	INIT_LIST_HEAD(&eb->vmas);
74 75 76 77
	return eb;
}

static void
78
eb_reset(struct eb_vmas *eb)
79
{
80 81
	if (eb->and >= 0)
		memset(eb->buckets, 0, (eb->and+1)*sizeof(struct hlist_head));
82 83
}

84
static int
85 86 87 88 89
eb_lookup_vmas(struct eb_vmas *eb,
	       struct drm_i915_gem_exec_object2 *exec,
	       const struct drm_i915_gem_execbuffer2 *args,
	       struct i915_address_space *vm,
	       struct drm_file *file)
90
{
91 92 93
	struct drm_i915_gem_object *obj;
	struct list_head objects;
	int i, ret = 0;
94

95
	INIT_LIST_HEAD(&objects);
96
	spin_lock(&file->table_lock);
97 98
	/* Grab a reference to the object and release the lock so we can lookup
	 * or create the VMA without using GFP_ATOMIC */
99
	for (i = 0; i < args->buffer_count; i++) {
100 101 102 103 104
		obj = to_intel_bo(idr_find(&file->object_idr, exec[i].handle));
		if (obj == NULL) {
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Invalid object handle %d at index %d\n",
				   exec[i].handle, i);
105 106
			ret = -ENOENT;
			goto out;
107 108
		}

109
		if (!list_empty(&obj->obj_exec_link)) {
110 111 112
			spin_unlock(&file->table_lock);
			DRM_DEBUG("Object %p [handle %d, index %d] appears more than once in object list\n",
				   obj, exec[i].handle, i);
113 114
			ret = -EINVAL;
			goto out;
115 116 117
		}

		drm_gem_object_reference(&obj->base);
118 119 120
		list_add_tail(&obj->obj_exec_link, &objects);
	}
	spin_unlock(&file->table_lock);
121

122 123 124 125
	i = 0;
	list_for_each_entry(obj, &objects, obj_exec_link) {
		struct i915_vma *vma;

126 127 128 129 130 131 132 133
		/*
		 * NOTE: We can leak any vmas created here when something fails
		 * later on. But that's no issue since vma_unbind can deal with
		 * vmas which are not actually bound. And since only
		 * lookup_or_create exists as an interface to get at the vma
		 * from the (obj, vm) we don't run the risk of creating
		 * duplicated vmas for the same vm.
		 */
134 135 136 137 138 139 140 141 142 143
		vma = i915_gem_obj_lookup_or_create_vma(obj, vm);
		if (IS_ERR(vma)) {
			DRM_DEBUG("Failed to lookup VMA\n");
			ret = PTR_ERR(vma);
			goto out;
		}

		list_add_tail(&vma->exec_list, &eb->vmas);

		vma->exec_entry = &exec[i];
144
		if (eb->and < 0) {
145
			eb->lut[i] = vma;
146 147
		} else {
			uint32_t handle = args->flags & I915_EXEC_HANDLE_LUT ? i : exec[i].handle;
148 149
			vma->exec_handle = handle;
			hlist_add_head(&vma->exec_node,
150 151
				       &eb->buckets[handle & eb->and]);
		}
152
		++i;
153 154
	}

155 156 157 158 159 160 161 162 163 164 165

out:
	while (!list_empty(&objects)) {
		obj = list_first_entry(&objects,
				       struct drm_i915_gem_object,
				       obj_exec_link);
		list_del_init(&obj->obj_exec_link);
		if (ret)
			drm_gem_object_unreference(&obj->base);
	}
	return ret;
166 167
}

168
static struct i915_vma *eb_get_vma(struct eb_vmas *eb, unsigned long handle)
169
{
170 171 172 173 174 175 176
	if (eb->and < 0) {
		if (handle >= -eb->and)
			return NULL;
		return eb->lut[handle];
	} else {
		struct hlist_head *head;
		struct hlist_node *node;
177

178 179
		head = &eb->buckets[handle & eb->and];
		hlist_for_each(node, head) {
180
			struct i915_vma *vma;
181

182 183 184
			vma = hlist_entry(node, struct i915_vma, exec_node);
			if (vma->exec_handle == handle)
				return vma;
185 186 187
		}
		return NULL;
	}
188 189
}

190 191 192
static void eb_destroy(struct eb_vmas *eb) {
	while (!list_empty(&eb->vmas)) {
		struct i915_vma *vma;
193

194 195
		vma = list_first_entry(&eb->vmas,
				       struct i915_vma,
196
				       exec_list);
197 198
		list_del_init(&vma->exec_list);
		drm_gem_object_unreference(&vma->obj->base);
199
	}
200 201 202
	kfree(eb);
}

203 204
static inline int use_cpu_reloc(struct drm_i915_gem_object *obj)
{
205 206
	return (HAS_LLC(obj->base.dev) ||
		obj->base.write_domain == I915_GEM_DOMAIN_CPU ||
207
		!obj->map_and_fenceable ||
208 209 210
		obj->cache_level != I915_CACHE_NONE);
}

211 212 213 214 215 216 217 218
static int
relocate_entry_cpu(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc)
{
	uint32_t page_offset = offset_in_page(reloc->offset);
	char *vaddr;
	int ret = -EINVAL;

219
	ret = i915_gem_object_set_to_cpu_domain(obj, true);
220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260
	if (ret)
		return ret;

	vaddr = kmap_atomic(i915_gem_object_get_page(obj,
				reloc->offset >> PAGE_SHIFT));
	*(uint32_t *)(vaddr + page_offset) = reloc->delta;
	kunmap_atomic(vaddr);

	return 0;
}

static int
relocate_entry_gtt(struct drm_i915_gem_object *obj,
		   struct drm_i915_gem_relocation_entry *reloc)
{
	struct drm_device *dev = obj->base.dev;
	struct drm_i915_private *dev_priv = dev->dev_private;
	uint32_t __iomem *reloc_entry;
	void __iomem *reloc_page;
	int ret = -EINVAL;

	ret = i915_gem_object_set_to_gtt_domain(obj, true);
	if (ret)
		return ret;

	ret = i915_gem_object_put_fence(obj);
	if (ret)
		return ret;

	/* Map the page containing the relocation we're going to perform.  */
	reloc->offset += i915_gem_obj_ggtt_offset(obj);
	reloc_page = io_mapping_map_atomic_wc(dev_priv->gtt.mappable,
			reloc->offset & PAGE_MASK);
	reloc_entry = (uint32_t __iomem *)
		(reloc_page + offset_in_page(reloc->offset));
	iowrite32(reloc->delta, reloc_entry);
	io_mapping_unmap_atomic(reloc_page);

	return 0;
}

261 262
static int
i915_gem_execbuffer_relocate_entry(struct drm_i915_gem_object *obj,
263
				   struct eb_vmas *eb,
264 265
				   struct drm_i915_gem_relocation_entry *reloc,
				   struct i915_address_space *vm)
266 267 268
{
	struct drm_device *dev = obj->base.dev;
	struct drm_gem_object *target_obj;
269
	struct drm_i915_gem_object *target_i915_obj;
270
	struct i915_vma *target_vma;
271 272 273
	uint32_t target_offset;
	int ret = -EINVAL;

274
	/* we've already hold a reference to all valid objects */
275 276
	target_vma = eb_get_vma(eb, reloc->target_handle);
	if (unlikely(target_vma == NULL))
277
		return -ENOENT;
278 279
	target_i915_obj = target_vma->obj;
	target_obj = &target_vma->obj->base;
280

281
	target_offset = i915_gem_obj_ggtt_offset(target_i915_obj);
282

283 284 285 286 287 288 289 290 291 292
	/* Sandybridge PPGTT errata: We need a global gtt mapping for MI and
	 * pipe_control writes because the gpu doesn't properly redirect them
	 * through the ppgtt for non_secure batchbuffers. */
	if (unlikely(IS_GEN6(dev) &&
	    reloc->write_domain == I915_GEM_DOMAIN_INSTRUCTION &&
	    !target_i915_obj->has_global_gtt_mapping)) {
		i915_gem_gtt_bind_object(target_i915_obj,
					 target_i915_obj->cache_level);
	}

293
	/* Validate that the target is in a valid r/w GPU domain */
294
	if (unlikely(reloc->write_domain & (reloc->write_domain - 1))) {
295
		DRM_DEBUG("reloc with multiple write domains: "
296 297 298 299 300 301
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
302
		return ret;
303
	}
304 305
	if (unlikely((reloc->write_domain | reloc->read_domains)
		     & ~I915_GEM_GPU_DOMAINS)) {
306
		DRM_DEBUG("reloc with read/write non-GPU domains: "
307 308 309 310 311 312
			  "obj %p target %d offset %d "
			  "read %08x write %08x",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  reloc->read_domains,
			  reloc->write_domain);
313
		return ret;
314 315 316 317 318 319 320 321 322
	}

	target_obj->pending_read_domains |= reloc->read_domains;
	target_obj->pending_write_domain |= reloc->write_domain;

	/* If the relocation already has the right value in it, no
	 * more work needs to be done.
	 */
	if (target_offset == reloc->presumed_offset)
323
		return 0;
324 325

	/* Check that the relocation address is valid... */
326
	if (unlikely(reloc->offset > obj->base.size - 4)) {
327
		DRM_DEBUG("Relocation beyond object bounds: "
328 329 330 331
			  "obj %p target %d offset %d size %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset,
			  (int) obj->base.size);
332
		return ret;
333
	}
334
	if (unlikely(reloc->offset & 3)) {
335
		DRM_DEBUG("Relocation not 4-byte aligned: "
336 337 338
			  "obj %p target %d offset %d.\n",
			  obj, reloc->target_handle,
			  (int) reloc->offset);
339
		return ret;
340 341
	}

342 343 344 345
	/* We can't wait for rendering with pagefaults disabled */
	if (obj->active && in_atomic())
		return -EFAULT;

346
	reloc->delta += target_offset;
347 348 349 350
	if (use_cpu_reloc(obj))
		ret = relocate_entry_cpu(obj, reloc);
	else
		ret = relocate_entry_gtt(obj, reloc);
351

352 353 354
	if (ret)
		return ret;

355 356 357
	/* and update the user's relocation entry */
	reloc->presumed_offset = target_offset;

358
	return 0;
359 360 361
}

static int
362 363
i915_gem_execbuffer_relocate_vma(struct i915_vma *vma,
				 struct eb_vmas *eb)
364
{
365 366
#define N_RELOC(x) ((x) / sizeof(struct drm_i915_gem_relocation_entry))
	struct drm_i915_gem_relocation_entry stack_reloc[N_RELOC(512)];
367
	struct drm_i915_gem_relocation_entry __user *user_relocs;
368
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
369
	int remain, ret;
370

V
Ville Syrjälä 已提交
371
	user_relocs = to_user_ptr(entry->relocs_ptr);
372

373 374 375 376 377 378 379 380 381
	remain = entry->relocation_count;
	while (remain) {
		struct drm_i915_gem_relocation_entry *r = stack_reloc;
		int count = remain;
		if (count > ARRAY_SIZE(stack_reloc))
			count = ARRAY_SIZE(stack_reloc);
		remain -= count;

		if (__copy_from_user_inatomic(r, user_relocs, count*sizeof(r[0])))
382 383
			return -EFAULT;

384 385
		do {
			u64 offset = r->presumed_offset;
386

387 388
			ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, r,
								 vma->vm);
389 390 391 392 393 394 395 396 397 398 399 400 401
			if (ret)
				return ret;

			if (r->presumed_offset != offset &&
			    __copy_to_user_inatomic(&user_relocs->presumed_offset,
						    &r->presumed_offset,
						    sizeof(r->presumed_offset))) {
				return -EFAULT;
			}

			user_relocs++;
			r++;
		} while (--count);
402 403 404
	}

	return 0;
405
#undef N_RELOC
406 407 408
}

static int
409 410 411
i915_gem_execbuffer_relocate_vma_slow(struct i915_vma *vma,
				      struct eb_vmas *eb,
				      struct drm_i915_gem_relocation_entry *relocs)
412
{
413
	const struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
414 415 416
	int i, ret;

	for (i = 0; i < entry->relocation_count; i++) {
417 418
		ret = i915_gem_execbuffer_relocate_entry(vma->obj, eb, &relocs[i],
							 vma->vm);
419 420 421 422 423 424 425 426
		if (ret)
			return ret;
	}

	return 0;
}

static int
427
i915_gem_execbuffer_relocate(struct eb_vmas *eb,
428
			     struct i915_address_space *vm)
429
{
430
	struct i915_vma *vma;
431 432 433 434 435 436 437 438 439 440
	int ret = 0;

	/* This is the fast path and we cannot handle a pagefault whilst
	 * holding the struct mutex lest the user pass in the relocations
	 * contained within a mmaped bo. For in such a case we, the page
	 * fault handler would call i915_gem_fault() and we would try to
	 * acquire the struct mutex again. Obviously this is bad and so
	 * lockdep complains vehemently.
	 */
	pagefault_disable();
441 442
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		ret = i915_gem_execbuffer_relocate_vma(vma, eb);
443
		if (ret)
444
			break;
445
	}
446
	pagefault_enable();
447

448
	return ret;
449 450
}

451 452
/*
 * Driver-private bookkeeping bits stored in an exec object's flags while it
 * is reserved: HAS_PIN is set after a successful pin, HAS_FENCE after a
 * fence has been pinned; both are cleared again on unreserve.
 *
 * The constants are unsigned on purpose. (1<<31) shifts into the sign bit of
 * int, which is undefined behaviour, and the resulting negative value
 * sign-extends when OR-ed into a wider flags word, setting every bit above
 * bit 31 as well.
 * NOTE(review): the flags field is presumably 64 bits wide - confirm against
 * the uapi definition of struct drm_i915_gem_exec_object2.
 */
#define  __EXEC_OBJECT_HAS_PIN (1U<<31)
#define  __EXEC_OBJECT_HAS_FENCE (1U<<30)
453

454
static int
455
need_reloc_mappable(struct i915_vma *vma)
456
{
457 458 459
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
	return entry->relocation_count && !use_cpu_reloc(vma->obj) &&
		i915_is_ggtt(vma->vm);
460 461
}

462
static int
463 464 465
i915_gem_execbuffer_reserve_vma(struct i915_vma *vma,
				struct intel_ring_buffer *ring,
				bool *need_reloc)
466
{
467 468
	struct drm_i915_private *dev_priv = ring->dev->dev_private;
	struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
469 470
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	bool need_fence, need_mappable;
471
	struct drm_i915_gem_object *obj = vma->obj;
472 473 474 475 476 477
	int ret;

	need_fence =
		has_fenced_gpu_access &&
		entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
		obj->tiling_mode != I915_TILING_NONE;
478
	need_mappable = need_fence || need_reloc_mappable(vma);
479

480
	ret = i915_gem_object_pin(obj, vma->vm, entry->alignment, need_mappable,
481
				  false);
482 483 484
	if (ret)
		return ret;

485 486
	entry->flags |= __EXEC_OBJECT_HAS_PIN;

487 488
	if (has_fenced_gpu_access) {
		if (entry->flags & EXEC_OBJECT_NEEDS_FENCE) {
489
			ret = i915_gem_object_get_fence(obj);
490
			if (ret)
491
				return ret;
492

493
			if (i915_gem_object_pin_fence(obj))
494
				entry->flags |= __EXEC_OBJECT_HAS_FENCE;
495

496
			obj->pending_fenced_gpu_access = true;
497 498 499
		}
	}

500 501 502 503 504 505 506 507
	/* Ensure ppgtt mapping exists if needed */
	if (dev_priv->mm.aliasing_ppgtt && !obj->has_aliasing_ppgtt_mapping) {
		i915_ppgtt_bind_object(dev_priv->mm.aliasing_ppgtt,
				       obj, obj->cache_level);

		obj->has_aliasing_ppgtt_mapping = 1;
	}

508 509
	if (entry->offset != vma->node.start) {
		entry->offset = vma->node.start;
510 511 512 513 514 515 516 517 518 519 520 521
		*need_reloc = true;
	}

	if (entry->flags & EXEC_OBJECT_WRITE) {
		obj->base.pending_read_domains = I915_GEM_DOMAIN_RENDER;
		obj->base.pending_write_domain = I915_GEM_DOMAIN_RENDER;
	}

	if (entry->flags & EXEC_OBJECT_NEEDS_GTT &&
	    !obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(obj, obj->cache_level);

522
	return 0;
523
}
524

525
static void
526
i915_gem_execbuffer_unreserve_vma(struct i915_vma *vma)
527 528
{
	struct drm_i915_gem_exec_object2 *entry;
529
	struct drm_i915_gem_object *obj = vma->obj;
530

531
	if (!drm_mm_node_allocated(&vma->node))
532 533
		return;

534
	entry = vma->exec_entry;
535 536 537 538 539 540 541 542

	if (entry->flags & __EXEC_OBJECT_HAS_FENCE)
		i915_gem_object_unpin_fence(obj);

	if (entry->flags & __EXEC_OBJECT_HAS_PIN)
		i915_gem_object_unpin(obj);

	entry->flags &= ~(__EXEC_OBJECT_HAS_FENCE | __EXEC_OBJECT_HAS_PIN);
543 544
}

545
static int
546
i915_gem_execbuffer_reserve(struct intel_ring_buffer *ring,
547
			    struct list_head *vmas,
548
			    bool *need_relocs)
549
{
550
	struct drm_i915_gem_object *obj;
551
	struct i915_vma *vma;
552
	struct i915_address_space *vm;
553
	struct list_head ordered_vmas;
554 555
	bool has_fenced_gpu_access = INTEL_INFO(ring->dev)->gen < 4;
	int retry;
556

557 558 559 560 561
	if (list_empty(vmas))
		return 0;

	vm = list_first_entry(vmas, struct i915_vma, exec_list)->vm;

562 563
	INIT_LIST_HEAD(&ordered_vmas);
	while (!list_empty(vmas)) {
564 565 566
		struct drm_i915_gem_exec_object2 *entry;
		bool need_fence, need_mappable;

567 568 569
		vma = list_first_entry(vmas, struct i915_vma, exec_list);
		obj = vma->obj;
		entry = vma->exec_entry;
570 571 572 573 574

		need_fence =
			has_fenced_gpu_access &&
			entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
			obj->tiling_mode != I915_TILING_NONE;
575
		need_mappable = need_fence || need_reloc_mappable(vma);
576 577

		if (need_mappable)
578
			list_move(&vma->exec_list, &ordered_vmas);
579
		else
580
			list_move_tail(&vma->exec_list, &ordered_vmas);
581

582
		obj->base.pending_read_domains = I915_GEM_GPU_DOMAINS & ~I915_GEM_DOMAIN_COMMAND;
583
		obj->base.pending_write_domain = 0;
584
		obj->pending_fenced_gpu_access = false;
585
	}
586
	list_splice(&ordered_vmas, vmas);
587 588 589 590 591 592 593 594 595 596

	/* Attempt to pin all of the buffers into the GTT.
	 * This is done in 3 phases:
	 *
	 * 1a. Unbind all objects that do not match the GTT constraints for
	 *     the execbuffer (fenceable, mappable, alignment etc).
	 * 1b. Increment pin count for already bound objects.
	 * 2.  Bind new objects.
	 * 3.  Decrement pin count.
	 *
597
	 * This avoid unnecessary unbinding of later objects in order to make
598 599 600 601
	 * room for the earlier objects *unless* we need to defragment.
	 */
	retry = 0;
	do {
602
		int ret = 0;
603 604

		/* Unbind any ill-fitting objects or pin. */
605 606
		list_for_each_entry(vma, vmas, exec_list) {
			struct drm_i915_gem_exec_object2 *entry = vma->exec_entry;
607
			bool need_fence, need_mappable;
608

609 610 611
			obj = vma->obj;

			if (!drm_mm_node_allocated(&vma->node))
612 613 614
				continue;

			need_fence =
615
				has_fenced_gpu_access &&
616 617
				entry->flags & EXEC_OBJECT_NEEDS_FENCE &&
				obj->tiling_mode != I915_TILING_NONE;
618
			need_mappable = need_fence || need_reloc_mappable(vma);
619

620
			WARN_ON((need_mappable || need_fence) &&
621
			       !i915_is_ggtt(vma->vm));
622

623
			if ((entry->alignment &&
624
			     vma->node.start & (entry->alignment - 1)) ||
625
			    (need_mappable && !obj->map_and_fenceable))
626
				ret = i915_vma_unbind(vma);
627
			else
628
				ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
629
			if (ret)
630 631 632 633
				goto err;
		}

		/* Bind fresh objects */
634 635
		list_for_each_entry(vma, vmas, exec_list) {
			if (drm_mm_node_allocated(&vma->node))
636
				continue;
637

638
			ret = i915_gem_execbuffer_reserve_vma(vma, ring, need_relocs);
639 640
			if (ret)
				goto err;
641 642
		}

643
err:		/* Decrement pin count for bound objects */
644 645
		list_for_each_entry(vma, vmas, exec_list)
			i915_gem_execbuffer_unreserve_vma(vma);
646

C
Chris Wilson 已提交
647
		if (ret != -ENOSPC || retry++)
648 649
			return ret;

650
		ret = i915_gem_evict_vm(vm, true);
651 652 653 654 655 656 657
		if (ret)
			return ret;
	} while (1);
}

static int
i915_gem_execbuffer_relocate_slow(struct drm_device *dev,
658
				  struct drm_i915_gem_execbuffer2 *args,
659
				  struct drm_file *file,
660
				  struct intel_ring_buffer *ring,
661 662
				  struct eb_vmas *eb,
				  struct drm_i915_gem_exec_object2 *exec)
663 664
{
	struct drm_i915_gem_relocation_entry *reloc;
665 666
	struct i915_address_space *vm;
	struct i915_vma *vma;
667
	bool need_relocs;
668
	int *reloc_offset;
669
	int i, total, ret;
670
	int count = args->buffer_count;
671

672 673 674 675 676
	if (WARN_ON(list_empty(&eb->vmas)))
		return 0;

	vm = list_first_entry(&eb->vmas, struct i915_vma, exec_list)->vm;

677
	/* We may process another execbuffer during the unlock... */
678 679 680 681
	while (!list_empty(&eb->vmas)) {
		vma = list_first_entry(&eb->vmas, struct i915_vma, exec_list);
		list_del_init(&vma->exec_list);
		drm_gem_object_unreference(&vma->obj->base);
682 683
	}

684 685 686 687
	mutex_unlock(&dev->struct_mutex);

	total = 0;
	for (i = 0; i < count; i++)
688
		total += exec[i].relocation_count;
689

690
	reloc_offset = drm_malloc_ab(count, sizeof(*reloc_offset));
691
	reloc = drm_malloc_ab(total, sizeof(*reloc));
692 693 694
	if (reloc == NULL || reloc_offset == NULL) {
		drm_free_large(reloc);
		drm_free_large(reloc_offset);
695 696 697 698 699 700 701
		mutex_lock(&dev->struct_mutex);
		return -ENOMEM;
	}

	total = 0;
	for (i = 0; i < count; i++) {
		struct drm_i915_gem_relocation_entry __user *user_relocs;
702 703
		u64 invalid_offset = (u64)-1;
		int j;
704

V
Ville Syrjälä 已提交
705
		user_relocs = to_user_ptr(exec[i].relocs_ptr);
706 707

		if (copy_from_user(reloc+total, user_relocs,
708
				   exec[i].relocation_count * sizeof(*reloc))) {
709 710 711 712 713
			ret = -EFAULT;
			mutex_lock(&dev->struct_mutex);
			goto err;
		}

714 715 716 717 718 719 720 721 722 723 724 725 726 727 728 729 730 731 732
		/* As we do not update the known relocation offsets after
		 * relocating (due to the complexities in lock handling),
		 * we need to mark them as invalid now so that we force the
		 * relocation processing next time. Just in case the target
		 * object is evicted and then rebound into its old
		 * presumed_offset before the next execbuffer - if that
		 * happened we would make the mistake of assuming that the
		 * relocations were valid.
		 */
		for (j = 0; j < exec[i].relocation_count; j++) {
			if (copy_to_user(&user_relocs[j].presumed_offset,
					 &invalid_offset,
					 sizeof(invalid_offset))) {
				ret = -EFAULT;
				mutex_lock(&dev->struct_mutex);
				goto err;
			}
		}

733
		reloc_offset[i] = total;
734
		total += exec[i].relocation_count;
735 736 737 738 739 740 741 742
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret) {
		mutex_lock(&dev->struct_mutex);
		goto err;
	}

743 744
	/* reacquire the objects */
	eb_reset(eb);
745
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
746 747
	if (ret)
		goto err;
748

749
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
750
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
751 752 753
	if (ret)
		goto err;

754 755 756 757
	list_for_each_entry(vma, &eb->vmas, exec_list) {
		int offset = vma->exec_entry - exec;
		ret = i915_gem_execbuffer_relocate_vma_slow(vma, eb,
							    reloc + reloc_offset[offset]);
758 759 760 761 762 763 764 765 766 767 768 769
		if (ret)
			goto err;
	}

	/* Leave the user relocations as are, this is the painfully slow path,
	 * and we want to avoid the complication of dropping the lock whilst
	 * having buffers reserved in the aperture and so causing spurious
	 * ENOSPC for random operations.
	 */

err:
	drm_free_large(reloc);
770
	drm_free_large(reloc_offset);
771 772 773 774
	return ret;
}

static int
775
i915_gem_execbuffer_move_to_gpu(struct intel_ring_buffer *ring,
776
				struct list_head *vmas)
777
{
778
	struct i915_vma *vma;
779
	uint32_t flush_domains = 0;
780
	bool flush_chipset = false;
781
	int ret;
782

783 784
	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
785
		ret = i915_gem_object_sync(obj, ring);
786 787
		if (ret)
			return ret;
788 789

		if (obj->base.write_domain & I915_GEM_DOMAIN_CPU)
790
			flush_chipset |= i915_gem_clflush_object(obj, false);
791 792

		flush_domains |= obj->base.write_domain;
793 794
	}

795
	if (flush_chipset)
796
		i915_gem_chipset_flush(ring->dev);
797 798 799 800

	if (flush_domains & I915_GEM_DOMAIN_GTT)
		wmb();

801 802 803
	/* Unconditionally invalidate gpu caches and ensure that we do flush
	 * any residual writes from the previous batch.
	 */
804
	return intel_ring_invalidate_all_caches(ring);
805 806
}

807 808
static bool
i915_gem_check_execbuffer(struct drm_i915_gem_execbuffer2 *exec)
809
{
810 811 812
	if (exec->flags & __I915_EXEC_UNKNOWN_FLAGS)
		return false;

813
	return ((exec->batch_start_offset | exec->batch_len) & 0x7) == 0;
814 815 816 817 818 819 820
}

static int
validate_exec_list(struct drm_i915_gem_exec_object2 *exec,
		   int count)
{
	int i;
821 822
	int relocs_total = 0;
	int relocs_max = INT_MAX / sizeof(struct drm_i915_gem_relocation_entry);
823 824

	for (i = 0; i < count; i++) {
V
Ville Syrjälä 已提交
825
		char __user *ptr = to_user_ptr(exec[i].relocs_ptr);
826 827
		int length; /* limited by fault_in_pages_readable() */

828 829 830
		if (exec[i].flags & __EXEC_OBJECT_UNKNOWN_FLAGS)
			return -EINVAL;

831 832 833 834 835
		/* First check for malicious input causing overflow in
		 * the worst case where we need to allocate the entire
		 * relocation tree as a single array.
		 */
		if (exec[i].relocation_count > relocs_max - relocs_total)
836
			return -EINVAL;
837
		relocs_total += exec[i].relocation_count;
838 839 840

		length = exec[i].relocation_count *
			sizeof(struct drm_i915_gem_relocation_entry);
841 842 843 844 845
		/*
		 * We must check that the entire relocation array is safe
		 * to read, but since we may need to update the presumed
		 * offsets during execution, check for full write access.
		 */
846 847 848
		if (!access_ok(VERIFY_WRITE, ptr, length))
			return -EFAULT;

849 850 851 852
		if (likely(!i915_prefault_disable)) {
			if (fault_in_multipages_readable(ptr, length))
				return -EFAULT;
		}
853 854 855 856 857
	}

	return 0;
}

858
static void
859
i915_gem_execbuffer_move_to_active(struct list_head *vmas,
860
				   struct intel_ring_buffer *ring)
861
{
862
	struct i915_vma *vma;
863

864 865
	list_for_each_entry(vma, vmas, exec_list) {
		struct drm_i915_gem_object *obj = vma->obj;
866 867
		u32 old_read = obj->base.read_domains;
		u32 old_write = obj->base.write_domain;
C
Chris Wilson 已提交
868

869
		obj->base.write_domain = obj->base.pending_write_domain;
870 871 872
		if (obj->base.write_domain == 0)
			obj->base.pending_read_domains |= obj->base.read_domains;
		obj->base.read_domains = obj->base.pending_read_domains;
873 874
		obj->fenced_gpu_access = obj->pending_fenced_gpu_access;

875
		list_move_tail(&vma->mm_list, &vma->vm->active_list);
876
		i915_gem_object_move_to_active(obj, ring);
877 878
		if (obj->base.write_domain) {
			obj->dirty = 1;
879
			obj->last_write_seqno = intel_ring_get_seqno(ring);
880
			if (obj->pin_count) /* check for potential scanout */
881
				intel_mark_fb_busy(obj, ring);
882 883
		}

C
Chris Wilson 已提交
884
		trace_i915_gem_object_change_domain(obj, old_read, old_write);
885 886 887
	}
}

888 889
static void
i915_gem_execbuffer_retire_commands(struct drm_device *dev,
890
				    struct drm_file *file,
891 892
				    struct intel_ring_buffer *ring,
				    struct drm_i915_gem_object *obj)
893
{
894 895
	/* Unconditionally force add_request to emit a full flush. */
	ring->gpu_caches_dirty = true;
896

897
	/* Add a breadcrumb for the completion of the batch buffer */
898
	(void)__i915_add_request(ring, file, obj, NULL);
899
}
900

901 902 903 904 905 906 907 908 909 910 911 912 913 914 915 916 917 918 919 920 921 922 923 924 925
/*
 * Zero the four GEN7_SO_WRITE_OFFSET registers from the render ring.
 *
 * Does nothing unless the device is gen7 and @ring is the render ring.
 * Each register is written with one MI_LOAD_REGISTER_IMM(1) command, which
 * takes three dwords, so 4 * 3 dwords are reserved on the ring.
 *
 * Returns 0 on success or when there is nothing to do, otherwise the error
 * from intel_ring_begin().
 */
static int
i915_reset_gen7_sol_offsets(struct drm_device *dev,
			    struct intel_ring_buffer *ring)
{
	drm_i915_private_t *dev_priv = dev->dev_private;
	int err;
	int reg;

	if (!IS_GEN7(dev))
		return 0;

	if (ring != &dev_priv->ring[RCS])
		return 0;

	err = intel_ring_begin(ring, 4 * 3);
	if (err)
		return err;

	for (reg = 0; reg < 4; reg++) {
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, GEN7_SO_WRITE_OFFSET(reg));
		intel_ring_emit(ring, 0);
	}

	intel_ring_advance(ring);

	return 0;
}

926 927 928 929
static int
i915_gem_do_execbuffer(struct drm_device *dev, void *data,
		       struct drm_file *file,
		       struct drm_i915_gem_execbuffer2 *args,
930 931
		       struct drm_i915_gem_exec_object2 *exec,
		       struct i915_address_space *vm)
932 933
{
	drm_i915_private_t *dev_priv = dev->dev_private;
934
	struct eb_vmas *eb;
935 936 937
	struct drm_i915_gem_object *batch_obj;
	struct drm_clip_rect *cliprects = NULL;
	struct intel_ring_buffer *ring;
938
	struct i915_ctx_hang_stats *hs;
939
	u32 ctx_id = i915_execbuffer2_get_context_id(*args);
940
	u32 exec_start, exec_len;
941
	u32 mask, flags;
942
	int ret, mode, i;
943
	bool need_relocs;
944

945
	if (!i915_gem_check_execbuffer(args))
946 947 948
		return -EINVAL;

	ret = validate_exec_list(exec, args->buffer_count);
949 950 951
	if (ret)
		return ret;

952 953 954 955 956 957 958
	flags = 0;
	if (args->flags & I915_EXEC_SECURE) {
		if (!file->is_master || !capable(CAP_SYS_ADMIN))
		    return -EPERM;

		flags |= I915_DISPATCH_SECURE;
	}
959 960
	if (args->flags & I915_EXEC_IS_PINNED)
		flags |= I915_DISPATCH_PINNED;
961

962 963 964
	switch (args->flags & I915_EXEC_RING_MASK) {
	case I915_EXEC_DEFAULT:
	case I915_EXEC_RENDER:
965
		ring = &dev_priv->ring[RCS];
966 967
		break;
	case I915_EXEC_BSD:
968
		ring = &dev_priv->ring[VCS];
969
		if (ctx_id != DEFAULT_CONTEXT_ID) {
970 971 972 973
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
974 975
		break;
	case I915_EXEC_BLT:
976
		ring = &dev_priv->ring[BCS];
977
		if (ctx_id != DEFAULT_CONTEXT_ID) {
978 979 980 981
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
982
		break;
983 984
	case I915_EXEC_VEBOX:
		ring = &dev_priv->ring[VECS];
985
		if (ctx_id != DEFAULT_CONTEXT_ID) {
986 987 988 989 990 991
			DRM_DEBUG("Ring %s doesn't support contexts\n",
				  ring->name);
			return -EPERM;
		}
		break;

992
	default:
993
		DRM_DEBUG("execbuf with unknown ring: %d\n",
994 995 996
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}
997 998 999 1000 1001
	if (!intel_ring_initialized(ring)) {
		DRM_DEBUG("execbuf with invalid ring: %d\n",
			  (int)(args->flags & I915_EXEC_RING_MASK));
		return -EINVAL;
	}
1002

1003
	mode = args->flags & I915_EXEC_CONSTANTS_MASK;
1004
	mask = I915_EXEC_CONSTANTS_MASK;
1005 1006 1007 1008 1009 1010 1011 1012 1013 1014 1015 1016
	switch (mode) {
	case I915_EXEC_CONSTANTS_REL_GENERAL:
	case I915_EXEC_CONSTANTS_ABSOLUTE:
	case I915_EXEC_CONSTANTS_REL_SURFACE:
		if (ring == &dev_priv->ring[RCS] &&
		    mode != dev_priv->relative_constants_mode) {
			if (INTEL_INFO(dev)->gen < 4)
				return -EINVAL;

			if (INTEL_INFO(dev)->gen > 5 &&
			    mode == I915_EXEC_CONSTANTS_REL_SURFACE)
				return -EINVAL;
1017 1018 1019 1020

			/* The HW changed the meaning on this bit on gen6 */
			if (INTEL_INFO(dev)->gen >= 6)
				mask &= ~I915_EXEC_CONSTANTS_REL_SURFACE;
1021 1022 1023
		}
		break;
	default:
1024
		DRM_DEBUG("execbuf with unknown constants: %d\n", mode);
1025 1026 1027
		return -EINVAL;
	}

1028
	if (args->buffer_count < 1) {
1029
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1030 1031 1032 1033
		return -EINVAL;
	}

	if (args->num_cliprects != 0) {
1034
		if (ring != &dev_priv->ring[RCS]) {
1035
			DRM_DEBUG("clip rectangles are only valid with the render ring\n");
1036 1037 1038
			return -EINVAL;
		}

1039 1040 1041 1042 1043
		if (INTEL_INFO(dev)->gen >= 5) {
			DRM_DEBUG("clip rectangles are only valid on pre-gen5\n");
			return -EINVAL;
		}

1044 1045 1046 1047 1048
		if (args->num_cliprects > UINT_MAX / sizeof(*cliprects)) {
			DRM_DEBUG("execbuf with %u cliprects\n",
				  args->num_cliprects);
			return -EINVAL;
		}
1049

1050
		cliprects = kmalloc(args->num_cliprects * sizeof(*cliprects),
1051 1052 1053 1054 1055 1056
				    GFP_KERNEL);
		if (cliprects == NULL) {
			ret = -ENOMEM;
			goto pre_mutex_err;
		}

1057
		if (copy_from_user(cliprects,
V
Ville Syrjälä 已提交
1058 1059
				   to_user_ptr(args->cliprects_ptr),
				   sizeof(*cliprects)*args->num_cliprects)) {
1060 1061 1062 1063 1064 1065 1066 1067 1068
			ret = -EFAULT;
			goto pre_mutex_err;
		}
	}

	ret = i915_mutex_lock_interruptible(dev);
	if (ret)
		goto pre_mutex_err;

1069
	if (dev_priv->ums.mm_suspended) {
1070 1071 1072 1073 1074
		mutex_unlock(&dev->struct_mutex);
		ret = -EBUSY;
		goto pre_mutex_err;
	}

1075
	eb = eb_create(args, vm);
1076 1077 1078 1079 1080 1081
	if (eb == NULL) {
		mutex_unlock(&dev->struct_mutex);
		ret = -ENOMEM;
		goto pre_mutex_err;
	}

1082
	/* Look up object handles */
1083
	ret = eb_lookup_vmas(eb, exec, args, vm, file);
1084 1085
	if (ret)
		goto err;
1086

1087
	/* take note of the batch buffer before we might reorder the lists */
1088
	batch_obj = list_entry(eb->vmas.prev, struct i915_vma, exec_list)->obj;
1089

1090
	/* Move the objects en-masse into the GTT, evicting if necessary. */
1091
	need_relocs = (args->flags & I915_EXEC_NO_RELOC) == 0;
1092
	ret = i915_gem_execbuffer_reserve(ring, &eb->vmas, &need_relocs);
1093 1094 1095 1096
	if (ret)
		goto err;

	/* The objects are in their final locations, apply the relocations. */
1097
	if (need_relocs)
1098
		ret = i915_gem_execbuffer_relocate(eb, vm);
1099 1100
	if (ret) {
		if (ret == -EFAULT) {
1101
			ret = i915_gem_execbuffer_relocate_slow(dev, args, file, ring,
1102
								eb, exec);
1103 1104 1105 1106 1107 1108 1109 1110
			BUG_ON(!mutex_is_locked(&dev->struct_mutex));
		}
		if (ret)
			goto err;
	}

	/* Set the pending read domains for the batch buffer to COMMAND */
	if (batch_obj->base.pending_write_domain) {
1111
		DRM_DEBUG("Attempting to use self-modifying batch buffer\n");
1112 1113 1114 1115 1116
		ret = -EINVAL;
		goto err;
	}
	batch_obj->base.pending_read_domains |= I915_GEM_DOMAIN_COMMAND;

1117 1118 1119 1120 1121 1122 1123
	/* snb/ivb/vlv conflate the "batch in ppgtt" bit with the "non-secure
	 * batch" bit. Hence we need to pin secure batches into the global gtt.
	 * hsw should have this fixed, but let's be paranoid and do it
	 * unconditionally for now. */
	if (flags & I915_DISPATCH_SECURE && !batch_obj->has_global_gtt_mapping)
		i915_gem_gtt_bind_object(batch_obj, batch_obj->cache_level);

1124
	ret = i915_gem_execbuffer_move_to_gpu(ring, &eb->vmas);
1125
	if (ret)
1126 1127
		goto err;

1128 1129 1130 1131 1132 1133 1134 1135 1136 1137 1138
	hs = i915_gem_context_get_hang_stats(dev, file, ctx_id);
	if (IS_ERR(hs)) {
		ret = PTR_ERR(hs);
		goto err;
	}

	if (hs->banned) {
		ret = -EIO;
		goto err;
	}

1139 1140 1141 1142
	ret = i915_switch_context(ring, file, ctx_id);
	if (ret)
		goto err;

1143 1144 1145 1146 1147 1148 1149 1150 1151
	if (ring == &dev_priv->ring[RCS] &&
	    mode != dev_priv->relative_constants_mode) {
		ret = intel_ring_begin(ring, 4);
		if (ret)
				goto err;

		intel_ring_emit(ring, MI_NOOP);
		intel_ring_emit(ring, MI_LOAD_REGISTER_IMM(1));
		intel_ring_emit(ring, INSTPM);
1152
		intel_ring_emit(ring, mask << 16 | mode);
1153 1154 1155 1156 1157
		intel_ring_advance(ring);

		dev_priv->relative_constants_mode = mode;
	}

1158 1159 1160 1161 1162 1163
	if (args->flags & I915_EXEC_GEN7_SOL_RESET) {
		ret = i915_reset_gen7_sol_offsets(dev, ring);
		if (ret)
			goto err;
	}

1164 1165
	exec_start = i915_gem_obj_offset(batch_obj, vm) +
		args->batch_start_offset;
1166 1167 1168 1169 1170 1171 1172 1173 1174
	exec_len = args->batch_len;
	if (cliprects) {
		for (i = 0; i < args->num_cliprects; i++) {
			ret = i915_emit_box(dev, &cliprects[i],
					    args->DR1, args->DR4);
			if (ret)
				goto err;

			ret = ring->dispatch_execbuffer(ring,
1175 1176
							exec_start, exec_len,
							flags);
1177 1178 1179 1180
			if (ret)
				goto err;
		}
	} else {
1181 1182 1183
		ret = ring->dispatch_execbuffer(ring,
						exec_start, exec_len,
						flags);
1184 1185 1186
		if (ret)
			goto err;
	}
1187

1188 1189
	trace_i915_gem_ring_dispatch(ring, intel_ring_get_seqno(ring), flags);

1190
	i915_gem_execbuffer_move_to_active(&eb->vmas, ring);
1191
	i915_gem_execbuffer_retire_commands(dev, file, ring, batch_obj);
1192 1193

err:
1194
	eb_destroy(eb);
1195 1196 1197 1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210

	mutex_unlock(&dev->struct_mutex);

pre_mutex_err:
	kfree(cliprects);
	return ret;
}

/*
 * Legacy execbuffer just creates an exec2 list from the original exec object
 * list array and passes it to the real function.
 */
int
i915_gem_execbuffer(struct drm_device *dev, void *data,
		    struct drm_file *file)
{
1211
	struct drm_i915_private *dev_priv = dev->dev_private;
1212 1213 1214 1215 1216 1217 1218
	struct drm_i915_gem_execbuffer *args = data;
	struct drm_i915_gem_execbuffer2 exec2;
	struct drm_i915_gem_exec_object *exec_list = NULL;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret, i;

	if (args->buffer_count < 1) {
1219
		DRM_DEBUG("execbuf with %d buffers\n", args->buffer_count);
1220 1221 1222 1223 1224 1225 1226
		return -EINVAL;
	}

	/* Copy in the exec list from userland */
	exec_list = drm_malloc_ab(sizeof(*exec_list), args->buffer_count);
	exec2_list = drm_malloc_ab(sizeof(*exec2_list), args->buffer_count);
	if (exec_list == NULL || exec2_list == NULL) {
1227
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1228 1229 1230 1231 1232 1233
			  args->buffer_count);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -ENOMEM;
	}
	ret = copy_from_user(exec_list,
V
Ville Syrjälä 已提交
1234
			     to_user_ptr(args->buffers_ptr),
1235 1236
			     sizeof(*exec_list) * args->buffer_count);
	if (ret != 0) {
1237
		DRM_DEBUG("copy %d exec entries failed %d\n",
1238 1239 1240 1241 1242 1243 1244 1245 1246 1247 1248 1249 1250 1251 1252 1253 1254 1255 1256 1257 1258 1259 1260 1261 1262 1263 1264
			  args->buffer_count, ret);
		drm_free_large(exec_list);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

	for (i = 0; i < args->buffer_count; i++) {
		exec2_list[i].handle = exec_list[i].handle;
		exec2_list[i].relocation_count = exec_list[i].relocation_count;
		exec2_list[i].relocs_ptr = exec_list[i].relocs_ptr;
		exec2_list[i].alignment = exec_list[i].alignment;
		exec2_list[i].offset = exec_list[i].offset;
		if (INTEL_INFO(dev)->gen < 4)
			exec2_list[i].flags = EXEC_OBJECT_NEEDS_FENCE;
		else
			exec2_list[i].flags = 0;
	}

	exec2.buffers_ptr = args->buffers_ptr;
	exec2.buffer_count = args->buffer_count;
	exec2.batch_start_offset = args->batch_start_offset;
	exec2.batch_len = args->batch_len;
	exec2.DR1 = args->DR1;
	exec2.DR4 = args->DR4;
	exec2.num_cliprects = args->num_cliprects;
	exec2.cliprects_ptr = args->cliprects_ptr;
	exec2.flags = I915_EXEC_RENDER;
1265
	i915_execbuffer2_set_context_id(exec2, 0);
1266

1267 1268
	ret = i915_gem_do_execbuffer(dev, data, file, &exec2, exec2_list,
				     &dev_priv->gtt.base);
1269 1270 1271 1272 1273
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
		for (i = 0; i < args->buffer_count; i++)
			exec_list[i].offset = exec2_list[i].offset;
		/* ... and back out to userspace */
V
Ville Syrjälä 已提交
1274
		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
1275 1276 1277 1278
				   exec_list,
				   sizeof(*exec_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
1279
			DRM_DEBUG("failed to copy %d exec entries "
1280 1281 1282 1283 1284 1285 1286 1287 1288 1289 1290 1291 1292 1293
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec_list);
	drm_free_large(exec2_list);
	return ret;
}

int
i915_gem_execbuffer2(struct drm_device *dev, void *data,
		     struct drm_file *file)
{
1294
	struct drm_i915_private *dev_priv = dev->dev_private;
1295 1296 1297 1298
	struct drm_i915_gem_execbuffer2 *args = data;
	struct drm_i915_gem_exec_object2 *exec2_list = NULL;
	int ret;

1299 1300
	if (args->buffer_count < 1 ||
	    args->buffer_count > UINT_MAX / sizeof(*exec2_list)) {
1301
		DRM_DEBUG("execbuf2 with %d buffers\n", args->buffer_count);
1302 1303 1304
		return -EINVAL;
	}

1305
	exec2_list = kmalloc(sizeof(*exec2_list)*args->buffer_count,
1306
			     GFP_TEMPORARY | __GFP_NOWARN | __GFP_NORETRY);
1307 1308 1309
	if (exec2_list == NULL)
		exec2_list = drm_malloc_ab(sizeof(*exec2_list),
					   args->buffer_count);
1310
	if (exec2_list == NULL) {
1311
		DRM_DEBUG("Failed to allocate exec list for %d buffers\n",
1312 1313 1314 1315
			  args->buffer_count);
		return -ENOMEM;
	}
	ret = copy_from_user(exec2_list,
V
Ville Syrjälä 已提交
1316
			     to_user_ptr(args->buffers_ptr),
1317 1318
			     sizeof(*exec2_list) * args->buffer_count);
	if (ret != 0) {
1319
		DRM_DEBUG("copy %d exec entries failed %d\n",
1320 1321 1322 1323 1324
			  args->buffer_count, ret);
		drm_free_large(exec2_list);
		return -EFAULT;
	}

1325 1326
	ret = i915_gem_do_execbuffer(dev, data, file, args, exec2_list,
				     &dev_priv->gtt.base);
1327 1328
	if (!ret) {
		/* Copy the new buffer offsets back to the user's exec list. */
V
Ville Syrjälä 已提交
1329
		ret = copy_to_user(to_user_ptr(args->buffers_ptr),
1330 1331 1332 1333
				   exec2_list,
				   sizeof(*exec2_list) * args->buffer_count);
		if (ret) {
			ret = -EFAULT;
1334
			DRM_DEBUG("failed to copy %d exec entries "
1335 1336 1337 1338 1339 1340 1341 1342
				  "back to user (%d)\n",
				  args->buffer_count, ret);
		}
	}

	drm_free_large(exec2_list);
	return ret;
}