radeon_cs.c 21.3 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
/*
 * Copyright 2008 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *    Jerome Glisse <glisse@freedesktop.org>
 */
27 28
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
29 30 31
#include "radeon_reg.h"
#include "radeon.h"

32
static int radeon_cs_parser_relocs(struct radeon_cs_parser *p)
33 34 35 36 37 38 39 40 41 42
{
	struct drm_device *ddev = p->rdev->ddev;
	struct radeon_cs_chunk *chunk;
	unsigned i, j;
	bool duplicate;

	if (p->chunk_relocs_idx == -1) {
		return 0;
	}
	chunk = &p->chunks[p->chunk_relocs_idx];
43
	p->dma_reloc_idx = 0;
44 45 46 47 48 49 50 51 52 53 54 55 56 57 58
	/* FIXME: we assume that each relocs use 4 dwords */
	p->nrelocs = chunk->length_dw / 4;
	p->relocs_ptr = kcalloc(p->nrelocs, sizeof(void *), GFP_KERNEL);
	if (p->relocs_ptr == NULL) {
		return -ENOMEM;
	}
	p->relocs = kcalloc(p->nrelocs, sizeof(struct radeon_cs_reloc), GFP_KERNEL);
	if (p->relocs == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nrelocs; i++) {
		struct drm_radeon_cs_reloc *r;

		duplicate = false;
		r = (struct drm_radeon_cs_reloc *)&chunk->kdata[i*4];
59
		for (j = 0; j < i; j++) {
60 61 62 63 64 65 66 67 68 69 70 71 72
			if (r->handle == p->relocs[j].handle) {
				p->relocs_ptr[i] = &p->relocs[j];
				duplicate = true;
				break;
			}
		}
		if (!duplicate) {
			p->relocs[i].gobj = drm_gem_object_lookup(ddev,
								  p->filp,
								  r->handle);
			if (p->relocs[i].gobj == NULL) {
				DRM_ERROR("gem object lookup failed 0x%x\n",
					  r->handle);
73
				return -ENOENT;
74 75
			}
			p->relocs_ptr[i] = &p->relocs[i];
76
			p->relocs[i].robj = gem_to_radeon_bo(p->relocs[i].gobj);
77
			p->relocs[i].lobj.bo = p->relocs[i].robj;
78
			p->relocs[i].lobj.wdomain = r->write_domain;
79 80
			p->relocs[i].lobj.rdomain = r->read_domains;
			p->relocs[i].lobj.tv.bo = &p->relocs[i].robj->tbo;
81 82
			p->relocs[i].handle = r->handle;
			p->relocs[i].flags = r->flags;
83
			radeon_bo_list_add_object(&p->relocs[i].lobj,
84
						  &p->validated);
85

86 87
		} else
			p->relocs[i].handle = 0;
88
	}
89
	return radeon_bo_list_validate(&p->validated);
90 91
}

92 93 94 95 96 97 98 99 100 101 102 103
static int radeon_cs_get_ring(struct radeon_cs_parser *p, u32 ring, s32 priority)
{
	p->priority = priority;

	switch (ring) {
	default:
		DRM_ERROR("unknown ring id: %d\n", ring);
		return -EINVAL;
	case RADEON_CS_RING_GFX:
		p->ring = RADEON_RING_TYPE_GFX_INDEX;
		break;
	case RADEON_CS_RING_COMPUTE:
104 105 106 107 108 109 110
		if (p->rdev->family >= CHIP_TAHITI) {
			if (p->priority > 0)
				p->ring = CAYMAN_RING_TYPE_CP1_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_CP2_INDEX;
		} else
			p->ring = RADEON_RING_TYPE_GFX_INDEX;
111
		break;
112 113 114 115 116 117 118 119 120 121 122 123
	case RADEON_CS_RING_DMA:
		if (p->rdev->family >= CHIP_CAYMAN) {
			if (p->priority > 0)
				p->ring = R600_RING_TYPE_DMA_INDEX;
			else
				p->ring = CAYMAN_RING_TYPE_DMA1_INDEX;
		} else if (p->rdev->family >= CHIP_R600) {
			p->ring = R600_RING_TYPE_DMA_INDEX;
		} else {
			return -EINVAL;
		}
		break;
124 125 126 127
	}
	return 0;
}

128 129 130 131 132 133 134 135 136 137 138 139
static void radeon_cs_sync_to(struct radeon_cs_parser *p,
			      struct radeon_fence *fence)
{
	struct radeon_fence *other;

	if (!fence)
		return;

	other = p->ib.sync_to[fence->ring];
	p->ib.sync_to[fence->ring] = radeon_fence_later(fence, other);
}

140
static void radeon_cs_sync_rings(struct radeon_cs_parser *p)
141
{
142
	int i;
143

144
	for (i = 0; i < p->nrelocs; i++) {
145
		if (!p->relocs[i].robj)
146 147
			continue;

148
		radeon_cs_sync_to(p, p->relocs[i].robj->tbo.sync_obj);
149
	}
150 151
}

152
/* XXX: note that this is called from the legacy UMS CS ioctl as well */
153 154 155 156
int radeon_cs_parser_init(struct radeon_cs_parser *p, void *data)
{
	struct drm_radeon_cs *cs = data;
	uint64_t *chunk_array_ptr;
157 158 159
	unsigned size, i;
	u32 ring = RADEON_CS_RING_GFX;
	s32 priority = 0;
160 161 162 163 164 165 166

	if (!cs->num_chunks) {
		return 0;
	}
	/* get chunks */
	INIT_LIST_HEAD(&p->validated);
	p->idx = 0;
167 168 169 170
	p->ib.sa_bo = NULL;
	p->ib.semaphore = NULL;
	p->const_ib.sa_bo = NULL;
	p->const_ib.semaphore = NULL;
171 172
	p->chunk_ib_idx = -1;
	p->chunk_relocs_idx = -1;
173
	p->chunk_flags_idx = -1;
174
	p->chunk_const_ib_idx = -1;
175 176 177 178 179 180 181 182 183
	p->chunks_array = kcalloc(cs->num_chunks, sizeof(uint64_t), GFP_KERNEL);
	if (p->chunks_array == NULL) {
		return -ENOMEM;
	}
	chunk_array_ptr = (uint64_t *)(unsigned long)(cs->chunks);
	if (DRM_COPY_FROM_USER(p->chunks_array, chunk_array_ptr,
			       sizeof(uint64_t)*cs->num_chunks)) {
		return -EFAULT;
	}
184
	p->cs_flags = 0;
185 186 187 188 189 190 191 192 193 194 195 196 197 198 199
	p->nchunks = cs->num_chunks;
	p->chunks = kcalloc(p->nchunks, sizeof(struct radeon_cs_chunk), GFP_KERNEL);
	if (p->chunks == NULL) {
		return -ENOMEM;
	}
	for (i = 0; i < p->nchunks; i++) {
		struct drm_radeon_cs_chunk __user **chunk_ptr = NULL;
		struct drm_radeon_cs_chunk user_chunk;
		uint32_t __user *cdata;

		chunk_ptr = (void __user*)(unsigned long)p->chunks_array[i];
		if (DRM_COPY_FROM_USER(&user_chunk, chunk_ptr,
				       sizeof(struct drm_radeon_cs_chunk))) {
			return -EFAULT;
		}
200 201
		p->chunks[i].length_dw = user_chunk.length_dw;
		p->chunks[i].kdata = NULL;
202
		p->chunks[i].chunk_id = user_chunk.chunk_id;
203
		p->chunks[i].user_ptr = (void __user *)(unsigned long)user_chunk.chunk_data;
204 205 206 207 208
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) {
			p->chunk_relocs_idx = i;
		}
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_IB) {
			p->chunk_ib_idx = i;
209 210 211
			/* zero length IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
212
		}
213 214 215 216 217 218
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_CONST_IB) {
			p->chunk_const_ib_idx = i;
			/* zero length CONST IB isn't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
		}
219 220 221 222 223
		if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
			p->chunk_flags_idx = i;
			/* zero length flags aren't useful */
			if (p->chunks[i].length_dw == 0)
				return -EINVAL;
224
		}
225

226
		cdata = (uint32_t *)(unsigned long)user_chunk.chunk_data;
227 228
		if ((p->chunks[i].chunk_id == RADEON_CHUNK_ID_RELOCS) ||
		    (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS)) {
229 230 231 232 233 234 235 236 237
			size = p->chunks[i].length_dw * sizeof(uint32_t);
			p->chunks[i].kdata = kmalloc(size, GFP_KERNEL);
			if (p->chunks[i].kdata == NULL) {
				return -ENOMEM;
			}
			if (DRM_COPY_FROM_USER(p->chunks[i].kdata,
					       p->chunks[i].user_ptr, size)) {
				return -EFAULT;
			}
238
			if (p->chunks[i].chunk_id == RADEON_CHUNK_ID_FLAGS) {
239 240 241 242 243
				p->cs_flags = p->chunks[i].kdata[0];
				if (p->chunks[i].length_dw > 1)
					ring = p->chunks[i].kdata[1];
				if (p->chunks[i].length_dw > 2)
					priority = (s32)p->chunks[i].kdata[2];
244
			}
245 246
		}
	}
247

248 249 250 251 252 253 254
	/* these are KMS only */
	if (p->rdev) {
		if ((p->cs_flags & RADEON_CS_USE_VM) &&
		    !p->rdev->vm_manager.enabled) {
			DRM_ERROR("VM not active on asic!\n");
			return -EINVAL;
		}
255

256 257 258 259 260 261
		/* we only support VM on SI+ */
		if ((p->rdev->family >= CHIP_TAHITI) &&
		    ((p->cs_flags & RADEON_CS_USE_VM) == 0)) {
			DRM_ERROR("VM required on SI+!\n");
			return -EINVAL;
		}
262

263 264 265
		if (radeon_cs_get_ring(p, ring, priority))
			return -EINVAL;
	}
266 267 268 269 270 271 272 273 274 275

	/* deal with non-vm */
	if ((p->chunk_ib_idx != -1) &&
	    ((p->cs_flags & RADEON_CS_USE_VM) == 0) &&
	    (p->chunks[p->chunk_ib_idx].chunk_id == RADEON_CHUNK_ID_IB)) {
		if (p->chunks[p->chunk_ib_idx].length_dw > (16 * 1024)) {
			DRM_ERROR("cs IB too big: %d\n",
				  p->chunks[p->chunk_ib_idx].length_dw);
			return -EINVAL;
		}
276
		if (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) {
277 278 279 280
			p->chunks[p->chunk_ib_idx].kpage[0] = kmalloc(PAGE_SIZE, GFP_KERNEL);
			p->chunks[p->chunk_ib_idx].kpage[1] = kmalloc(PAGE_SIZE, GFP_KERNEL);
			if (p->chunks[p->chunk_ib_idx].kpage[0] == NULL ||
			    p->chunks[p->chunk_ib_idx].kpage[1] == NULL) {
281 282
				kfree(p->chunks[p->chunk_ib_idx].kpage[0]);
				kfree(p->chunks[p->chunk_ib_idx].kpage[1]);
283 284
				p->chunks[p->chunk_ib_idx].kpage[0] = NULL;
				p->chunks[p->chunk_ib_idx].kpage[1] = NULL;
285 286 287
				return -ENOMEM;
			}
		}
288 289 290 291 292 293 294
		p->chunks[p->chunk_ib_idx].kpage_idx[0] = -1;
		p->chunks[p->chunk_ib_idx].kpage_idx[1] = -1;
		p->chunks[p->chunk_ib_idx].last_copied_page = -1;
		p->chunks[p->chunk_ib_idx].last_page_index =
			((p->chunks[p->chunk_ib_idx].length_dw * 4) - 1) / PAGE_SIZE;
	}

295 296 297 298 299 300 301 302 303 304 305 306 307 308 309
	return 0;
}

/**
 * cs_parser_fini() - clean parser states
 * @parser:	parser structure holding parsing context.
 * @error:	error number
 *
 * If error is set than unvalidate buffer, otherwise just free memory
 * used by parsing context.
 **/
static void radeon_cs_parser_fini(struct radeon_cs_parser *parser, int error)
{
	unsigned i;

310
	if (!error) {
311
		ttm_eu_fence_buffer_objects(&parser->validated,
312
					    parser->ib.fence);
313
	} else {
314
		ttm_eu_backoff_reservation(&parser->validated);
315
	}
316

317 318 319 320 321
	if (parser->relocs != NULL) {
		for (i = 0; i < parser->nrelocs; i++) {
			if (parser->relocs[i].gobj)
				drm_gem_object_unreference_unlocked(parser->relocs[i].gobj);
		}
322
	}
323
	kfree(parser->track);
324 325 326 327
	kfree(parser->relocs);
	kfree(parser->relocs_ptr);
	for (i = 0; i < parser->nchunks; i++) {
		kfree(parser->chunks[i].kdata);
328 329 330 331
		if ((parser->rdev->flags & RADEON_IS_AGP)) {
			kfree(parser->chunks[i].kpage[0]);
			kfree(parser->chunks[i].kpage[1]);
		}
332 333 334 335
	}
	kfree(parser->chunks);
	kfree(parser->chunks_array);
	radeon_ib_free(parser->rdev, &parser->ib);
336
	radeon_ib_free(parser->rdev, &parser->const_ib);
337 338
}

339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356
static int radeon_cs_ib_chunk(struct radeon_device *rdev,
			      struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;

	if (parser->cs_flags & RADEON_CS_USE_VM)
		return 0;

	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
	/* Copy the packet into the IB, the parser will read from the
	 * input memory (cached) and write to the IB (which can be
	 * uncached).
	 */
	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
357
			   NULL, ib_chunk->length_dw * 4);
358 359 360 361
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
362
	parser->ib.length_dw = ib_chunk->length_dw;
363
	r = radeon_cs_parse(rdev, parser->ring, parser);
364 365 366 367 368 369 370 371 372
	if (r || parser->parser_error) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}
	r = radeon_cs_finish_pages(parser);
	if (r) {
		DRM_ERROR("Invalid command stream !\n");
		return r;
	}
373
	radeon_cs_sync_rings(parser);
374
	r = radeon_ib_schedule(rdev, &parser->ib, NULL);
375 376 377
	if (r) {
		DRM_ERROR("Failed to schedule IB !\n");
	}
378
	return r;
379 380 381 382 383
}

static int radeon_bo_vm_update_pte(struct radeon_cs_parser *parser,
				   struct radeon_vm *vm)
{
384
	struct radeon_device *rdev = parser->rdev;
385 386 387 388
	struct radeon_bo_list *lobj;
	struct radeon_bo *bo;
	int r;

389 390 391 392
	r = radeon_vm_bo_update_pte(rdev, vm, rdev->ring_tmp_bo.bo, &rdev->ring_tmp_bo.bo->tbo.mem);
	if (r) {
		return r;
	}
393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415
	list_for_each_entry(lobj, &parser->validated, tv.head) {
		bo = lobj->bo;
		r = radeon_vm_bo_update_pte(parser->rdev, vm, bo, &bo->tbo.mem);
		if (r) {
			return r;
		}
	}
	return 0;
}

static int radeon_cs_ib_vm_chunk(struct radeon_device *rdev,
				 struct radeon_cs_parser *parser)
{
	struct radeon_cs_chunk *ib_chunk;
	struct radeon_fpriv *fpriv = parser->filp->driver_priv;
	struct radeon_vm *vm = &fpriv->vm;
	int r;

	if (parser->chunk_ib_idx == -1)
		return 0;
	if ((parser->cs_flags & RADEON_CS_USE_VM) == 0)
		return 0;

416 417 418 419 420 421 422 423
	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
		ib_chunk = &parser->chunks[parser->chunk_const_ib_idx];
		if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
			DRM_ERROR("cs IB CONST too big: %d\n", ib_chunk->length_dw);
			return -EINVAL;
		}
		r =  radeon_ib_get(rdev, parser->ring, &parser->const_ib,
424
				   vm, ib_chunk->length_dw * 4);
425 426 427 428
		if (r) {
			DRM_ERROR("Failed to get const ib !\n");
			return r;
		}
429 430
		parser->const_ib.is_const_ib = true;
		parser->const_ib.length_dw = ib_chunk->length_dw;
431
		/* Copy the packet into the IB */
432
		if (DRM_COPY_FROM_USER(parser->const_ib.ptr, ib_chunk->user_ptr,
433 434 435
				       ib_chunk->length_dw * 4)) {
			return -EFAULT;
		}
436
		r = radeon_ring_ib_parse(rdev, parser->ring, &parser->const_ib);
437 438 439 440 441
		if (r) {
			return r;
		}
	}

442 443 444 445 446 447
	ib_chunk = &parser->chunks[parser->chunk_ib_idx];
	if (ib_chunk->length_dw > RADEON_IB_VM_MAX_SIZE) {
		DRM_ERROR("cs IB too big: %d\n", ib_chunk->length_dw);
		return -EINVAL;
	}
	r =  radeon_ib_get(rdev, parser->ring, &parser->ib,
448
			   vm, ib_chunk->length_dw * 4);
449 450 451 452
	if (r) {
		DRM_ERROR("Failed to get ib !\n");
		return r;
	}
453
	parser->ib.length_dw = ib_chunk->length_dw;
454
	/* Copy the packet into the IB */
455
	if (DRM_COPY_FROM_USER(parser->ib.ptr, ib_chunk->user_ptr,
456 457 458
			       ib_chunk->length_dw * 4)) {
		return -EFAULT;
	}
459
	r = radeon_ring_ib_parse(rdev, parser->ring, &parser->ib);
460 461 462 463
	if (r) {
		return r;
	}

464
	mutex_lock(&rdev->vm_manager.lock);
465
	mutex_lock(&vm->mutex);
466
	r = radeon_vm_alloc_pt(rdev, vm);
467 468 469 470 471 472 473
	if (r) {
		goto out;
	}
	r = radeon_bo_vm_update_pte(parser, vm);
	if (r) {
		goto out;
	}
474
	radeon_cs_sync_rings(parser);
475
	radeon_cs_sync_to(parser, vm->fence);
476
	radeon_cs_sync_to(parser, radeon_vm_grab_id(rdev, vm, parser->ring));
477

478 479
	if ((rdev->family >= CHIP_TAHITI) &&
	    (parser->chunk_const_ib_idx != -1)) {
480 481 482
		r = radeon_ib_schedule(rdev, &parser->ib, &parser->const_ib);
	} else {
		r = radeon_ib_schedule(rdev, &parser->ib, NULL);
483 484
	}

485
	if (!r) {
486
		radeon_vm_fence(rdev, vm, parser->ib.fence);
487
	}
488 489

out:
490
	radeon_vm_add_to_lru(rdev, vm);
491 492
	mutex_unlock(&vm->mutex);
	mutex_unlock(&rdev->vm_manager.lock);
493 494 495
	return r;
}

496 497 498 499 500 501 502 503 504 505
static int radeon_cs_handle_lockup(struct radeon_device *rdev, int r)
{
	if (r == -EDEADLK) {
		r = radeon_gpu_reset(rdev);
		if (!r)
			r = -EAGAIN;
	}
	return r;
}

506 507 508 509 510 511
int radeon_cs_ioctl(struct drm_device *dev, void *data, struct drm_file *filp)
{
	struct radeon_device *rdev = dev->dev_private;
	struct radeon_cs_parser parser;
	int r;

512
	down_read(&rdev->exclusive_lock);
513
	if (!rdev->accel_working) {
514
		up_read(&rdev->exclusive_lock);
515 516
		return -EBUSY;
	}
517 518 519 520
	/* initialize parser */
	memset(&parser, 0, sizeof(struct radeon_cs_parser));
	parser.filp = filp;
	parser.rdev = rdev;
521
	parser.dev = rdev->dev;
522
	parser.family = rdev->family;
523 524 525 526
	r = radeon_cs_parser_init(&parser, data);
	if (r) {
		DRM_ERROR("Failed to initialize parser !\n");
		radeon_cs_parser_fini(&parser, r);
527
		up_read(&rdev->exclusive_lock);
528
		r = radeon_cs_handle_lockup(rdev, r);
529 530 531 532
		return r;
	}
	r = radeon_cs_parser_relocs(&parser);
	if (r) {
533 534
		if (r != -ERESTARTSYS)
			DRM_ERROR("Failed to parse relocation %d!\n", r);
535
		radeon_cs_parser_fini(&parser, r);
536
		up_read(&rdev->exclusive_lock);
537
		r = radeon_cs_handle_lockup(rdev, r);
538 539
		return r;
	}
540
	r = radeon_cs_ib_chunk(rdev, &parser);
541
	if (r) {
542
		goto out;
543
	}
544
	r = radeon_cs_ib_vm_chunk(rdev, &parser);
545
	if (r) {
546
		goto out;
547
	}
548
out:
549
	radeon_cs_parser_fini(&parser, r);
550
	up_read(&rdev->exclusive_lock);
551
	r = radeon_cs_handle_lockup(rdev, r);
552 553
	return r;
}
554 555 556 557 558 559 560 561 562 563 564 565 566 567

int radeon_cs_finish_pages(struct radeon_cs_parser *p)
{
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	int i;
	int size = PAGE_SIZE;

	for (i = ibc->last_copied_page + 1; i <= ibc->last_page_index; i++) {
		if (i == ibc->last_page_index) {
			size = (ibc->length_dw * 4) % PAGE_SIZE;
			if (size == 0)
				size = PAGE_SIZE;
		}
		
568
		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
569 570 571 572 573 574 575
				       ibc->user_ptr + (i * PAGE_SIZE),
				       size))
			return -EFAULT;
	}
	return 0;
}

576
static int radeon_cs_update_pages(struct radeon_cs_parser *p, int pg_idx)
577 578 579 580 581
{
	int new_page;
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	int i;
	int size = PAGE_SIZE;
582 583
	bool copy1 = (p->rdev && (p->rdev->flags & RADEON_IS_AGP)) ?
		false : true;
584

585
	for (i = ibc->last_copied_page + 1; i < pg_idx; i++) {
586
		if (DRM_COPY_FROM_USER(p->ib.ptr + (i * (PAGE_SIZE/4)),
587 588 589 590 591 592 593 594 595
				       ibc->user_ptr + (i * PAGE_SIZE),
				       PAGE_SIZE)) {
			p->parser_error = -EFAULT;
			return 0;
		}
	}

	if (pg_idx == ibc->last_page_index) {
		size = (ibc->length_dw * 4) % PAGE_SIZE;
596 597
		if (size == 0)
			size = PAGE_SIZE;
598 599
	}

600 601
	new_page = ibc->kpage_idx[0] < ibc->kpage_idx[1] ? 0 : 1;
	if (copy1)
602
		ibc->kpage[new_page] = p->ib.ptr + (pg_idx * (PAGE_SIZE / 4));
603

604 605 606 607 608 609 610
	if (DRM_COPY_FROM_USER(ibc->kpage[new_page],
			       ibc->user_ptr + (pg_idx * PAGE_SIZE),
			       size)) {
		p->parser_error = -EFAULT;
		return 0;
	}

611 612
	/* copy to IB for non single case */
	if (!copy1)
613
		memcpy((void *)(p->ib.ptr+(pg_idx*(PAGE_SIZE/4))), ibc->kpage[new_page], size);
614 615 616 617 618 619

	ibc->last_copied_page = pg_idx;
	ibc->kpage_idx[new_page] = pg_idx;

	return new_page;
}
620 621 622 623 624 625 626 627 628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644

u32 radeon_get_ib_value(struct radeon_cs_parser *p, int idx)
{
	struct radeon_cs_chunk *ibc = &p->chunks[p->chunk_ib_idx];
	u32 pg_idx, pg_offset;
	u32 idx_value = 0;
	int new_page;

	pg_idx = (idx * 4) / PAGE_SIZE;
	pg_offset = (idx * 4) % PAGE_SIZE;

	if (ibc->kpage_idx[0] == pg_idx)
		return ibc->kpage[0][pg_offset/4];
	if (ibc->kpage_idx[1] == pg_idx)
		return ibc->kpage[1][pg_offset/4];

	new_page = radeon_cs_update_pages(p, pg_idx);
	if (new_page < 0) {
		p->parser_error = new_page;
		return 0;
	}

	idx_value = ibc->kpage[new_page][pg_offset/4];
	return idx_value;
}
645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697

/**
 * radeon_cs_packet_parse() - parse cp packet and point ib index to next packet
 * @parser:	parser structure holding parsing context.
 * @pkt:	where to store packet information
 *
 * Assume that chunk_ib_index is properly set. Will return -EINVAL
 * if packet is bigger than remaining ib size. or if packets is unknown.
 **/
int radeon_cs_packet_parse(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt,
			   unsigned idx)
{
	struct radeon_cs_chunk *ib_chunk = &p->chunks[p->chunk_ib_idx];
	struct radeon_device *rdev = p->rdev;
	uint32_t header;

	if (idx >= ib_chunk->length_dw) {
		DRM_ERROR("Can not parse packet at %d after CS end %d !\n",
			  idx, ib_chunk->length_dw);
		return -EINVAL;
	}
	header = radeon_get_ib_value(p, idx);
	pkt->idx = idx;
	pkt->type = RADEON_CP_PACKET_GET_TYPE(header);
	pkt->count = RADEON_CP_PACKET_GET_COUNT(header);
	pkt->one_reg_wr = 0;
	switch (pkt->type) {
	case RADEON_PACKET_TYPE0:
		if (rdev->family < CHIP_R600) {
			pkt->reg = R100_CP_PACKET0_GET_REG(header);
			pkt->one_reg_wr =
				RADEON_CP_PACKET0_GET_ONE_REG_WR(header);
		} else
			pkt->reg = R600_CP_PACKET0_GET_REG(header);
		break;
	case RADEON_PACKET_TYPE3:
		pkt->opcode = RADEON_CP_PACKET3_GET_OPCODE(header);
		break;
	case RADEON_PACKET_TYPE2:
		pkt->count = -1;
		break;
	default:
		DRM_ERROR("Unknown packet type %d at %d !\n", pkt->type, idx);
		return -EINVAL;
	}
	if ((pkt->count + 1 + pkt->idx) >= ib_chunk->length_dw) {
		DRM_ERROR("Packet (%d:%d:%d) end after CS buffer (%d) !\n",
			  pkt->idx, pkt->type, pkt->count, ib_chunk->length_dw);
		return -EINVAL;
	}
	return 0;
}
698 699 700 701 702 703 704 705 706 707 708 709 710 711 712 713 714 715 716 717 718

/**
 * radeon_cs_packet_next_is_pkt3_nop() - test if the next packet is P3 NOP
 * @p:		structure holding the parser context.
 *
 * Check if the next packet is NOP relocation packet3.
 **/
bool radeon_cs_packet_next_is_pkt3_nop(struct radeon_cs_parser *p)
{
	struct radeon_cs_packet p3reloc;
	int r;

	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return false;
	if (p3reloc.type != RADEON_PACKET_TYPE3)
		return false;
	if (p3reloc.opcode != RADEON_PACKET3_NOP)
		return false;
	return true;
}
719 720 721 722 723 724 725 726 727 728 729 730 731 732 733 734 735 736 737 738 739

/**
 * radeon_cs_dump_packet() - dump raw packet context
 * @p:		structure holding the parser context.
 * @pkt:	structure holding the packet.
 *
 * Used mostly for debugging and error reporting.
 **/
void radeon_cs_dump_packet(struct radeon_cs_parser *p,
			   struct radeon_cs_packet *pkt)
{
	volatile uint32_t *ib;
	unsigned i;
	unsigned idx;

	ib = p->ib.ptr;
	idx = pkt->idx;
	for (i = 0; i <= (pkt->count + 1); i++, idx++)
		DRM_INFO("ib[%d]=0x%08X\n", idx, ib[idx]);
}

740 741 742 743 744 745 746 747 748 749 750 751 752 753 754 755 756 757 758 759 760 761 762 763 764 765 766 767 768 769 770 771 772 773 774 775 776 777 778 779 780 781 782 783 784 785 786 787 788 789 790 791 792 793
/**
 * radeon_cs_packet_next_reloc() - parse next (should be reloc) packet
 * @parser:		parser structure holding parsing context.
 * @data:		pointer to relocation data
 * @offset_start:	starting offset
 * @offset_mask:	offset mask (to align start offset on)
 * @reloc:		reloc informations
 *
 * Check if next packet is relocation packet3, do bo validation and compute
 * GPU offset using the provided start.
 **/
int radeon_cs_packet_next_reloc(struct radeon_cs_parser *p,
				struct radeon_cs_reloc **cs_reloc,
				int nomm)
{
	struct radeon_cs_chunk *relocs_chunk;
	struct radeon_cs_packet p3reloc;
	unsigned idx;
	int r;

	if (p->chunk_relocs_idx == -1) {
		DRM_ERROR("No relocation chunk !\n");
		return -EINVAL;
	}
	*cs_reloc = NULL;
	relocs_chunk = &p->chunks[p->chunk_relocs_idx];
	r = radeon_cs_packet_parse(p, &p3reloc, p->idx);
	if (r)
		return r;
	p->idx += p3reloc.count + 2;
	if (p3reloc.type != RADEON_PACKET_TYPE3 ||
	    p3reloc.opcode != RADEON_PACKET3_NOP) {
		DRM_ERROR("No packet3 for relocation for packet at %d.\n",
			  p3reloc.idx);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	idx = radeon_get_ib_value(p, p3reloc.idx + 1);
	if (idx >= relocs_chunk->length_dw) {
		DRM_ERROR("Relocs at %d after relocations chunk end %d !\n",
			  idx, relocs_chunk->length_dw);
		radeon_cs_dump_packet(p, &p3reloc);
		return -EINVAL;
	}
	/* FIXME: we assume reloc size is 4 dwords */
	if (nomm) {
		*cs_reloc = p->relocs;
		(*cs_reloc)->lobj.gpu_offset =
			(u64)relocs_chunk->kdata[idx + 3] << 32;
		(*cs_reloc)->lobj.gpu_offset |= relocs_chunk->kdata[idx + 0];
	} else
		*cs_reloc = p->relocs_ptr[(idx / 4)];
	return 0;
}