/*
 * Copyright 2012 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
#include <core/firmware.h>
#include <subdev/secboot.h>
#include <subdev/fb.h>
#include <subdev/mc.h>
#include <subdev/pmu.h>
#include <subdev/timer.h>
#include <engine/fifo.h>

#include <nvif/class.h>
#include <nvif/cl9097.h>
#include <nvif/unpack.h>

/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
B
Ben Skeggs 已提交
47
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
48
{
49
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
50
	if (gr->zbc_color[zbc].format) {
51 52 53 54 55 56 57 58
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
59 60 61
}

static int
B
Ben Skeggs 已提交
62
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
63
		       const u32 ds[4], const u32 l2[4])
64
{
65
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
66 67 68
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
69 70
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
71
				continue;
B
Ben Skeggs 已提交
72 73
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
74
				continue;
B
Ben Skeggs 已提交
75 76
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
77 78 79 80 81 82 83 84 85
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

86 87 88
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
89 90 91
	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
92
	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
93
	gf100_gr_zbc_clear_color(gr, zbc);
94 95 96 97
	return zbc;
}

static void
B
Ben Skeggs 已提交
98
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
99
{
100
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
101
	if (gr->zbc_depth[zbc].format)
102 103 104 105
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
106 107 108
}

static int
B
Ben Skeggs 已提交
109
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
110
		       const u32 ds, const u32 l2)
111
{
112
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
113 114 115
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
116 117
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
118
				continue;
B
Ben Skeggs 已提交
119
			if (gr->zbc_depth[i].ds != ds)
120
				continue;
B
Ben Skeggs 已提交
121
			if (gr->zbc_depth[i].l2 != l2) {
122 123 124 125 126 127 128 129 130
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

131 132 133
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
134 135 136
	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
137
	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
138
	gf100_gr_zbc_clear_depth(gr, zbc);
139 140 141
	return zbc;
}

142 143 144
/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/
145 146 147 148 149 150
#define gf100_gr_object(p) container_of((p), struct gf100_gr_object, object)

struct gf100_gr_object {
	struct nvkm_object object;
	struct gf100_gr_chan *chan;
};
151

152
static int
153
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
154
{
155
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
156 157 158
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
159
	int ret = -ENOSYS;
160

161
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
B
Ben Skeggs 已提交
182
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
183 184
							   args->v0.ds,
							   args->v0.l2);
185 186 187 188 189 190 191 192 193 194 195 196 197 198
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
199
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
200
{
201
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
202 203 204
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
205
	int ret = -ENOSYS;
206

207
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
208 209
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
B
Ben Skeggs 已提交
210
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
211 212
							   args->v0.ds,
							   args->v0.l2);
213 214 215 216 217 218 219 220 221 222
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
223
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
224
{
225
	nvif_ioctl(object, "fermi mthd %08x\n", mthd);
226 227
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
228
		return gf100_fermi_mthd_zbc_color(object, data, size);
229
	case FERMI_A_ZBC_DEPTH:
230
		return gf100_fermi_mthd_zbc_depth(object, data, size);
231 232 233 234 235 236
	default:
		break;
	}
	return -EINVAL;
}

237 238
const struct nvkm_object_func
gf100_fermi = {
239
	.mthd = gf100_fermi_mthd,
240 241
};

242 243
/* Enable (data != 0) or disable all SM shader exception reporting. */
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000);
	nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000);
}

249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267
/* Handle software methods for graphics classes.  Returns true when the
 * method was consumed here, false when it should be handled elsewhere.
 */
static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	const u32 cls = class & 0x00ff;

	/* method 0x1528 (SetShaderExceptions) on 0x97/0xc0 classes */
	if (cls != 0x97 && cls != 0xc0)
		return false;
	if (mthd != 0x1528)
		return false;

	gf100_gr_mthd_set_shader_exceptions(device, data);
	return true;
}
268

269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289
/* Default object functions: no class-specific methods. */
static const struct nvkm_object_func
gf100_gr_object_func = {
};

/* Construct a graphics object on a GR channel.
 *
 * Uses the class-specific function table when the sclass provides one
 * (e.g. gf100_fermi), falling back to the empty default otherwise.
 * Returns 0 on success, -ENOMEM on allocation failure.
 */
static int
gf100_gr_object_new(const struct nvkm_oclass *oclass, void *data, u32 size,
		    struct nvkm_object **pobject)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(oclass->parent);
	struct gf100_gr_object *object;

	if (!(object = kzalloc(sizeof(*object), GFP_KERNEL)))
		return -ENOMEM;
	*pobject = &object->object;

	nvkm_object_ctor(oclass->base.func ? oclass->base.func :
			 &gf100_gr_object_func, oclass, &object->object);
	object->chan = chan;
	return 0;
}

290 291 292 293 294 295 296 297 298
static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int c = 0;

	while (gr->func->sclass[c].oclass) {
		if (c++ == index) {
			*sclass = gr->func->sclass[index];
299
			sclass->ctor = gf100_gr_object_new;
300 301 302 303 304 305
			return index;
		}
	}

	return c;
}
306 307 308 309

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
310

311 312 313
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
314
{
315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

/* Destructor for a GR channel: unmap and free the buffers referenced by
 * the mmio list, then the mmio list itself.  Returns the allocation for
 * the caller to free.
 */
static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *gr_chan = gf100_gr_chan(object);
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(gr_chan->data); idx++) {
		if (gr_chan->data[idx].vma.node) {
			nvkm_vm_unmap(&gr_chan->data[idx].vma);
			nvkm_vm_put(&gr_chan->data[idx].vma);
		}
		nvkm_memory_del(&gr_chan->data[idx].mem);
	}

	if (gr_chan->mmio_vma.node) {
		nvkm_vm_unmap(&gr_chan->mmio_vma);
		nvkm_vm_put(&gr_chan->mmio_vma);
	}
	nvkm_memory_del(&gr_chan->mmio);
	return gr_chan;
}

/* Object functions for a gf100 GR channel context. */
static const struct nvkm_object_func
gf100_gr_chan = {
	.dtor = gf100_gr_chan_dtor,
	.bind = gf100_gr_chan_bind,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
B
Ben Skeggs 已提交
380 381
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
382
	struct gf100_gr_chan *chan;
383
	struct nvkm_device *device = gr->base.engine.subdev.device;
384 385
	int ret, i;

386 387 388 389 390
	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;
391

392 393 394 395
	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
396 397
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
398 399 400
	if (ret)
		return ret;

401
	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
402
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
403 404 405
	if (ret)
		return ret;

406 407
	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

408
	/* allocate buffers referenced by mmio list */
B
Ben Skeggs 已提交
409
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
410 411 412
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
413 414
		if (ret)
			return ret;
415

416 417 418
		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
419 420
		if (ret)
			return ret;
421

422
		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
423
		data++;
424 425
	}

426
	/* finally, fill in the mmio list and point the context at it */
427
	nvkm_kmap(chan->mmio);
B
Ben Skeggs 已提交
428
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
429 430
		u32 addr = mmio->addr;
		u32 data = mmio->data;
431

432
		if (mmio->buffer >= 0) {
433
			u64 info = chan->data[mmio->buffer].vma.offset;
434 435
			data |= info >> mmio->shift;
		}
436

437 438
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
439 440
		mmio++;
	}
441
	nvkm_done(chan->mmio);
442
	return 0;
443 444
}

445
/*******************************************************************************
446
 * PGRAPH register lists
447 448
 ******************************************************************************/

449 450
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
451 452 453 454 455 456 457 458 459 460 461 462 463 464
	{ 0x400080,   1, 0x04, 0x003083c2 },
	{ 0x400088,   1, 0x04, 0x00006fe7 },
	{ 0x40008c,   1, 0x04, 0x00000000 },
	{ 0x400090,   1, 0x04, 0x00000030 },
	{ 0x40013c,   1, 0x04, 0x013901f7 },
	{ 0x400140,   1, 0x04, 0x00000100 },
	{ 0x400144,   1, 0x04, 0x00000000 },
	{ 0x400148,   1, 0x04, 0x00000110 },
	{ 0x400138,   1, 0x04, 0x00000000 },
	{ 0x400130,   2, 0x04, 0x00000000 },
	{ 0x400124,   1, 0x04, 0x00000002 },
	{}
};

465 466
const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
467 468 469 470 471
	{ 0x40415c,   1, 0x04, 0x00000000 },
	{ 0x404170,   1, 0x04, 0x00000000 },
	{}
};

472 473
const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
474 475 476 477
	{ 0x404488,   2, 0x04, 0x00000000 },
	{}
};

478 479
const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
480 481 482 483
	{ 0x407808,   1, 0x04, 0x00000000 },
	{}
};

484 485
const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
486 487 488 489
	{ 0x406024,   1, 0x04, 0x00000000 },
	{}
};

490 491
const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
492 493 494 495 496 497
	{ 0x405844,   1, 0x04, 0x00ffffff },
	{ 0x405850,   1, 0x04, 0x00000000 },
	{ 0x405908,   1, 0x04, 0x00000000 },
	{}
};

498 499
const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
500 501 502 503
	{ 0x40803c,   1, 0x04, 0x00000000 },
	{}
};

504 505
const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
506
	{ 0x4184a0,   1, 0x04, 0x00000000 },
507 508 509
	{}
};

510 511
const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
512 513 514 515
	{ 0x418604,   1, 0x04, 0x00000000 },
	{ 0x418680,   1, 0x04, 0x00000000 },
	{ 0x418714,   1, 0x04, 0x80000000 },
	{ 0x418384,   1, 0x04, 0x00000000 },
516 517 518
	{}
};

519 520
const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
521
	{ 0x418814,   3, 0x04, 0x00000000 },
522 523 524
	{}
};

525 526
const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
527
	{ 0x418b04,   1, 0x04, 0x00000000 },
528 529 530
	{}
};

531 532
const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
533 534 535 536
	{ 0x4188c8,   1, 0x04, 0x80000000 },
	{ 0x4188cc,   1, 0x04, 0x00000000 },
	{ 0x4188d0,   1, 0x04, 0x00010000 },
	{ 0x4188d4,   1, 0x04, 0x00000001 },
537 538 539
	{}
};

540 541
const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
542 543 544 545 546
	{ 0x418910,   1, 0x04, 0x00010001 },
	{ 0x418914,   1, 0x04, 0x00000301 },
	{ 0x418918,   1, 0x04, 0x00800000 },
	{ 0x418980,   1, 0x04, 0x77777770 },
	{ 0x418984,   3, 0x04, 0x77777777 },
547 548 549
	{}
};

550 551
const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
552 553
	{ 0x418c04,   1, 0x04, 0x00000000 },
	{ 0x418c88,   1, 0x04, 0x00000000 },
554 555 556
	{}
};

557 558
const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
559 560 561 562
	{ 0x418d00,   1, 0x04, 0x00000000 },
	{ 0x418f08,   1, 0x04, 0x00000000 },
	{ 0x418e00,   1, 0x04, 0x00000050 },
	{ 0x418e08,   1, 0x04, 0x00000000 },
563 564 565
	{}
};

566 567
const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
568 569 570 571 572
	{ 0x41900c,   1, 0x04, 0x00000000 },
	{ 0x419018,   1, 0x04, 0x00000000 },
	{}
};

573 574
const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
575 576
	{ 0x419d08,   2, 0x04, 0x00000000 },
	{ 0x419d10,   1, 0x04, 0x00000014 },
577 578 579
	{}
};

580 581
const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
582 583 584
	{ 0x419ab0,   1, 0x04, 0x00000000 },
	{ 0x419ab8,   1, 0x04, 0x000000e7 },
	{ 0x419abc,   2, 0x04, 0x00000000 },
585 586 587
	{}
};

588 589
const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
590 591 592 593
	{ 0x41980c,   3, 0x04, 0x00000000 },
	{ 0x419844,   1, 0x04, 0x00000000 },
	{ 0x41984c,   1, 0x04, 0x00005bc5 },
	{ 0x419850,   4, 0x04, 0x00000000 },
594 595 596
	{}
};

597 598
const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
599 600 601 602 603 604
	{ 0x419c98,   1, 0x04, 0x00000000 },
	{ 0x419ca8,   1, 0x04, 0x80000000 },
	{ 0x419cb4,   1, 0x04, 0x00000000 },
	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
	{ 0x419cbc,   1, 0x04, 0x28137606 },
	{ 0x419cc0,   2, 0x04, 0x00000000 },
605 606 607
	{}
};

608 609
const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
610 611
	{ 0x419bd4,   1, 0x04, 0x00800000 },
	{ 0x419bdc,   1, 0x04, 0x00000000 },
612 613 614
	{}
};

615 616
const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
617
	{ 0x419d2c,   1, 0x04, 0x00000000 },
618 619 620
	{}
};

621 622
const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
623
	{ 0x419c0c,   1, 0x04, 0x00000000 },
624 625 626
	{}
};

627 628
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
629 630 631 632 633 634 635 636 637 638 639 640 641 642 643 644
	{ 0x419e00,   1, 0x04, 0x00000000 },
	{ 0x419ea0,   1, 0x04, 0x00000000 },
	{ 0x419ea4,   1, 0x04, 0x00000100 },
	{ 0x419ea8,   1, 0x04, 0x00001100 },
	{ 0x419eac,   1, 0x04, 0x11100702 },
	{ 0x419eb0,   1, 0x04, 0x00000003 },
	{ 0x419eb4,   4, 0x04, 0x00000000 },
	{ 0x419ec8,   1, 0x04, 0x06060618 },
	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
	{ 0x419ed4,   1, 0x04, 0x011104f1 },
	{ 0x419edc,   1, 0x04, 0x00000000 },
	{ 0x419f00,   1, 0x04, 0x00000000 },
	{ 0x419f2c,   1, 0x04, 0x00000000 },
	{}
};

645 646
const struct gf100_gr_init
gf100_gr_init_be_0[] = {
647 648 649 650 651 652 653 654 655 656
	{ 0x40880c,   1, 0x04, 0x00000000 },
	{ 0x408910,   9, 0x04, 0x00000000 },
	{ 0x408950,   1, 0x04, 0x00000000 },
	{ 0x408954,   1, 0x04, 0x0000ffff },
	{ 0x408984,   1, 0x04, 0x00000000 },
	{ 0x408988,   1, 0x04, 0x08040201 },
	{ 0x40898c,   1, 0x04, 0x80402010 },
	{}
};

657 658
const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
659 660 661 662
	{ 0x4040f0,   1, 0x04, 0x00000000 },
	{}
};

663 664
const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
665 666 667 668
	{ 0x419880,   1, 0x04, 0x00000002 },
	{}
};

669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696 697
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
M
Maarten Lankhorst 已提交
698 699 700
	{}
};

701 702 703 704
/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/

705
void
B
Ben Skeggs 已提交
706
gf100_gr_zbc_init(struct gf100_gr *gr)
707 708 709 710 711 712 713 714 715
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
716
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
717 718
	int index;

B
Ben Skeggs 已提交
719 720 721 722 723 724 725
	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
726 727 728
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
729
		gf100_gr_zbc_clear_color(gr, index);
730
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
731
		gf100_gr_zbc_clear_depth(gr, index);
732 733
}

734 735 736 737 738 739
/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
B
Ben Skeggs 已提交
740
gf100_gr_wait_idle(struct gf100_gr *gr)
741
{
742 743
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
744 745 746 747 748 749 750 751
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
752
		nvkm_rd32(device, 0x400700);
753

754 755 756
		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
757 758 759 760 761

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

762 763 764
	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
765 766 767
	return -EAGAIN;
}

768
void
B
Ben Skeggs 已提交
769
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
770
{
771
	struct nvkm_device *device = gr->base.engine.subdev.device;
772 773
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
774 775 776 777 778

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
779
			nvkm_wr32(device, addr, init->data);
780 781 782
			addr += init->pitch;
		}
	}
783 784 785
}

void
B
Ben Skeggs 已提交
786
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
787
{
788
	struct nvkm_device *device = gr->base.engine.subdev.device;
789 790
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
791
	u32 data = 0;
792

793
	nvkm_wr32(device, 0x400208, 0x80000000);
794 795 796 797 798 799

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
800
			nvkm_wr32(device, 0x400204, init->data);
801 802
			data = init->data;
		}
803

804
		while (addr < next) {
805
			nvkm_wr32(device, 0x400200, addr);
806 807 808 809 810
			/**
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
B
Ben Skeggs 已提交
811
				gf100_gr_wait_idle(gr);
812 813 814 815
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
816 817 818
			addr += init->pitch;
		}
	}
819

820
	nvkm_wr32(device, 0x400208, 0x00000000);
821 822 823
}

void
B
Ben Skeggs 已提交
824
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
825
{
826
	struct nvkm_device *device = gr->base.engine.subdev.device;
827 828
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
829
	u32 data = 0;
830

831 832 833 834 835 836
	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
837
			nvkm_wr32(device, 0x40448c, init->data);
838 839 840 841
			data = init->data;
		}

		while (addr < next) {
842
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
843
			addr += init->pitch;
844 845 846 847 848
		}
	}
}

u64
849
gf100_gr_units(struct nvkm_gr *base)
850
{
851
	struct gf100_gr *gr = gf100_gr(base);
852 853
	u64 cfg;

B
Ben Skeggs 已提交
854 855 856
	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
857 858

	return cfg;
859 860
}

861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892 893
/* Decode tables for the PGRAPH trap/error status registers below. */

static const struct nvkm_bitfield gf100_dispatch_error[] = {
	{ 0x00000001, "INJECTED_BUNDLE_ERROR" },
	{ 0x00000002, "CLASS_SUBCH_MISMATCH" },
	{ 0x00000004, "SUBCHSW_DURING_NOTIFY" },
	{}
};

static const struct nvkm_bitfield gf100_m2mf_error[] = {
	{ 0x00000001, "PUSH_TOO_MUCH_DATA" },
	{ 0x00000002, "PUSH_NOT_ENOUGH_DATA" },
	{}
};

static const struct nvkm_bitfield gf100_unk6_error[] = {
	{ 0x00000001, "TEMP_TOO_SMALL" },
	{}
};

static const struct nvkm_bitfield gf100_ccache_error[] = {
	{ 0x00000001, "INTR" },
	{ 0x00000002, "LDCONST_OOB" },
	{}
};

static const struct nvkm_bitfield gf100_macro_error[] = {
	{ 0x00000001, "TOO_FEW_PARAMS" },
	{ 0x00000002, "TOO_MANY_PARAMS" },
	{ 0x00000004, "ILLEGAL_OPCODE" },
	{ 0x00000008, "DOUBLE_BRANCH" },
	{ 0x00000010, "WATCHDOG" },
	{}
};

static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000040, "CTA_RESUME" },
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x00800000, "CTA_THREAD_DIMENSION_ZERO" },
	{ 0x01000000, "MEMORY_WINDOW_OVERLAP" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

922
static void
B
Ben Skeggs 已提交
923
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
924
{
925 926 927
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
928
	u32 trap[4];
929

930
	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
931 932 933
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
934

935
	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
936

937 938 939 940
	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
941
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
942 943
}

944
static const struct nvkm_enum gf100_mp_warp_error[] = {
945 946 947 948 949 950 951 952
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
953
	{ 0x10, "INVALID_ADDR_SPACE" },
954 955 956 957
	{ 0x11, "INVALID_PARAM" },
	{}
};

958
static const struct nvkm_bitfield gf100_mp_global_error[] = {
959 960 961 962 963 964
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
B
Ben Skeggs 已提交
965
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
966
{
967 968
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
969 970
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
971 972
	const struct nvkm_enum *warp;
	char glob[128];
973

974 975 976 977 978 979
	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
980

981 982
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
983 984
}

985
static void
B
Ben Skeggs 已提交
986
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
987
{
988 989
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
990
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
991 992

	if (stat & 0x00000001) {
993
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
994
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
995
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
996 997 998 999
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
B
Ben Skeggs 已提交
1000
		gf100_gr_trap_mp(gr, gpc, tpc);
1001 1002 1003 1004
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
1005
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
1006
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
1007
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
1008 1009 1010 1011
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
1012
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
1013
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
1014
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
1015 1016 1017 1018
		stat &= ~0x00000008;
	}

	if (stat) {
1019
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
1020 1021 1022 1023
	}
}

static void
B
Ben Skeggs 已提交
1024
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
1025
{
1026 1027
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1028
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
1029 1030 1031
	int tpc;

	if (stat & 0x00000001) {
B
Ben Skeggs 已提交
1032
		gf100_gr_trap_gpc_rop(gr, gpc);
1033 1034 1035 1036
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
1037
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
1038
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
1039
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
1040 1041 1042 1043
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
1044
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
1045
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
1046
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
1047 1048 1049 1050
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
1051
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
1052
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
1053
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
1054 1055 1056
		stat &= ~0x00000009;
	}

B
Ben Skeggs 已提交
1057
	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1058 1059
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
B
Ben Skeggs 已提交
1060
			gf100_gr_trap_tpc(gr, gpc, tpc);
1061
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
1062 1063 1064 1065 1066
			stat &= ~mask;
		}
	}

	if (stat) {
1067
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
1068 1069 1070 1071
	}
}

/* Top-level PGRAPH trap handler: decode 0x400108, dispatch each known trap
 * source to its unit handler (or decode it inline), ack it back into
 * 0x400108, and log anything that remains unhandled.
 */
static void
gf100_gr_trap_intr(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
	u32 trap = nvkm_rd32(device, 0x400108);
	int rop, gpc;

	if (trap & 0x00000001) {
		/* DISPATCH unit trap. */
		u32 stat = nvkm_rd32(device, 0x404000);

		nvkm_snprintbf(error, sizeof(error), gf100_dispatch_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "DISPATCH %08x [%s]\n", stat, error);
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
		/* M2MF (memory-to-memory format) trap. */
		u32 stat = nvkm_rd32(device, 0x404600);

		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "M2MF %08x [%s]\n", stat, error);

		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
		/* CCACHE trap; shares the M2MF bitfield decode table. */
		u32 stat = nvkm_rd32(device, 0x408030);

		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
		/* SHADER trap: low 24 bits are the SPH offset, next 6 the stage. */
		u32 stat = nvkm_rd32(device, 0x405840);
		nvkm_error(subdev, "SHADER %08x, sph: 0x%06x, stage: 0x%02x\n",
			   stat, stat & 0xffffff, (stat >> 24) & 0x3f);
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
		/* Unidentified unit 6 trap. */
		u32 stat = nvkm_rd32(device, 0x40601c);

		nvkm_snprintbf(error, sizeof(error), gf100_unk6_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "UNK6 %08x [%s]\n", stat, error);

		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
		/* MACRO processor trap: report status, PC and opcode. */
		u32 stat = nvkm_rd32(device, 0x404490);
		u32 pc = nvkm_rd32(device, 0x404494);
		u32 op = nvkm_rd32(device, 0x40449c);

		nvkm_snprintbf(error, sizeof(error), gf100_macro_error,
			       stat & 0x1fffffff);
		nvkm_error(subdev, "MACRO %08x [%s], pc: 0x%03x%s, op: 0x%08x\n",
			   stat, error, pc & 0x7ff,
			   (pc & 0x10000000) ? "" : " (invalid)",
			   op);

		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
		trap &= ~0x00000080;
	}

	if (trap & 0x00000100) {
		/* SKED trap (scheduler; present on gk104+). */
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;

		nvkm_snprintbf(error, sizeof(error), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, error);

		if (stat)
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
		trap &= ~0x00000100;
	}

	if (trap & 0x01000000) {
		/* Per-GPC traps: 0x400118 has one bit per GPC. */
		u32 stat = nvkm_rd32(device, 0x400118);
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
				gf100_gr_trap_gpc(gr, gpc);
				nvkm_wr32(device, 0x400118, mask);
				stat &= ~mask;
			}
		}
		nvkm_wr32(device, 0x400108, 0x01000000);
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
		/* ROP traps: dump and ack Z and colour status for each ROP. */
		for (rop = 0; rop < gr->rop_nr; rop++) {
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
			nvkm_error(subdev, "ROP%d %08x %08x\n",
				 rop, statz, statc);
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		}
		nvkm_wr32(device, 0x400108, 0x02000000);
		trap &= ~0x02000000;
	}

	if (trap) {
		/* Ack unknown sources too, so the interrupt doesn't wedge. */
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
		nvkm_wr32(device, 0x400108, trap);
	}
}

1197
static void
B
Ben Skeggs 已提交
1198
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1199
{
1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212 1213
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
1214 1215 1216
}

void
B
Ben Skeggs 已提交
1217
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1218
{
1219 1220
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1221 1222
	u32 gpc;

B
Ben Skeggs 已提交
1223
	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1224
	for (gpc = 0; gpc < gpcnr; gpc++)
B
Ben Skeggs 已提交
1225
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1226 1227 1228
}

/* Service interrupts raised by the FECS ctxctl falcon (status in 0x409c18):
 * ucode errors (including decoded firmware-method failures) and the
 * watchdog timeout.  Handled bits are acked via 0x409c20.
 */
static void
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	u32 stat = nvkm_rd32(device, 0x409c18);

	if (stat & 0x00000001) {
		u32 code = nvkm_rd32(device, 0x409814);
		if (code == E_BAD_FWMTHD) {
			/* Firmware rejected a method; decode the details. */
			u32 class = nvkm_rd32(device, 0x409808);
			u32  addr = nvkm_rd32(device, 0x40980c);
			u32  subc = (addr & 0x00070000) >> 16;
			u32  mthd = (addr & 0x00003ffc);
			u32  data = nvkm_rd32(device, 0x409810);

			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);

			nvkm_wr32(device, 0x409c20, 0x00000001);
			stat &= ~0x00000001;
		} else {
			/* Other ucode errors are logged but deliberately not
			 * acked, so the bit shows up again below. */
			nvkm_error(subdev, "FECS ucode error %d\n", code);
		}
	}

	if (stat & 0x00080000) {
		nvkm_error(subdev, "FECS watchdog timeout\n");
		gf100_gr_ctxctl_debug(gr);
		nvkm_wr32(device, 0x409c20, 0x00080000);
		stat &= ~0x00080000;
	}

	if (stat) {
		/* Unknown/unacked bits: dump debug state and ack them all. */
		nvkm_error(subdev, "FECS %08x\n", stat);
		gf100_gr_ctxctl_debug(gr);
		nvkm_wr32(device, 0x409c20, stat);
	}
}

1269
/* Main PGRAPH interrupt handler.  Reads the pending-interrupt mask
 * (0x400100), the trapped method/data registers, and the channel instance
 * that caused the fault, then handles and acks each known source.
 */
static void
gf100_gr_intr(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
	u32 class;
	const char *name = "unknown";
	int chid = -1;

	/* Look up the channel (holds a reference until _put below) so log
	 * messages can name the offending client. */
	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}

	/* Kepler+ doesn't expose per-subchannel class registers beyond 4. */
	if (device->card_type < NV_E0 || subc < 4)
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
	else
		class = 0x0000;

	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
		nvkm_wr32(device, 0x400100, 0x00000001);
		stat &= ~0x00000001;
	}

	if (stat & 0x00000010) {
		/* Illegal method; give software-method emulation first shot. */
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
				   chid, inst << 12, name, subc,
				   class, mthd, data);
		}
		nvkm_wr32(device, 0x400100, 0x00000010);
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
			   chid, inst << 12, name, subc, class, mthd, data);
		nvkm_wr32(device, 0x400100, 0x00000020);
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
		/* DATA_ERROR: decode the error code via the NV50 name table. */
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
			   name, subc, class, mthd, data);
		nvkm_wr32(device, 0x400100, 0x00100000);
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
		/* Engine trap: decode via the dedicated trap handler. */
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
			   chid, inst << 12, name);
		gf100_gr_trap_intr(gr);
		nvkm_wr32(device, 0x400100, 0x00200000);
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
		/* FECS ctxctl falcon interrupt. */
		gf100_gr_ctxctl_isr(gr);
		nvkm_wr32(device, 0x400100, 0x00080000);
		stat &= ~0x00080000;
	}

	if (stat) {
		nvkm_error(subdev, "intr %08x\n", stat);
		nvkm_wr32(device, 0x400100, stat);
	}

	/* Re-enable fetching (0x400500) and drop the channel reference. */
	nvkm_wr32(device, 0x400500, 0x00010001);
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
}

1361
/* Upload falcon microcode: write the data segment through the auto-increment
 * data port, then the code segment 0x40-word page at a time, padding the
 * final page with zeros as the hardware requires.
 *
 * @fuc_base: base address of the target falcon (e.g. 0x409000 for FECS)
 * @code:     instruction segment image
 * @data:     data segment image
 */
void
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int i;

	/* Select data upload with auto-increment, then stream the words. */
	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
	for (i = 0; i < data->size / 4; i++)
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);

	/* Code upload: a new page index must be set every 0x40 words. */
	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
	}

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
		nvkm_wr32(device, fuc_base + 0x0184, 0);
}

1384
/* Append a context-switch register list to a falcon's data segment.
 * Register runs from 'pack' are coalesced into (count << 26 | addr) transfer
 * words — a new word is started whenever the address sequence breaks or a
 * run reaches 32 entries — and the list's end pointer is stored back at
 * 'starstar'.
 *
 * @falcon:   falcon base address (0x409000 HUB, 0x41a000 GPC)
 * @starstar: offset of the list's start/end pointer pair in falcon data
 * @base:     address bias subtracted from each register address
 */
static void
gf100_gr_init_csdata(struct gf100_gr *gr,
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
	u32 addr = ~0, prev = ~0, xfer = 0;
	u32 star, temp;

	/* Read the current start/end pointers; append after the larger. */
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
	if (temp > star)
		star = temp;
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);

	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				/* Sequence broke (or run full): flush the
				 * accumulated transfer word. */
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
					nvkm_wr32(device, falcon + 0x01c4, data);
					star += 4;
				}
				addr = head;
				xfer = 0;
			}
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
		}
	}

	/* Flush the final run and store the updated end pointer. */
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
}

1426
/* Boot the context-switching (ctxctl) firmware on the FECS and GPCCS
 * falcons, then generate the golden context image if one doesn't exist.
 *
 * Two paths: external firmware (gr->firmware, possibly secure-boot managed)
 * which is driven through a sequence of FECS methods (0x409504), or the
 * built-in ucode from gr->func, which runs a simpler self-init protocol.
 *
 * Returns 0 on success, -EBUSY if the firmware fails to respond within 2s,
 * -ENOSYS if neither external nor built-in ucode is available, or a context
 * generation error.
 */
int
gf100_gr_init_ctxctl(struct gf100_gr *gr)
{
	const struct gf100_grctx_func *grctx = gr->func->grctx;
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	struct nvkm_secboot *sb = device->secboot;
	int i;

	if (gr->firmware) {
		/* load fuc microcode */
		nvkm_mc_unk260(device->mc, 0);

		/* securely-managed falcons must be reset using secure boot */
		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_FECS);
		else
			gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
					 &gr->fuc409d);
		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
			nvkm_secboot_reset(sb, NVKM_SECBOOT_FALCON_GPCCS);
		else
			gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
					 &gr->fuc41ad);

		nvkm_mc_unk260(device->mc, 1);

		/* start both of them running */
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);

		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_GPCCS))
			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_GPCCS);
		else
			nvkm_wr32(device, 0x41a100, 0x00000002);
		if (nvkm_secboot_is_managed(sb, NVKM_SECBOOT_FALCON_FECS))
			nvkm_secboot_start(sb, NVKM_SECBOOT_FALCON_FECS);
		else
			nvkm_wr32(device, 0x409100, 0x00000002);
		/* Wait for FECS to signal it has initialised. */
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;

		/* Method 0x21: set watchdog timeout. */
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);

		/* Method 0x10: query context image size (reply in 0x409800). */
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
			return -EBUSY;

		/* Method 0x16. */
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
			return -EBUSY;

		/* Method 0x25. */
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
			return -EBUSY;

		/* Extra setup methods required on Kepler and newer. */
		if (device->chipset >= 0xe0) {
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
				return -EBUSY;

			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
				return -EBUSY;

			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
				return -EBUSY;

			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
		}

		/* Generate the golden context image if not done already. */
		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
			if (ret) {
				nvkm_error(subdev, "failed to construct context\n");
				return ret;
			}
		}

		return 0;
	} else
	if (!gr->func->fecs.ucode) {
		return -ENOSYS;
	}

	/* load HUB microcode */
	nvkm_mc_unk260(device->mc, 0);
	nvkm_wr32(device, 0x4091c0, 0x01000000);
	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);

	nvkm_wr32(device, 0x409180, 0x01000000);
	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
		if ((i & 0x3f) == 0)
			nvkm_wr32(device, 0x409188, i >> 6);
		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
	}

	/* load GPC microcode */
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);

	nvkm_wr32(device, 0x41a180, 0x01000000);
	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
		if ((i & 0x3f) == 0)
			nvkm_wr32(device, 0x41a188, i >> 6);
		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
	}
	nvkm_mc_unk260(device->mc, 1);

	/* load register lists */
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);

	/* start HUB ucode running, it'll init the GPCs */
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
		gf100_gr_ctxctl_debug(gr);
		return -EBUSY;
	}

	gr->size = nvkm_rd32(device, 0x409804);
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
		if (ret) {
			nvkm_error(subdev, "failed to construct context\n");
			return ret;
		}
	}

	return 0;
}

1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623 1624 1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643
/* One-time engine initialisation: allocate scratch memory objects, probe
 * the GPC/TPC/PPC/ROP topology from hardware, and set the per-chipset
 * "magic_not_rop_nr" value used during context setup.
 */
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int ret, i, j;

	nvkm_pmu_pgob(device->pmu, false);

	/* Two 4KiB scratch buffers, both filled with 0x10, whose addresses
	 * are programmed into GPC broadcast registers during init. */
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b4);
	if (ret)
		return ret;

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b8);
	if (ret)
		return ret;

	nvkm_kmap(gr->unk4188b4);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b4, i, 0x00000010);
	nvkm_done(gr->unk4188b4);

	nvkm_kmap(gr->unk4188b8);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b8, i, 0x00000010);
	nvkm_done(gr->unk4188b8);

	/* Read unit counts from hardware and record per-PPC TPC masks. */
	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;
	for (i = 0; i < gr->gpc_nr; i++) {
		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
		gr->tpc_total += gr->tpc_nr[i];
		gr->ppc_nr[i]  = gr->func->ppc_nr;
		for (j = 0; j < gr->ppc_nr[i]; j++) {
			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
			if (mask)
				gr->ppc_mask[i] |= (1 << j);
			gr->ppc_tpc_nr[i][j] = hweight8(mask);
		}
	}

	/*XXX: these need figuring out... though it might not even matter */
	switch (device->chipset) {
	case 0xc0:
		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
			gr->magic_not_rop_nr = 0x07;
		} else
		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
			gr->magic_not_rop_nr = 0x05;
		} else
		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
			gr->magic_not_rop_nr = 0x06;
		}
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc1: /* 2/0/0/0, 1 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc8: /* 4/4/3/4, 5 */
		gr->magic_not_rop_nr = 0x06;
		break;
	case 0xce: /* 4/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xcf: /* 4/0/0/0, 3 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xd7:
	case 0xd9: /* 1/0/0/0, 1 */
	case 0xea: /* gk20a */
	case 0x12b: /* gm20b */
		gr->magic_not_rop_nr = 0x01;
		break;
	}

	return 0;
}

int
gf100_gr_init_(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
	return gr->func->init(gr);
}

/* Release a firmware image loaded by gf100_gr_ctor_fw().  NULLs the data
 * pointer so repeated calls (and the engine dtor) are safe.
 */
void
gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
{
	kfree(fuc->data);
	fuc->data = NULL;
}

1707 1708 1709 1710 1711 1712
/* Release a vmalloc'd register-init pack (fuc_bundle/method/sw_* lists);
 * vfree(NULL) is a no-op, so unloaded packs are handled too.
 */
static void
gf100_gr_dtor_init(struct gf100_gr_pack *pack)
{
	vfree(pack);
}

1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726
/* nvkm_gr destructor: run the chipset-specific dtor (if any), then free the
 * golden context image, firmware copies, register-init packs and scratch
 * memory objects.  Returns the object for the core to kfree.
 */
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);

	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	gf100_gr_dtor_fw(&gr->fuc409c);
	gf100_gr_dtor_fw(&gr->fuc409d);
	gf100_gr_dtor_fw(&gr->fuc41ac);
	gf100_gr_dtor_fw(&gr->fuc41ad);

	gf100_gr_dtor_init(gr->fuc_bundle);
	gf100_gr_dtor_init(gr->fuc_method);
	gf100_gr_dtor_init(gr->fuc_sw_ctx);
	gf100_gr_dtor_init(gr->fuc_sw_nonctx);

	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);
	return gr;
}

/* Base nvkm_gr method table shared by all gf100-family GR engines. */
static const struct nvkm_gr_func
gf100_gr_ = {
	.dtor = gf100_gr_dtor,
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};

int
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
		 struct gf100_gr_fuc *fuc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	const struct firmware *fw;
	int ret;

1757
	ret = nvkm_firmware_get(device, fwname, &fw);
1758 1759 1760 1761 1762 1763 1764
	if (ret) {
		nvkm_error(subdev, "failed to load %s\n", fwname);
		return ret;
	}

	fuc->size = fw->size;
	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
1765
	nvkm_firmware_put(fw);
1766 1767 1768 1769 1770 1771 1772 1773 1774 1775 1776 1777 1778 1779 1780 1781 1782 1783 1784 1785 1786 1787
	return (fuc->data != NULL) ? 0 : -ENOMEM;
}

int
gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct gf100_gr *gr)
{
	int ret;

	gr->func = func;
	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW",
				    func->fecs.ucode == NULL);

	ret = nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000,
			   gr->firmware || func->fecs.ucode != NULL,
			   &gr->base);
	if (ret)
		return ret;

	return 0;
}

1788
/* Allocate and construct a gf100-family GR engine instance, loading the
 * four external firmware images when the firmware path is selected.
 * *pgr is published before construction so the core can run the dtor on
 * partial failure.
 */
int
gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr *gr;
	int ret;

	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;

	ret = gf100_gr_ctor(func, device, index, gr);
	if (ret)
		return ret;

	if (gr->firmware) {
		/* FECS/GPCCS instruction and data images; any failure makes
		 * the engine unusable (loaded pieces freed via the dtor). */
		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
			return -ENODEV;
	}

	return 0;
}

/* Chipset init for GF100: program GPC broadcast state and the TPC
 * distribution tables, apply the golden mmio list, unmask/route all
 * interrupt and trap sources, initialise ZBC, and finally boot the
 * context-switch firmware.
 */
int
gf100_gr_init(struct gf100_gr *gr)
{
	struct nvkm_device *device = gr->base.engine.subdev.device;
	/* Screen-tile mapping constant derived from the total TPC count. */
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
	int i;

	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
	/* Point broadcast registers at the scratch buffers from oneinit. */
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);

	gf100_gr_mmio(gr, gr->func->mmio);

	/* Distribute TPC indices round-robin across GPCs, 4 bits each. */
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
		do {
			gpc = (gpc + 1) % gr->gpc_nr;
		} while (!tpcnr[gpc]);
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
			gr->tpc_total);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
	}

	/* GF117 keeps this register at a different offset. */
	if (device->chipset != 0xd7)
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
	else
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);

	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));

	nvkm_wr32(device, 0x400500, 0x00010001);

	/* Unmask all top-level interrupts. */
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);

	/* Enable trap reporting in each dispatch/front-end unit. */
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);

	/* Enable per-GPC and per-TPC trap reporting. */
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
		}
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
	}

	/* Enable ROP trap reporting. */
	for (rop = 0; rop < gr->rop_nr; rop++) {
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
	}

	/* Clear pending and unmask trap/interrupt aggregation registers. */
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);

	nvkm_wr32(device, 0x400054, 0x34ce3464);

	gf100_gr_zbc_init(gr);

	return gf100_gr_init_ctxctl(gr);
}

1921
/* Built-in FECS (HUB) context-switch microcode, generated from fuc source. */
#include "fuc/hubgf100.fuc3.h"

struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
};

1931
/* Built-in GPCCS context-switch microcode, generated from fuc source. */
#include "fuc/gpcgf100.fuc3.h"

struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
};

1941 1942
/* GF100 (Fermi) chipset description: init routine, golden register list,
 * built-in ucode, context functions and the supported object classes.
 */
static const struct gf100_gr_func
gf100_gr = {
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};

1957 1958 1959 1960 1961
/* Device entry point: create a GF100 (Fermi) GR engine instance. */
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gf100_gr_new_(&gf100_gr, device, index, pgr);
}