/*
 * Copyright 2012 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
#include <subdev/fb.h>
#include <subdev/mc.h>
#include <subdev/pmu.h>
#include <subdev/timer.h>
#include <engine/fifo.h>

#include <nvif/class.h>
#include <nvif/cl9097.h>
#include <nvif/unpack.h>

/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
B
Ben Skeggs 已提交
45
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
46
{
47
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
48
	if (gr->zbc_color[zbc].format) {
49 50 51 52 53 54 55 56
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
57 58 59
}

static int
B
Ben Skeggs 已提交
60
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
61
		       const u32 ds[4], const u32 l2[4])
62
{
63
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
64 65 66
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
67 68
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
69
				continue;
B
Ben Skeggs 已提交
70 71
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
72
				continue;
B
Ben Skeggs 已提交
73 74
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
75 76 77 78 79 80 81 82 83
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

84 85 86
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
87 88 89
	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
90
	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
91
	gf100_gr_zbc_clear_color(gr, zbc);
92 93 94 95
	return zbc;
}

static void
B
Ben Skeggs 已提交
96
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
97
{
98
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
99
	if (gr->zbc_depth[zbc].format)
100 101 102 103
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
104 105 106
}

static int
B
Ben Skeggs 已提交
107
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
108
		       const u32 ds, const u32 l2)
109
{
110
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
111 112 113
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
114 115
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
116
				continue;
B
Ben Skeggs 已提交
117
			if (gr->zbc_depth[i].ds != ds)
118
				continue;
B
Ben Skeggs 已提交
119
			if (gr->zbc_depth[i].l2 != l2) {
120 121 122 123 124 125 126 127 128
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

129 130 131
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
132 133 134
	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
135
	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
136
	gf100_gr_zbc_clear_depth(gr, zbc);
137 138 139
	return zbc;
}

140 141 142
/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/
143 144 145 146 147 148
/* Map a pointer to the embedded nvkm_object back to its gf100_gr_object. */
#define gf100_gr_object(p) container_of((p), struct gf100_gr_object, object)

/*
 * Graphics object instance: the base nvkm_object plus the channel it was
 * created on (filled in by gf100_gr_object_new() from oclass->parent).
 */
struct gf100_gr_object {
	struct nvkm_object object;
	struct gf100_gr_chan *chan;
};
149

150
static int
151
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
152
{
153
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
154 155 156
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
157
	int ret = -ENOSYS;
158

159
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
B
Ben Skeggs 已提交
180
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
181 182
							   args->v0.ds,
							   args->v0.l2);
183 184 185 186 187 188 189 190 191 192 193 194 195 196
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
197
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
198
{
199
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
200 201 202
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
203
	int ret = -ENOSYS;
204

205
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
206 207
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
B
Ben Skeggs 已提交
208
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
209 210
							   args->v0.ds,
							   args->v0.l2);
211 212 213 214 215 216 217 218 219 220
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
221
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
222
{
223
	nvif_ioctl(object, "fermi mthd %08x\n", mthd);
224 225
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
226
		return gf100_fermi_mthd_zbc_color(object, data, size);
227
	case FERMI_A_ZBC_DEPTH:
228
		return gf100_fermi_mthd_zbc_depth(object, data, size);
229 230 231 232 233 234
	default:
		break;
	}
	return -EINVAL;
}

235 236
const struct nvkm_object_func
gf100_fermi = {
237
	.mthd = gf100_fermi_mthd,
238 239
};

240 241
/*
 * Write all-ones (data != 0) or all-zeroes (data == 0) to registers
 * 0x419e44 and 0x419e4c.
 */
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	const u32 mask = data ? 0xffffffff : 0x00000000;

	nvkm_wr32(device, 0x419e44, mask);
	nvkm_wr32(device, 0x419e4c, mask);
}

247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
/*
 * Software method handling.
 *
 * Only method 0x1528 on classes whose low byte is 0x97 or 0xc0 is handled
 * (by gf100_gr_mthd_set_shader_exceptions()).
 *
 * Returns true when the method was handled here, false otherwise.
 */
static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	const u32 id = class & 0x00ff;

	if (id != 0x97 && id != 0xc0)
		return false;
	if (mthd != 0x1528)
		return false;

	gf100_gr_mthd_set_shader_exceptions(device, data);
	return true;
}
266

267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287
/*
 * Empty object function table; gf100_gr_object_new() falls back to it when
 * the class being instantiated supplies no function table of its own.
 */
static const struct nvkm_object_func
gf100_gr_object_func = {
};

/*
 * Constructor for graphics objects created on a gr channel.
 *
 * Allocates a gf100_gr_object, publishes it through @pobject, constructs the
 * embedded nvkm_object with the class' own function table (or the empty
 * default when it has none) and records the parent channel.
 *
 * Returns 0 on success or -ENOMEM if the allocation fails.
 */
static int
gf100_gr_object_new(const struct nvkm_oclass *oclass, void *data, u32 size,
		    struct nvkm_object **pobject)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(oclass->parent);
	struct gf100_gr_object *object;

	object = kzalloc(sizeof(*object), GFP_KERNEL);
	if (!object)
		return -ENOMEM;
	*pobject = &object->object;

	if (oclass->base.func)
		nvkm_object_ctor(oclass->base.func, oclass, &object->object);
	else
		nvkm_object_ctor(&gf100_gr_object_func, oclass,
				 &object->object);

	object->chan = chan;
	return 0;
}

288 289 290 291 292 293 294 295 296
static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int c = 0;

	while (gr->func->sclass[c].oclass) {
		if (c++ == index) {
			*sclass = gr->func->sclass[index];
297
			sclass->ctor = gf100_gr_object_new;
298 299 300 301 302 303
			return index;
		}
	}

	return c;
}
304 305 306 307

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
308

309 310 311
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
312
{
313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

/*
 * Channel destructor: unmaps and releases every mapped data buffer and the
 * mmio list buffer, drops the backing memory objects, and returns the
 * channel structure itself.
 */
static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	int n;

	for (n = 0; n < ARRAY_SIZE(chan->data); n++) {
		/* a vma without a node was never allocated */
		if (chan->data[n].vma.node) {
			nvkm_vm_unmap(&chan->data[n].vma);
			nvkm_vm_put(&chan->data[n].vma);
		}
		nvkm_memory_del(&chan->data[n].mem);
	}

	if (chan->mmio_vma.node) {
		nvkm_vm_unmap(&chan->mmio_vma);
		nvkm_vm_put(&chan->mmio_vma);
	}
	nvkm_memory_del(&chan->mmio);

	return chan;
}

/*
 * Object functions for a gr channel; passed to nvkm_object_ctor() by
 * gf100_gr_chan_new().
 */
static const struct nvkm_object_func
gf100_gr_chan = {
	.dtor = gf100_gr_chan_dtor,
	.bind = gf100_gr_chan_bind,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
B
Ben Skeggs 已提交
378 379
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
380
	struct gf100_gr_chan *chan;
381
	struct nvkm_device *device = gr->base.engine.subdev.device;
382 383
	int ret, i;

384 385 386 387 388
	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;
389

390 391 392 393
	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
394 395
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
396 397 398
	if (ret)
		return ret;

399
	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
400
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
401 402 403
	if (ret)
		return ret;

404 405
	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

406
	/* allocate buffers referenced by mmio list */
B
Ben Skeggs 已提交
407
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
408 409 410
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
411 412
		if (ret)
			return ret;
413

414 415 416
		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
417 418
		if (ret)
			return ret;
419

420
		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
421
		data++;
422 423
	}

424
	/* finally, fill in the mmio list and point the context at it */
425
	nvkm_kmap(chan->mmio);
B
Ben Skeggs 已提交
426
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
427 428
		u32 addr = mmio->addr;
		u32 data = mmio->data;
429

430
		if (mmio->buffer >= 0) {
431
			u64 info = chan->data[mmio->buffer].vma.offset;
432 433
			data |= info >> mmio->shift;
		}
434

435 436
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
437 438
		mmio++;
	}
439
	nvkm_done(chan->mmio);
440
	return 0;
441 442
}

/*******************************************************************************
 * PGRAPH register lists
 ******************************************************************************/

447 448
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
	{ 0x400080,   1, 0x04, 0x003083c2 },
	{ 0x400088,   1, 0x04, 0x00006fe7 },
	{ 0x40008c,   1, 0x04, 0x00000000 },
	{ 0x400090,   1, 0x04, 0x00000030 },
	{ 0x40013c,   1, 0x04, 0x013901f7 },
	{ 0x400140,   1, 0x04, 0x00000100 },
	{ 0x400144,   1, 0x04, 0x00000000 },
	{ 0x400148,   1, 0x04, 0x00000110 },
	{ 0x400138,   1, 0x04, 0x00000000 },
	{ 0x400130,   2, 0x04, 0x00000000 },
	{ 0x400124,   1, 0x04, 0x00000002 },
	{}
};

463 464
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
	{ 0x40415c,   1, 0x04, 0x00000000 },
	{ 0x404170,   1, 0x04, 0x00000000 },
	{}
};

470 471
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
	{ 0x404488,   2, 0x04, 0x00000000 },
	{}
};

476 477
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
	{ 0x407808,   1, 0x04, 0x00000000 },
	{}
};

482 483
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
	{ 0x406024,   1, 0x04, 0x00000000 },
	{}
};

488 489
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
	{ 0x405844,   1, 0x04, 0x00ffffff },
	{ 0x405850,   1, 0x04, 0x00000000 },
	{ 0x405908,   1, 0x04, 0x00000000 },
	{}
};

496 497
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
	{ 0x40803c,   1, 0x04, 0x00000000 },
	{}
};

502 503
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
	{ 0x4184a0,   1, 0x04, 0x00000000 },
	{}
};

508 509
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
	{ 0x418604,   1, 0x04, 0x00000000 },
	{ 0x418680,   1, 0x04, 0x00000000 },
	{ 0x418714,   1, 0x04, 0x80000000 },
	{ 0x418384,   1, 0x04, 0x00000000 },
	{}
};

517 518
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
	{ 0x418814,   3, 0x04, 0x00000000 },
	{}
};

523 524
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
	{ 0x418b04,   1, 0x04, 0x00000000 },
	{}
};

529 530
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
	{ 0x4188c8,   1, 0x04, 0x80000000 },
	{ 0x4188cc,   1, 0x04, 0x00000000 },
	{ 0x4188d0,   1, 0x04, 0x00010000 },
	{ 0x4188d4,   1, 0x04, 0x00000001 },
	{}
};

538 539
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
	{ 0x418910,   1, 0x04, 0x00010001 },
	{ 0x418914,   1, 0x04, 0x00000301 },
	{ 0x418918,   1, 0x04, 0x00800000 },
	{ 0x418980,   1, 0x04, 0x77777770 },
	{ 0x418984,   3, 0x04, 0x77777777 },
	{}
};

548 549
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
	{ 0x418c04,   1, 0x04, 0x00000000 },
	{ 0x418c88,   1, 0x04, 0x00000000 },
	{}
};

555 556
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
	{ 0x418d00,   1, 0x04, 0x00000000 },
	{ 0x418f08,   1, 0x04, 0x00000000 },
	{ 0x418e00,   1, 0x04, 0x00000050 },
	{ 0x418e08,   1, 0x04, 0x00000000 },
	{}
};

564 565
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
	{ 0x41900c,   1, 0x04, 0x00000000 },
	{ 0x419018,   1, 0x04, 0x00000000 },
	{}
};

571 572
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
	{ 0x419d08,   2, 0x04, 0x00000000 },
	{ 0x419d10,   1, 0x04, 0x00000014 },
	{}
};

578 579
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
	{ 0x419ab0,   1, 0x04, 0x00000000 },
	{ 0x419ab8,   1, 0x04, 0x000000e7 },
	{ 0x419abc,   2, 0x04, 0x00000000 },
	{}
};

586 587
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
	{ 0x41980c,   3, 0x04, 0x00000000 },
	{ 0x419844,   1, 0x04, 0x00000000 },
	{ 0x41984c,   1, 0x04, 0x00005bc5 },
	{ 0x419850,   4, 0x04, 0x00000000 },
	{}
};

595 596
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
	{ 0x419c98,   1, 0x04, 0x00000000 },
	{ 0x419ca8,   1, 0x04, 0x80000000 },
	{ 0x419cb4,   1, 0x04, 0x00000000 },
	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
	{ 0x419cbc,   1, 0x04, 0x28137606 },
	{ 0x419cc0,   2, 0x04, 0x00000000 },
	{}
};

606 607
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
	{ 0x419bd4,   1, 0x04, 0x00800000 },
	{ 0x419bdc,   1, 0x04, 0x00000000 },
	{}
};

613 614
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
	{ 0x419d2c,   1, 0x04, 0x00000000 },
	{}
};

619 620
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
	{ 0x419c0c,   1, 0x04, 0x00000000 },
	{}
};

625 626
/* Register init entries referenced from gf100_gr_pack_mmio[] (file-local). */
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
	{ 0x419e00,   1, 0x04, 0x00000000 },
	{ 0x419ea0,   1, 0x04, 0x00000000 },
	{ 0x419ea4,   1, 0x04, 0x00000100 },
	{ 0x419ea8,   1, 0x04, 0x00001100 },
	{ 0x419eac,   1, 0x04, 0x11100702 },
	{ 0x419eb0,   1, 0x04, 0x00000003 },
	{ 0x419eb4,   4, 0x04, 0x00000000 },
	{ 0x419ec8,   1, 0x04, 0x06060618 },
	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
	{ 0x419ed4,   1, 0x04, 0x011104f1 },
	{ 0x419edc,   1, 0x04, 0x00000000 },
	{ 0x419f00,   1, 0x04, 0x00000000 },
	{ 0x419f2c,   1, 0x04, 0x00000000 },
	{}
};

643 644
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_be_0[] = {
	{ 0x40880c,   1, 0x04, 0x00000000 },
	{ 0x408910,   9, 0x04, 0x00000000 },
	{ 0x408950,   1, 0x04, 0x00000000 },
	{ 0x408954,   1, 0x04, 0x0000ffff },
	{ 0x408984,   1, 0x04, 0x00000000 },
	{ 0x408988,   1, 0x04, 0x08040201 },
	{ 0x40898c,   1, 0x04, 0x80402010 },
	{}
};

655 656
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
	{ 0x4040f0,   1, 0x04, 0x00000000 },
	{}
};

661 662
/* Register init entries referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
	{ 0x419880,   1, 0x04, 0x00000002 },
	{}
};

667 668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695
/*
 * Ordered collection of the register init lists defined above.  A pack of
 * this type is what gf100_gr_mmio() walks with pack_for_each_init().
 */
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
	{}
};

699 700 701 702
/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/

703
void
B
Ben Skeggs 已提交
704
gf100_gr_zbc_init(struct gf100_gr *gr)
705 706 707 708 709 710 711 712 713
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
714
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
715 716
	int index;

B
Ben Skeggs 已提交
717 718 719 720 721 722 723
	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
724 725 726
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
727
		gf100_gr_zbc_clear_color(gr, index);
728
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
729
		gf100_gr_zbc_clear_depth(gr, index);
730 731
}

732 733 734 735 736 737
/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
B
Ben Skeggs 已提交
738
gf100_gr_wait_idle(struct gf100_gr *gr)
739
{
740 741
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
742 743 744 745 746 747 748 749
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
750
		nvkm_rd32(device, 0x400700);
751

752 753 754
		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
755 756 757 758 759

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

760 761 762
	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
763 764 765
	return -EAGAIN;
}

766
void
B
Ben Skeggs 已提交
767
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
768
{
769
	struct nvkm_device *device = gr->base.engine.subdev.device;
770 771
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
772 773 774 775 776

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
777
			nvkm_wr32(device, addr, init->data);
778 779 780
			addr += init->pitch;
		}
	}
781 782 783
}

void
B
Ben Skeggs 已提交
784
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
785
{
786
	struct nvkm_device *device = gr->base.engine.subdev.device;
787 788
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
789
	u32 data = 0;
790

791
	nvkm_wr32(device, 0x400208, 0x80000000);
792 793 794 795 796 797

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
798
			nvkm_wr32(device, 0x400204, init->data);
799 800
			data = init->data;
		}
801

802
		while (addr < next) {
803
			nvkm_wr32(device, 0x400200, addr);
804 805 806 807 808
			/**
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
B
Ben Skeggs 已提交
809
				gf100_gr_wait_idle(gr);
810 811 812 813
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
814 815 816
			addr += init->pitch;
		}
	}
817

818
	nvkm_wr32(device, 0x400208, 0x00000000);
819 820 821
}

void
B
Ben Skeggs 已提交
822
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
823
{
824
	struct nvkm_device *device = gr->base.engine.subdev.device;
825 826
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
827
	u32 data = 0;
828

829 830 831 832 833 834
	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
835
			nvkm_wr32(device, 0x40448c, init->data);
836 837 838 839
			data = init->data;
		}

		while (addr < next) {
840
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
841
			addr += init->pitch;
842 843 844 845 846
		}
	}
}

u64
847
gf100_gr_units(struct nvkm_gr *base)
848
{
849
	struct gf100_gr *gr = gf100_gr(base);
850 851
	u64 cfg;

B
Ben Skeggs 已提交
852 853 854
	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
855 856

	return cfg;
857 858
}

859 860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891
/*
 * { bit mask, name } pairs, ended by an empty entry; handed to
 * nvkm_snprintbf() by gf100_gr_trap_intr().
 */
static const struct nvkm_bitfield gf100_dispatch_error[] = {
	{ 0x00000001, "INJECTED_BUNDLE_ERROR" },
	{ 0x00000002, "CLASS_SUBCH_MISMATCH" },
	{ 0x00000004, "SUBCHSW_DURING_NOTIFY" },
	{}
};

/* { bit mask, name } pairs, ended by an empty entry. */
static const struct nvkm_bitfield gf100_m2mf_error[] = {
	{ 0x00000001, "PUSH_TOO_MUCH_DATA" },
	{ 0x00000002, "PUSH_NOT_ENOUGH_DATA" },
	{}
};

/* { bit mask, name } pairs, ended by an empty entry. */
static const struct nvkm_bitfield gf100_unk6_error[] = {
	{ 0x00000001, "TEMP_TOO_SMALL" },
	{}
};

/* { bit mask, name } pairs, ended by an empty entry. */
static const struct nvkm_bitfield gf100_ccache_error[] = {
	{ 0x00000001, "INTR" },
	{ 0x00000002, "LDCONST_OOB" },
	{}
};

/* { bit mask, name } pairs, ended by an empty entry. */
static const struct nvkm_bitfield gf100_macro_error[] = {
	{ 0x00000001, "TOO_FEW_PARAMS" },
	{ 0x00000002, "TOO_MANY_PARAMS" },
	{ 0x00000004, "ILLEGAL_OPCODE" },
	{ 0x00000008, "DOUBLE_BRANCH" },
	{ 0x00000010, "WATCHDOG" },
	{}
};
892
/* { bit mask, name } pairs, ended by an empty entry. */
static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000040, "CTA_RESUME" },
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x00800000, "CTA_THREAD_DIMENSION_ZERO" },
	{ 0x01000000, "MEMORY_WINDOW_OVERLAP" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

910 911 912 913 914 915 916
/*
 * { bit mask, name } pairs, ended by an empty entry; used by
 * gf100_gr_trap_gpc_rop() to decode the trap status word.
 */
static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

920
static void
B
Ben Skeggs 已提交
921
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
922
{
923 924 925
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
926
	u32 trap[4];
927

928
	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
929 930 931
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
932

933
	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
934

935 936 937 938
	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
939
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
940 941
}

942
/*
 * { value, name } pairs, ended by an empty entry; looked up with
 * nvkm_enum_find() by gf100_gr_trap_mp().
 */
static const struct nvkm_enum gf100_mp_warp_error[] = {
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
	{ 0x10, "INVALID_ADDR_SPACE" },
	{ 0x11, "INVALID_PARAM" },
	{}
};

956
/*
 * { bit mask, name } pairs, ended by an empty entry; decoded with
 * nvkm_snprintbf() by gf100_gr_trap_mp().
 */
static const struct nvkm_bitfield gf100_mp_global_error[] = {
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
B
Ben Skeggs 已提交
963
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
964
{
965 966
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
967 968
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
969 970
	const struct nvkm_enum *warp;
	char glob[128];
971

972 973 974 975 976 977
	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
978

979 980
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
981 982
}

983
static void
B
Ben Skeggs 已提交
984
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
985
{
986 987
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
988
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
989 990

	if (stat & 0x00000001) {
991
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
992
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
993
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
994 995 996 997
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
B
Ben Skeggs 已提交
998
		gf100_gr_trap_mp(gr, gpc, tpc);
999 1000 1001 1002
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
1003
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
1004
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
1005
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
1006 1007 1008 1009
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
1010
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
1011
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
1012
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
1013 1014 1015 1016
		stat &= ~0x00000008;
	}

	if (stat) {
1017
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
1018 1019 1020 1021
	}
}

static void
B
Ben Skeggs 已提交
1022
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
1023
{
1024 1025
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1026
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
1027 1028 1029
	int tpc;

	if (stat & 0x00000001) {
B
Ben Skeggs 已提交
1030
		gf100_gr_trap_gpc_rop(gr, gpc);
1031 1032 1033 1034
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
1035
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
1036
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
1037
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
1038 1039 1040 1041
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
1042
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
1043
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
1044
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
1045 1046 1047 1048
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
1049
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
1050
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
1051
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
1052 1053 1054
		stat &= ~0x00000009;
	}

B
Ben Skeggs 已提交
1055
	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1056 1057
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
B
Ben Skeggs 已提交
1058
			gf100_gr_trap_tpc(gr, gpc, tpc);
1059
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
1060 1061 1062 1063 1064
			stat &= ~mask;
		}
	}

	if (stat) {
1065
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
1066 1067 1068 1069
	}
}

static void
B
Ben Skeggs 已提交
1070
gf100_gr_trap_intr(struct gf100_gr *gr)
1071
{
1072 1073
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1074
	char error[128];
1075
	u32 trap = nvkm_rd32(device, 0x400108);
1076
	int rop, gpc;
1077 1078

	if (trap & 0x00000001) {
1079
		u32 stat = nvkm_rd32(device, 0x404000);
1080 1081 1082 1083

		nvkm_snprintbf(error, sizeof(error), gf100_dispatch_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "DISPATCH %08x [%s]\n", stat, error);
1084 1085
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
1086 1087 1088 1089
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
1090
		u32 stat = nvkm_rd32(device, 0x404600);
1091 1092 1093 1094 1095

		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "M2MF %08x [%s]\n", stat, error);

1096 1097
		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
1098 1099 1100 1101
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
1102
		u32 stat = nvkm_rd32(device, 0x408030);
1103 1104 1105 1106

		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
1107 1108
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
1109 1110 1111 1112
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
1113
		u32 stat = nvkm_rd32(device, 0x405840);
1114 1115
		nvkm_error(subdev, "SHADER %08x, sph: 0x%06x, stage: 0x%02x\n",
			   stat, stat & 0xffffff, (stat >> 24) & 0x3f);
1116 1117
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
1118 1119 1120 1121
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
1122
		u32 stat = nvkm_rd32(device, 0x40601c);
1123 1124 1125 1126 1127

		nvkm_snprintbf(error, sizeof(error), gf100_unk6_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "UNK6 %08x [%s]\n", stat, error);

1128 1129
		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
1130 1131 1132 1133
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
1134
		u32 stat = nvkm_rd32(device, 0x404490);
1135 1136 1137 1138 1139 1140 1141 1142 1143 1144
		u32 pc = nvkm_rd32(device, 0x404494);
		u32 op = nvkm_rd32(device, 0x40449c);

		nvkm_snprintbf(error, sizeof(error), gf100_macro_error,
			       stat & 0x1fffffff);
		nvkm_error(subdev, "MACRO %08x [%s], pc: 0x%03x%s, op: 0x%08x\n",
			   stat, error, pc & 0x7ff,
			   (pc & 0x10000000) ? "" : " (invalid)",
			   op);

1145 1146
		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
1147 1148 1149
		trap &= ~0x00000080;
	}

1150
	if (trap & 0x00000100) {
1151
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
1152

1153 1154
		nvkm_snprintbf(error, sizeof(error), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, error);
1155

1156
		if (stat)
1157 1158
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
1159 1160 1161
		trap &= ~0x00000100;
	}

1162
	if (trap & 0x01000000) {
1163
		u32 stat = nvkm_rd32(device, 0x400118);
B
Ben Skeggs 已提交
1164
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
1165 1166
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
B
Ben Skeggs 已提交
1167
				gf100_gr_trap_gpc(gr, gpc);
1168
				nvkm_wr32(device, 0x400118, mask);
1169 1170 1171
				stat &= ~mask;
			}
		}
1172
		nvkm_wr32(device, 0x400108, 0x01000000);
1173 1174 1175 1176
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
B
Ben Skeggs 已提交
1177
		for (rop = 0; rop < gr->rop_nr; rop++) {
1178 1179
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
1180
			nvkm_error(subdev, "ROP%d %08x %08x\n",
1181
				 rop, statz, statc);
1182 1183
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
1184
		}
1185
		nvkm_wr32(device, 0x400108, 0x02000000);
1186 1187 1188 1189
		trap &= ~0x02000000;
	}

	if (trap) {
1190
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
1191
		nvkm_wr32(device, 0x400108, trap);
1192 1193 1194
	}
}

1195
static void
B
Ben Skeggs 已提交
1196
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1197
{
1198 1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
1212 1213 1214
}

void
B
Ben Skeggs 已提交
1215
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1216
{
1217 1218
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1219 1220
	u32 gpc;

B
Ben Skeggs 已提交
1221
	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1222
	for (gpc = 0; gpc < gpcnr; gpc++)
B
Ben Skeggs 已提交
1223
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1224 1225 1226
}

static void
B
Ben Skeggs 已提交
1227
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
1228
{
1229 1230
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1231
	u32 stat = nvkm_rd32(device, 0x409c18);
1232

1233
	if (stat & 0x00000001) {
1234
		u32 code = nvkm_rd32(device, 0x409814);
1235
		if (code == E_BAD_FWMTHD) {
1236 1237
			u32 class = nvkm_rd32(device, 0x409808);
			u32  addr = nvkm_rd32(device, 0x40980c);
1238 1239
			u32  subc = (addr & 0x00070000) >> 16;
			u32  mthd = (addr & 0x00003ffc);
1240
			u32  data = nvkm_rd32(device, 0x409810);
1241

1242 1243 1244
			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);
1245

1246
			nvkm_wr32(device, 0x409c20, 0x00000001);
1247 1248
			stat &= ~0x00000001;
		} else {
1249
			nvkm_error(subdev, "FECS ucode error %d\n", code);
1250 1251
		}
	}
1252

1253
	if (stat & 0x00080000) {
1254
		nvkm_error(subdev, "FECS watchdog timeout\n");
B
Ben Skeggs 已提交
1255
		gf100_gr_ctxctl_debug(gr);
1256
		nvkm_wr32(device, 0x409c20, 0x00080000);
1257 1258 1259 1260
		stat &= ~0x00080000;
	}

	if (stat) {
1261
		nvkm_error(subdev, "FECS %08x\n", stat);
B
Ben Skeggs 已提交
1262
		gf100_gr_ctxctl_debug(gr);
1263
		nvkm_wr32(device, 0x409c20, stat);
1264
	}
1265 1266
}

1267
static void
1268
gf100_gr_intr(struct nvkm_gr *base)
1269
{
1270 1271 1272
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1273 1274
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
1275 1276 1277
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
1278 1279
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
1280 1281
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
1282
	u32 class;
1283 1284
	const char *name = "unknown";
	int chid = -1;
1285

1286
	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
1287 1288 1289 1290
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}
1291

1292
	if (device->card_type < NV_E0 || subc < 4)
1293
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
1294 1295 1296
	else
		class = 0x0000;

1297 1298 1299 1300 1301
	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
1302
		nvkm_wr32(device, 0x400100, 0x00000001);
1303 1304 1305
		stat &= ~0x00000001;
	}

1306
	if (stat & 0x00000010) {
1307
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
1308 1309
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
1310 1311
				   chid, inst << 12, name, subc,
				   class, mthd, data);
1312
		}
1313
		nvkm_wr32(device, 0x400100, 0x00000010);
1314 1315 1316 1317
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
1318 1319
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
1320
			   chid, inst << 12, name, subc, class, mthd, data);
1321
		nvkm_wr32(device, 0x400100, 0x00000020);
1322 1323 1324 1325
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
1326 1327 1328 1329 1330
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
1331
			   name, subc, class, mthd, data);
1332
		nvkm_wr32(device, 0x400100, 0x00100000);
1333 1334 1335 1336
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
1337
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
1338
			   chid, inst << 12, name);
B
Ben Skeggs 已提交
1339
		gf100_gr_trap_intr(gr);
1340
		nvkm_wr32(device, 0x400100, 0x00200000);
1341 1342 1343 1344
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
B
Ben Skeggs 已提交
1345
		gf100_gr_ctxctl_isr(gr);
1346
		nvkm_wr32(device, 0x400100, 0x00080000);
1347 1348 1349 1350
		stat &= ~0x00080000;
	}

	if (stat) {
1351
		nvkm_error(subdev, "intr %08x\n", stat);
1352
		nvkm_wr32(device, 0x400100, stat);
1353 1354
	}

1355
	nvkm_wr32(device, 0x400500, 0x00010001);
1356
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
1357 1358
}

1359
void
B
Ben Skeggs 已提交
1360
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
1361
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
1362
{
1363
	struct nvkm_device *device = gr->base.engine.subdev.device;
1364
	int i;
1365

1366
	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
1367
	for (i = 0; i < data->size / 4; i++)
1368
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
1369

1370
	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
1371 1372
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
1373 1374
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
1375
	}
1376 1377 1378

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
1379
		nvkm_wr32(device, fuc_base + 0x0184, 0);
1380 1381
}

1382
static void
B
Ben Skeggs 已提交
1383
gf100_gr_init_csdata(struct gf100_gr *gr,
1384 1385
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
1386
{
1387
	struct nvkm_device *device = gr->base.engine.subdev.device;
1388 1389
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
1390
	u32 addr = ~0, prev = ~0, xfer = 0;
1391 1392
	u32 star, temp;

1393 1394 1395
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
1396 1397
	if (temp > star)
		star = temp;
1398
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
1399

1400 1401 1402 1403 1404 1405 1406
	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
1407
					nvkm_wr32(device, falcon + 0x01c4, data);
1408 1409 1410 1411
					star += 4;
				}
				addr = head;
				xfer = 0;
1412
			}
1413 1414 1415
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
1416
		}
1417
	}
1418

1419 1420 1421
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
1422 1423
}

1424
int
B
Ben Skeggs 已提交
1425
gf100_gr_init_ctxctl(struct gf100_gr *gr)
1426
{
1427
	const struct gf100_grctx_func *grctx = gr->func->grctx;
1428 1429
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1430
	int i;
1431

B
Ben Skeggs 已提交
1432
	if (gr->firmware) {
1433
		/* load fuc microcode */
1434
		nvkm_mc_unk260(device->mc, 0);
1435 1436
		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
1437
		nvkm_mc_unk260(device->mc, 1);
1438

1439
		/* start both of them running */
1440 1441 1442 1443 1444
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);
		nvkm_wr32(device, 0x41a100, 0x00000002);
		nvkm_wr32(device, 0x409100, 0x00000002);
1445 1446 1447 1448 1449
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;
B
Ben Skeggs 已提交
1450

1451 1452 1453
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);
B
Ben Skeggs 已提交
1454

1455 1456 1457
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
1458 1459 1460 1461
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
1462
			return -EBUSY;
1463

1464 1465 1466
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
1467 1468 1469 1470
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1471 1472
			return -EBUSY;

1473 1474 1475
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
1476 1477 1478 1479
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1480 1481
			return -EBUSY;

1482
		if (device->chipset >= 0xe0) {
1483 1484 1485
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
1486 1487 1488 1489
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1490 1491
				return -EBUSY;

1492 1493 1494 1495
			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
1496 1497 1498 1499
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1500 1501
				return -EBUSY;

1502 1503 1504 1505
			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
1506 1507 1508 1509
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1510 1511
				return -EBUSY;

1512 1513 1514
			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
1515 1516
		}

B
Ben Skeggs 已提交
1517 1518
		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
1519
			if (ret) {
1520
				nvkm_error(subdev, "failed to construct context\n");
1521 1522 1523 1524 1525
				return ret;
			}
		}

		return 0;
1526
	} else
1527
	if (!gr->func->fecs.ucode) {
1528
		return -ENOSYS;
1529
	}
1530

1531
	/* load HUB microcode */
1532
	nvkm_mc_unk260(device->mc, 0);
1533
	nvkm_wr32(device, 0x4091c0, 0x01000000);
1534 1535
	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
1536

1537
	nvkm_wr32(device, 0x409180, 0x01000000);
1538
	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
1539
		if ((i & 0x3f) == 0)
1540
			nvkm_wr32(device, 0x409188, i >> 6);
1541
		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
1542 1543 1544
	}

	/* load GPC microcode */
1545
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
1546 1547
	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
1548

1549
	nvkm_wr32(device, 0x41a180, 0x01000000);
1550
	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
1551
		if ((i & 0x3f) == 0)
1552
			nvkm_wr32(device, 0x41a188, i >> 6);
1553
		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
1554
	}
1555
	nvkm_mc_unk260(device->mc, 1);
1556

1557
	/* load register lists */
1558 1559 1560 1561
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
1562

1563
	/* start HUB ucode running, it'll init the GPCs */
1564 1565
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
1566 1567 1568 1569
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
B
Ben Skeggs 已提交
1570
		gf100_gr_ctxctl_debug(gr);
1571 1572 1573
		return -EBUSY;
	}

1574
	gr->size = nvkm_rd32(device, 0x409804);
B
Ben Skeggs 已提交
1575 1576
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
1577
		if (ret) {
1578
			nvkm_error(subdev, "failed to construct context\n");
1579 1580
			return ret;
		}
1581 1582 1583
	}

	return 0;
1584 1585
}

1586 1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int ret, i, j;

	nvkm_pmu_pgob(device->pmu, false);

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b4);
	if (ret)
		return ret;

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b8);
	if (ret)
		return ret;

	nvkm_kmap(gr->unk4188b4);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b4, i, 0x00000010);
	nvkm_done(gr->unk4188b4);

	nvkm_kmap(gr->unk4188b8);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b8, i, 0x00000010);
	nvkm_done(gr->unk4188b8);

	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;
	for (i = 0; i < gr->gpc_nr; i++) {
		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
		gr->tpc_total += gr->tpc_nr[i];
		gr->ppc_nr[i]  = gr->func->ppc_nr;
		for (j = 0; j < gr->ppc_nr[i]; j++) {
			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
1623 1624
			if (mask)
				gr->ppc_mask[i] |= (1 << j);
1625 1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686 1687 1688 1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706 1707 1708 1709 1710 1711 1712 1713 1714 1715 1716 1717 1718 1719 1720 1721 1722 1723 1724 1725 1726 1727 1728 1729 1730 1731 1732 1733 1734 1735 1736 1737
			gr->ppc_tpc_nr[i][j] = hweight8(mask);
		}
	}

	/*XXX: these need figuring out... though it might not even matter */
	switch (device->chipset) {
	case 0xc0:
		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
			gr->magic_not_rop_nr = 0x07;
		} else
		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
			gr->magic_not_rop_nr = 0x05;
		} else
		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
			gr->magic_not_rop_nr = 0x06;
		}
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc1: /* 2/0/0/0, 1 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc8: /* 4/4/3/4, 5 */
		gr->magic_not_rop_nr = 0x06;
		break;
	case 0xce: /* 4/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xcf: /* 4/0/0/0, 3 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xd7:
	case 0xd9: /* 1/0/0/0, 1 */
	case 0xea: /* gk20a */
	case 0x12b: /* gm20b */
		gr->magic_not_rop_nr = 0x01;
		break;
	}

	return 0;
}

int
gf100_gr_init_(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
	return gr->func->init(gr);
}

/*
 * Free the data buffer of one firmware image and leave the pointer NULL,
 * so calling this again on the same image is harmless.
 */
void
gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
{
	void *blob = fuc->data;

	fuc->data = NULL;
	kfree(blob);
}

/*
 * Tear down a gf100_gr instance: run the optional chipset dtor hook, free
 * the context data, the four firmware images and both scratch buffers.
 *
 * Returns the gf100_gr pointer itself; the object is not freed here.
 */
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct gf100_gr_fuc *images[] = {
		&gr->fuc409c,
		&gr->fuc409d,
		&gr->fuc41ac,
		&gr->fuc41ad,
	};
	unsigned int n;

	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	/* same release order as the images are listed above */
	for (n = 0; n < sizeof(images) / sizeof(images[0]); n++)
		gf100_gr_dtor_fw(images[n]);

	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);
	return gr;
}

/*
 * Generic nvkm_gr hook table shared by every gf100_gr instance; it is the
 * table handed to nvkm_gr_ctor() by gf100_gr_ctor().
 */
static const struct nvkm_gr_func
gf100_gr_ = {
	.dtor = gf100_gr_dtor,
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};

/*
 * Load one external firmware image into @fuc.
 *
 * The file requested is "nvidia/<chip>/<fwname>.bin", where <chip> is the
 * device's chip name converted to lowercase (truncated to 15 characters).
 * The image is duplicated with kmemdup() and the firmware handle released
 * before returning.
 *
 * @gr:     graphics engine instance
 * @fwname: image name without directory or extension
 * @fuc:    destination; size and data are overwritten
 *
 * Returns 0 on success, the request_firmware() error when loading fails,
 * or -ENOMEM when the copy cannot be allocated.
 */
int
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
		 struct gf100_gr_fuc *fuc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	const struct firmware *fw;
	char f[64];
	char cname[16];
	int ret;
	int i;

	/* Convert device name to lowercase */
	strncpy(cname, device->chip->name, sizeof(cname));
	/* strncpy() does not terminate on truncation, so do it by hand */
	cname[sizeof(cname) - 1] = '\0';
	i = strlen(cname);
	while (i) {
		--i;
		cname[i] = tolower(cname[i]);
	}

	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
	ret = request_firmware(&fw, f, device->dev);
	if (ret) {
		nvkm_error(subdev, "failed to load %s\n", fwname);
		return ret;
	}

	fuc->size = fw->size;
	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
	release_firmware(fw);
	return (fuc->data != NULL) ? 0 : -ENOMEM;
}

/*
 * Initialise an already allocated gf100_gr.
 *
 * External firmware is selected by the "NvGrUseFW" config option, which
 * defaults to on when @func carries no built-in FECS microcode.  The
 * fifth argument of nvkm_gr_ctor() is true when either microcode source
 * is available.  With external firmware selected, all four images are
 * loaded; any failure is reported as -ENODEV.
 *
 * Returns 0 on success, the nvkm_gr_ctor() error, or -ENODEV.
 */
int
gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct gf100_gr *gr)
{
	const int no_builtin = (func->fecs.ucode == NULL);
	int err;

	gr->func = func;
	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW", no_builtin);

	err = nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000,
			   gr->firmware || func->fecs.ucode != NULL,
			   &gr->base);
	if (err)
		return err;

	/* nothing more to do when the built-in microcode is used */
	if (!gr->firmware)
		return 0;

	nvkm_info(&gr->base.engine.subdev, "using external firmware\n");

	/* stops at the first image that fails to load */
	if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c))
		return -ENODEV;
	if (gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d))
		return -ENODEV;
	if (gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac))
		return -ENODEV;
	if (gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
		return -ENODEV;

	return 0;
}

1778
int
1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790
gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr *gr;
	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;
	return gf100_gr_ctor(func, device, index, gr);
}

int
gf100_gr_init(struct gf100_gr *gr)
1791
{
1792
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
1793
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
1794 1795 1796
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
1797
	int i;
1798

1799 1800 1801 1802 1803 1804
	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
1805 1806
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);
1807

1808
	gf100_gr_mmio(gr, gr->func->mmio);
1809

B
Ben Skeggs 已提交
1810 1811
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
1812
		do {
B
Ben Skeggs 已提交
1813
			gpc = (gpc + 1) % gr->gpc_nr;
1814
		} while (!tpcnr[gpc]);
B
Ben Skeggs 已提交
1815
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
1816 1817 1818 1819

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

1820 1821 1822 1823
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
1824

B
Ben Skeggs 已提交
1825
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1826
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
B
Ben Skeggs 已提交
1827
			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
1828
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
B
Ben Skeggs 已提交
1829
			gr->tpc_total);
1830
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
1831 1832
	}

1833
	if (device->chipset != 0xd7)
1834
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
M
Maarten Lankhorst 已提交
1835
	else
1836
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
B
Ben Skeggs 已提交
1837

1838
	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
B
Ben Skeggs 已提交
1839

1840
	nvkm_wr32(device, 0x400500, 0x00010001);
B
Ben Skeggs 已提交
1841

1842 1843
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
B
Ben Skeggs 已提交
1844

1845 1846 1847 1848 1849 1850 1851 1852 1853 1854 1855
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
B
Ben Skeggs 已提交
1856 1857

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1858 1859 1860 1861
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
B
Ben Skeggs 已提交
1862
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1863 1864 1865 1866 1867 1868 1869
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
1870
		}
1871 1872
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
1873 1874
	}

B
Ben Skeggs 已提交
1875
	for (rop = 0; rop < gr->rop_nr; rop++) {
1876 1877 1878 1879
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
1880
	}
1881

1882 1883 1884 1885 1886 1887
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);
1888

1889
	nvkm_wr32(device, 0x400054, 0x34ce3464);
1890

B
Ben Skeggs 已提交
1891
	gf100_gr_zbc_init(gr);
1892

B
Ben Skeggs 已提交
1893
	return gf100_gr_init_ctxctl(gr);
1894 1895
}

1896
#include "fuc/hubgf100.fuc3.h"
1897

1898 1899 1900 1901 1902 1903
/*
 * Built-in FECS microcode: code and data arrays from the
 * fuc/hubgf100.fuc3.h header included just above.
 */
struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
};

1906
#include "fuc/gpcgf100.fuc3.h"
1907

1908 1909 1910 1911 1912 1913
/*
 * Built-in GPCCS microcode: code and data arrays from the
 * fuc/gpcgf100.fuc3.h header included just above.
 */
struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
};

1916 1917
static const struct gf100_gr_func
gf100_gr = {
1918 1919 1920 1921
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
1922 1923 1924 1925 1926 1927 1928 1929 1930 1931
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};

1932 1933 1934 1935 1936
/*
 * Create a gr engine instance for gf100: forwards to gf100_gr_new_() with
 * the gf100 chipset description and returns its result unchanged.
 */
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gf100_gr_new_(&gf100_gr, device, index, pgr);
}