gf100.c 51.5 KB
Newer Older
1
/*
2
 * Copyright 2012 Red Hat Inc.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
24 25 26 27 28 29
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
30
#include <core/firmware.h>
31 32
#include <subdev/fb.h>
#include <subdev/mc.h>
33
#include <subdev/pmu.h>
34
#include <subdev/timer.h>
35
#include <engine/fifo.h>
36 37

#include <nvif/class.h>
38
#include <nvif/cl9097.h>
39
#include <nvif/unpack.h>
40

41 42 43 44 45
/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
B
Ben Skeggs 已提交
46
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
47
{
48
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
49
	if (gr->zbc_color[zbc].format) {
50 51 52 53 54 55 56 57
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
58 59 60
}

static int
B
Ben Skeggs 已提交
61
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
62
		       const u32 ds[4], const u32 l2[4])
63
{
64
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
65 66 67
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
68 69
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
70
				continue;
B
Ben Skeggs 已提交
71 72
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
73
				continue;
B
Ben Skeggs 已提交
74 75
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
76 77 78 79 80 81 82 83 84
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

85 86 87
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
88 89 90
	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
91
	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
92
	gf100_gr_zbc_clear_color(gr, zbc);
93 94 95 96
	return zbc;
}

static void
B
Ben Skeggs 已提交
97
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
98
{
99
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
100
	if (gr->zbc_depth[zbc].format)
101 102 103 104
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
105 106 107
}

static int
B
Ben Skeggs 已提交
108
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
109
		       const u32 ds, const u32 l2)
110
{
111
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
112 113 114
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
115 116
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
117
				continue;
B
Ben Skeggs 已提交
118
			if (gr->zbc_depth[i].ds != ds)
119
				continue;
B
Ben Skeggs 已提交
120
			if (gr->zbc_depth[i].l2 != l2) {
121 122 123 124 125 126 127 128 129
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

130 131 132
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
133 134 135
	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
136
	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
137
	gf100_gr_zbc_clear_depth(gr, zbc);
138 139 140
	return zbc;
}

141 142 143
/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/
144 145 146 147 148 149
/* Recover the wrapping gf100_gr_object from its embedded nvkm_object. */
#define gf100_gr_object(p) container_of((p), struct gf100_gr_object, object)

/* Kernel-side state for a graphics class object allocated on a channel. */
struct gf100_gr_object {
	struct nvkm_object object;	/* embedded base object */
	struct gf100_gr_chan *chan;	/* channel the object belongs to */
};
150

151
static int
152
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
153
{
154
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
155 156 157
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
158
	int ret = -ENOSYS;
159

160
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
B
Ben Skeggs 已提交
181
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
182 183
							   args->v0.ds,
							   args->v0.l2);
184 185 186 187 188 189 190 191 192 193 194 195 196 197
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
198
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
199
{
200
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
201 202 203
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
204
	int ret = -ENOSYS;
205

206
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
207 208
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
B
Ben Skeggs 已提交
209
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
210 211
							   args->v0.ds,
							   args->v0.l2);
212 213 214 215 216 217 218 219 220 221
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
222
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
223
{
224
	nvif_ioctl(object, "fermi mthd %08x\n", mthd);
225 226
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
227
		return gf100_fermi_mthd_zbc_color(object, data, size);
228
	case FERMI_A_ZBC_DEPTH:
229
		return gf100_fermi_mthd_zbc_depth(object, data, size);
230 231 232 233 234 235
	default:
		break;
	}
	return -EINVAL;
}

236 237
const struct nvkm_object_func
gf100_fermi = {
238
	.mthd = gf100_fermi_mthd,
239 240
};

241 242
/* Enable (data != 0) or disable all shader exception reporting by
 * writing the SM exception-enable mask registers.
 */
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000);
	nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000);
}

248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266
/* Handle class methods that are emulated in software rather than by
 * PGRAPH.  Returns true if the method was recognised and handled.
 */
static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	/* match on the low byte of the class id (e.g. 0x9097, 0x90c0) */
	switch (class & 0x00ff) {
	case 0x97:
	case 0xc0:
		switch (mthd) {
		case 0x1528: /* set-shader-exceptions method */
			gf100_gr_mthd_set_shader_exceptions(device, data);
			return true;
		default:
			break;
		}
		break;
	default:
		break;
	}
	return false;
}
267

268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288
/* Default (empty) function table for graphics objects with no
 * kernel-side methods.
 */
static const struct nvkm_object_func
gf100_gr_object_func = {
};

/* Constructor for graphics class objects: allocates the wrapper and
 * records the channel it was created on.
 */
static int
gf100_gr_object_new(const struct nvkm_oclass *oclass, void *data, u32 size,
		    struct nvkm_object **pobject)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(oclass->parent);
	struct gf100_gr_object *object;

	if (!(object = kzalloc(sizeof(*object), GFP_KERNEL)))
		return -ENOMEM;
	*pobject = &object->object;

	/* prefer a class-specific function table when one is provided */
	nvkm_object_ctor(oclass->base.func ? oclass->base.func :
			 &gf100_gr_object_func, oclass, &object->object);
	object->chan = chan;
	return 0;
}

289 290 291 292 293 294 295 296 297
static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int c = 0;

	while (gr->func->sclass[c].oclass) {
		if (c++ == index) {
			*sclass = gr->func->sclass[index];
298
			sclass->ctor = gf100_gr_object_new;
299 300 301 302 303 304
			return index;
		}
	}

	return c;
}
305 306 307 308

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
309

310 311 312
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
313
{
314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

/* Destructor for a per-channel graphics context: unmaps and releases
 * the data buffers referenced by the mmio list, then the mmio list
 * buffer itself.  Returns the chan pointer for the caller to kfree.
 */
static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	int i;

	for (i = 0; i < ARRAY_SIZE(chan->data); i++) {
		/* only slots that were successfully mapped have a vma node */
		if (chan->data[i].vma.node) {
			nvkm_vm_unmap(&chan->data[i].vma);
			nvkm_vm_put(&chan->data[i].vma);
		}
		nvkm_memory_del(&chan->data[i].mem);
	}

	if (chan->mmio_vma.node) {
		nvkm_vm_unmap(&chan->mmio_vma);
		nvkm_vm_put(&chan->mmio_vma);
	}
	nvkm_memory_del(&chan->mmio);
	return chan;
}

/* Object functions for a per-channel graphics context. */
static const struct nvkm_object_func
gf100_gr_chan = {
	.dtor = gf100_gr_chan_dtor,
	.bind = gf100_gr_chan_bind,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
B
Ben Skeggs 已提交
379 380
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
381
	struct gf100_gr_chan *chan;
382
	struct nvkm_device *device = gr->base.engine.subdev.device;
383 384
	int ret, i;

385 386 387 388 389
	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;
390

391 392 393 394
	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
395 396
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
397 398 399
	if (ret)
		return ret;

400
	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
401
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
402 403 404
	if (ret)
		return ret;

405 406
	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

407
	/* allocate buffers referenced by mmio list */
B
Ben Skeggs 已提交
408
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
409 410 411
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
412 413
		if (ret)
			return ret;
414

415 416 417
		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
418 419
		if (ret)
			return ret;
420

421
		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
422
		data++;
423 424
	}

425
	/* finally, fill in the mmio list and point the context at it */
426
	nvkm_kmap(chan->mmio);
B
Ben Skeggs 已提交
427
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
428 429
		u32 addr = mmio->addr;
		u32 data = mmio->data;
430

431
		if (mmio->buffer >= 0) {
432
			u64 info = chan->data[mmio->buffer].vma.offset;
433 434
			data |= info >> mmio->shift;
		}
435

436 437
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
438 439
		mmio++;
	}
440
	nvkm_done(chan->mmio);
441
	return 0;
442 443
}

444
/*******************************************************************************
445
 * PGRAPH register lists
446 447
 ******************************************************************************/

448 449
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
450 451 452 453 454 455 456 457 458 459 460 461 462 463
	{ 0x400080,   1, 0x04, 0x003083c2 },
	{ 0x400088,   1, 0x04, 0x00006fe7 },
	{ 0x40008c,   1, 0x04, 0x00000000 },
	{ 0x400090,   1, 0x04, 0x00000030 },
	{ 0x40013c,   1, 0x04, 0x013901f7 },
	{ 0x400140,   1, 0x04, 0x00000100 },
	{ 0x400144,   1, 0x04, 0x00000000 },
	{ 0x400148,   1, 0x04, 0x00000110 },
	{ 0x400138,   1, 0x04, 0x00000000 },
	{ 0x400130,   2, 0x04, 0x00000000 },
	{ 0x400124,   1, 0x04, 0x00000002 },
	{}
};

464 465
const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
466 467 468 469 470
	{ 0x40415c,   1, 0x04, 0x00000000 },
	{ 0x404170,   1, 0x04, 0x00000000 },
	{}
};

471 472
const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
473 474 475 476
	{ 0x404488,   2, 0x04, 0x00000000 },
	{}
};

477 478
const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
479 480 481 482
	{ 0x407808,   1, 0x04, 0x00000000 },
	{}
};

483 484
const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
485 486 487 488
	{ 0x406024,   1, 0x04, 0x00000000 },
	{}
};

489 490
const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
491 492 493 494 495 496
	{ 0x405844,   1, 0x04, 0x00ffffff },
	{ 0x405850,   1, 0x04, 0x00000000 },
	{ 0x405908,   1, 0x04, 0x00000000 },
	{}
};

497 498
const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
499 500 501 502
	{ 0x40803c,   1, 0x04, 0x00000000 },
	{}
};

503 504
const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
505
	{ 0x4184a0,   1, 0x04, 0x00000000 },
506 507 508
	{}
};

509 510
const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
511 512 513 514
	{ 0x418604,   1, 0x04, 0x00000000 },
	{ 0x418680,   1, 0x04, 0x00000000 },
	{ 0x418714,   1, 0x04, 0x80000000 },
	{ 0x418384,   1, 0x04, 0x00000000 },
515 516 517
	{}
};

518 519
const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
520
	{ 0x418814,   3, 0x04, 0x00000000 },
521 522 523
	{}
};

524 525
const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
526
	{ 0x418b04,   1, 0x04, 0x00000000 },
527 528 529
	{}
};

530 531
const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
532 533 534 535
	{ 0x4188c8,   1, 0x04, 0x80000000 },
	{ 0x4188cc,   1, 0x04, 0x00000000 },
	{ 0x4188d0,   1, 0x04, 0x00010000 },
	{ 0x4188d4,   1, 0x04, 0x00000001 },
536 537 538
	{}
};

539 540
const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
541 542 543 544 545
	{ 0x418910,   1, 0x04, 0x00010001 },
	{ 0x418914,   1, 0x04, 0x00000301 },
	{ 0x418918,   1, 0x04, 0x00800000 },
	{ 0x418980,   1, 0x04, 0x77777770 },
	{ 0x418984,   3, 0x04, 0x77777777 },
546 547 548
	{}
};

549 550
const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
551 552
	{ 0x418c04,   1, 0x04, 0x00000000 },
	{ 0x418c88,   1, 0x04, 0x00000000 },
553 554 555
	{}
};

556 557
const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
558 559 560 561
	{ 0x418d00,   1, 0x04, 0x00000000 },
	{ 0x418f08,   1, 0x04, 0x00000000 },
	{ 0x418e00,   1, 0x04, 0x00000050 },
	{ 0x418e08,   1, 0x04, 0x00000000 },
562 563 564
	{}
};

565 566
const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
567 568 569 570 571
	{ 0x41900c,   1, 0x04, 0x00000000 },
	{ 0x419018,   1, 0x04, 0x00000000 },
	{}
};

572 573
const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
574 575
	{ 0x419d08,   2, 0x04, 0x00000000 },
	{ 0x419d10,   1, 0x04, 0x00000014 },
576 577 578
	{}
};

579 580
const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
581 582 583
	{ 0x419ab0,   1, 0x04, 0x00000000 },
	{ 0x419ab8,   1, 0x04, 0x000000e7 },
	{ 0x419abc,   2, 0x04, 0x00000000 },
584 585 586
	{}
};

587 588
const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
589 590 591 592
	{ 0x41980c,   3, 0x04, 0x00000000 },
	{ 0x419844,   1, 0x04, 0x00000000 },
	{ 0x41984c,   1, 0x04, 0x00005bc5 },
	{ 0x419850,   4, 0x04, 0x00000000 },
593 594 595
	{}
};

596 597
const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
598 599 600 601 602 603
	{ 0x419c98,   1, 0x04, 0x00000000 },
	{ 0x419ca8,   1, 0x04, 0x80000000 },
	{ 0x419cb4,   1, 0x04, 0x00000000 },
	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
	{ 0x419cbc,   1, 0x04, 0x28137606 },
	{ 0x419cc0,   2, 0x04, 0x00000000 },
604 605 606
	{}
};

607 608
const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
609 610
	{ 0x419bd4,   1, 0x04, 0x00800000 },
	{ 0x419bdc,   1, 0x04, 0x00000000 },
611 612 613
	{}
};

614 615
const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
616
	{ 0x419d2c,   1, 0x04, 0x00000000 },
617 618 619
	{}
};

620 621
const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
622
	{ 0x419c0c,   1, 0x04, 0x00000000 },
623 624 625
	{}
};

626 627
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
628 629 630 631 632 633 634 635 636 637 638 639 640 641 642 643
	{ 0x419e00,   1, 0x04, 0x00000000 },
	{ 0x419ea0,   1, 0x04, 0x00000000 },
	{ 0x419ea4,   1, 0x04, 0x00000100 },
	{ 0x419ea8,   1, 0x04, 0x00001100 },
	{ 0x419eac,   1, 0x04, 0x11100702 },
	{ 0x419eb0,   1, 0x04, 0x00000003 },
	{ 0x419eb4,   4, 0x04, 0x00000000 },
	{ 0x419ec8,   1, 0x04, 0x06060618 },
	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
	{ 0x419ed4,   1, 0x04, 0x011104f1 },
	{ 0x419edc,   1, 0x04, 0x00000000 },
	{ 0x419f00,   1, 0x04, 0x00000000 },
	{ 0x419f2c,   1, 0x04, 0x00000000 },
	{}
};

644 645
const struct gf100_gr_init
gf100_gr_init_be_0[] = {
646 647 648 649 650 651 652 653 654 655
	{ 0x40880c,   1, 0x04, 0x00000000 },
	{ 0x408910,   9, 0x04, 0x00000000 },
	{ 0x408950,   1, 0x04, 0x00000000 },
	{ 0x408954,   1, 0x04, 0x0000ffff },
	{ 0x408984,   1, 0x04, 0x00000000 },
	{ 0x408988,   1, 0x04, 0x08040201 },
	{ 0x40898c,   1, 0x04, 0x80402010 },
	{}
};

656 657
const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
658 659 660 661
	{ 0x4040f0,   1, 0x04, 0x00000000 },
	{}
};

662 663
const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
664 665 666 667
	{ 0x419880,   1, 0x04, 0x00000002 },
	{}
};

668 669 670 671 672 673 674 675 676 677 678 679 680 681 682 683 684 685 686 687 688 689 690 691 692 693 694 695 696
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
M
Maarten Lankhorst 已提交
697 698 699
	{}
};

700 701 702 703
/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/

704
void
B
Ben Skeggs 已提交
705
gf100_gr_zbc_init(struct gf100_gr *gr)
706 707 708 709 710 711 712 713 714
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
715
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
716 717
	int index;

B
Ben Skeggs 已提交
718 719 720 721 722 723 724
	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
725 726 727
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
728
		gf100_gr_zbc_clear_color(gr, index);
729
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
730
		gf100_gr_zbc_clear_depth(gr, index);
731 732
}

733 734 735 736 737 738
/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
B
Ben Skeggs 已提交
739
gf100_gr_wait_idle(struct gf100_gr *gr)
740
{
741 742
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
743 744 745 746 747 748 749 750
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
751
		nvkm_rd32(device, 0x400700);
752

753 754 755
		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
756 757 758 759 760

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

761 762 763
	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
764 765 766
	return -EAGAIN;
}

767
void
B
Ben Skeggs 已提交
768
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
769
{
770
	struct nvkm_device *device = gr->base.engine.subdev.device;
771 772
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
773 774 775 776 777

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
778
			nvkm_wr32(device, addr, init->data);
779 780 781
			addr += init->pitch;
		}
	}
782 783 784
}

void
B
Ben Skeggs 已提交
785
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
786
{
787
	struct nvkm_device *device = gr->base.engine.subdev.device;
788 789
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
790
	u32 data = 0;
791

792
	nvkm_wr32(device, 0x400208, 0x80000000);
793 794 795 796 797 798

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
799
			nvkm_wr32(device, 0x400204, init->data);
800 801
			data = init->data;
		}
802

803
		while (addr < next) {
804
			nvkm_wr32(device, 0x400200, addr);
805 806 807 808 809
			/**
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
B
Ben Skeggs 已提交
810
				gf100_gr_wait_idle(gr);
811 812 813 814
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
815 816 817
			addr += init->pitch;
		}
	}
818

819
	nvkm_wr32(device, 0x400208, 0x00000000);
820 821 822
}

void
B
Ben Skeggs 已提交
823
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
824
{
825
	struct nvkm_device *device = gr->base.engine.subdev.device;
826 827
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
828
	u32 data = 0;
829

830 831 832 833 834 835
	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
836
			nvkm_wr32(device, 0x40448c, init->data);
837 838 839 840
			data = init->data;
		}

		while (addr < next) {
841
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
842
			addr += init->pitch;
843 844 845 846 847
		}
	}
}

u64
848
gf100_gr_units(struct nvkm_gr *base)
849
{
850
	struct gf100_gr *gr = gf100_gr(base);
851 852
	u64 cfg;

B
Ben Skeggs 已提交
853 854 855
	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
856 857

	return cfg;
858 859
}

860 861 862 863 864 865 866 867 868 869 870 871 872 873 874 875 876 877 878 879 880 881 882 883 884 885 886 887 888 889 890 891 892
/* Human-readable decodings of PGRAPH trap status bits, used by the
 * trap handlers below when formatting error messages.
 */
static const struct nvkm_bitfield gf100_dispatch_error[] = {
	{ 0x00000001, "INJECTED_BUNDLE_ERROR" },
	{ 0x00000002, "CLASS_SUBCH_MISMATCH" },
	{ 0x00000004, "SUBCHSW_DURING_NOTIFY" },
	{}
};

static const struct nvkm_bitfield gf100_m2mf_error[] = {
	{ 0x00000001, "PUSH_TOO_MUCH_DATA" },
	{ 0x00000002, "PUSH_NOT_ENOUGH_DATA" },
	{}
};

static const struct nvkm_bitfield gf100_unk6_error[] = {
	{ 0x00000001, "TEMP_TOO_SMALL" },
	{}
};

static const struct nvkm_bitfield gf100_ccache_error[] = {
	{ 0x00000001, "INTR" },
	{ 0x00000002, "LDCONST_OOB" },
	{}
};

static const struct nvkm_bitfield gf100_macro_error[] = {
	{ 0x00000001, "TOO_FEW_PARAMS" },
	{ 0x00000002, "TOO_MANY_PARAMS" },
	{ 0x00000004, "ILLEGAL_OPCODE" },
	{ 0x00000008, "DOUBLE_BRANCH" },
	{ 0x00000010, "WATCHDOG" },
	{}
};

static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000040, "CTA_RESUME" },
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x00800000, "CTA_THREAD_DIMENSION_ZERO" },
	{ 0x01000000, "MEMORY_WINDOW_OVERLAP" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

921
static void
B
Ben Skeggs 已提交
922
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
923
{
924 925 926
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
927
	u32 trap[4];
928

929
	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
930 931 932
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
933

934
	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
935

936 937 938 939
	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
940
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
941 942
}

943
static const struct nvkm_enum gf100_mp_warp_error[] = {
944 945 946 947 948 949 950 951
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
952
	{ 0x10, "INVALID_ADDR_SPACE" },
953 954 955 956
	{ 0x11, "INVALID_PARAM" },
	{}
};

957
static const struct nvkm_bitfield gf100_mp_global_error[] = {
958 959 960 961 962 963
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
B
Ben Skeggs 已提交
964
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
965
{
966 967
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
968 969
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
970 971
	const struct nvkm_enum *warp;
	char glob[128];
972

973 974 975 976 977 978
	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
979

980 981
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
982 983
}

984
static void
B
Ben Skeggs 已提交
985
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
986
{
987 988
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
989
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
990 991

	if (stat & 0x00000001) {
992
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
993
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
994
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
995 996 997 998
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
B
Ben Skeggs 已提交
999
		gf100_gr_trap_mp(gr, gpc, tpc);
1000 1001 1002 1003
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
1004
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
1005
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
1006
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
1007 1008 1009 1010
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
1011
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
1012
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
1013
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
1014 1015 1016 1017
		stat &= ~0x00000008;
	}

	if (stat) {
1018
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
1019 1020 1021 1022
	}
}

static void
B
Ben Skeggs 已提交
1023
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
1024
{
1025 1026
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1027
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
1028 1029 1030
	int tpc;

	if (stat & 0x00000001) {
B
Ben Skeggs 已提交
1031
		gf100_gr_trap_gpc_rop(gr, gpc);
1032 1033 1034 1035
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
1036
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
1037
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
1038
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
1039 1040 1041 1042
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
1043
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
1044
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
1045
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
1046 1047 1048 1049
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
1050
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
1051
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
1052
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
1053 1054 1055
		stat &= ~0x00000009;
	}

B
Ben Skeggs 已提交
1056
	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1057 1058
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
B
Ben Skeggs 已提交
1059
			gf100_gr_trap_tpc(gr, gpc, tpc);
1060
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
1061 1062 1063 1064 1065
			stat &= ~mask;
		}
	}

	if (stat) {
1066
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
1067 1068 1069 1070
	}
}

static void
B
Ben Skeggs 已提交
1071
gf100_gr_trap_intr(struct gf100_gr *gr)
1072
{
1073 1074
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1075
	char error[128];
1076
	u32 trap = nvkm_rd32(device, 0x400108);
1077
	int rop, gpc;
1078 1079

	if (trap & 0x00000001) {
1080
		u32 stat = nvkm_rd32(device, 0x404000);
1081 1082 1083 1084

		nvkm_snprintbf(error, sizeof(error), gf100_dispatch_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "DISPATCH %08x [%s]\n", stat, error);
1085 1086
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
1087 1088 1089 1090
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
1091
		u32 stat = nvkm_rd32(device, 0x404600);
1092 1093 1094 1095 1096

		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "M2MF %08x [%s]\n", stat, error);

1097 1098
		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
1099 1100 1101 1102
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
1103
		u32 stat = nvkm_rd32(device, 0x408030);
1104 1105 1106 1107

		nvkm_snprintbf(error, sizeof(error), gf100_m2mf_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "CCACHE %08x [%s]\n", stat, error);
1108 1109
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
1110 1111 1112 1113
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
1114
		u32 stat = nvkm_rd32(device, 0x405840);
1115 1116
		nvkm_error(subdev, "SHADER %08x, sph: 0x%06x, stage: 0x%02x\n",
			   stat, stat & 0xffffff, (stat >> 24) & 0x3f);
1117 1118
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
1119 1120 1121 1122
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
1123
		u32 stat = nvkm_rd32(device, 0x40601c);
1124 1125 1126 1127 1128

		nvkm_snprintbf(error, sizeof(error), gf100_unk6_error,
			       stat & 0x3fffffff);
		nvkm_error(subdev, "UNK6 %08x [%s]\n", stat, error);

1129 1130
		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
1131 1132 1133 1134
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
1135
		u32 stat = nvkm_rd32(device, 0x404490);
1136 1137 1138 1139 1140 1141 1142 1143 1144 1145
		u32 pc = nvkm_rd32(device, 0x404494);
		u32 op = nvkm_rd32(device, 0x40449c);

		nvkm_snprintbf(error, sizeof(error), gf100_macro_error,
			       stat & 0x1fffffff);
		nvkm_error(subdev, "MACRO %08x [%s], pc: 0x%03x%s, op: 0x%08x\n",
			   stat, error, pc & 0x7ff,
			   (pc & 0x10000000) ? "" : " (invalid)",
			   op);

1146 1147
		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
1148 1149 1150
		trap &= ~0x00000080;
	}

1151
	if (trap & 0x00000100) {
1152
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
1153

1154 1155
		nvkm_snprintbf(error, sizeof(error), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, error);
1156

1157
		if (stat)
1158 1159
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
1160 1161 1162
		trap &= ~0x00000100;
	}

1163
	if (trap & 0x01000000) {
1164
		u32 stat = nvkm_rd32(device, 0x400118);
B
Ben Skeggs 已提交
1165
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
1166 1167
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
B
Ben Skeggs 已提交
1168
				gf100_gr_trap_gpc(gr, gpc);
1169
				nvkm_wr32(device, 0x400118, mask);
1170 1171 1172
				stat &= ~mask;
			}
		}
1173
		nvkm_wr32(device, 0x400108, 0x01000000);
1174 1175 1176 1177
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
B
Ben Skeggs 已提交
1178
		for (rop = 0; rop < gr->rop_nr; rop++) {
1179 1180
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
1181
			nvkm_error(subdev, "ROP%d %08x %08x\n",
1182
				 rop, statz, statc);
1183 1184
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
1185
		}
1186
		nvkm_wr32(device, 0x400108, 0x02000000);
1187 1188 1189 1190
		trap &= ~0x02000000;
	}

	if (trap) {
1191
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
1192
		nvkm_wr32(device, 0x400108, trap);
1193 1194 1195
	}
}

1196
static void
B
Ben Skeggs 已提交
1197
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1198
{
1199 1200 1201 1202 1203 1204 1205 1206 1207 1208 1209 1210 1211 1212
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
1213 1214 1215
}

void
B
Ben Skeggs 已提交
1216
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1217
{
1218 1219
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1220 1221
	u32 gpc;

B
Ben Skeggs 已提交
1222
	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1223
	for (gpc = 0; gpc < gpcnr; gpc++)
B
Ben Skeggs 已提交
1224
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1225 1226 1227
}

static void
B
Ben Skeggs 已提交
1228
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
1229
{
1230 1231
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1232
	u32 stat = nvkm_rd32(device, 0x409c18);
1233

1234
	if (stat & 0x00000001) {
1235
		u32 code = nvkm_rd32(device, 0x409814);
1236
		if (code == E_BAD_FWMTHD) {
1237 1238
			u32 class = nvkm_rd32(device, 0x409808);
			u32  addr = nvkm_rd32(device, 0x40980c);
1239 1240
			u32  subc = (addr & 0x00070000) >> 16;
			u32  mthd = (addr & 0x00003ffc);
1241
			u32  data = nvkm_rd32(device, 0x409810);
1242

1243 1244 1245
			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);
1246

1247
			nvkm_wr32(device, 0x409c20, 0x00000001);
1248 1249
			stat &= ~0x00000001;
		} else {
1250
			nvkm_error(subdev, "FECS ucode error %d\n", code);
1251 1252
		}
	}
1253

1254
	if (stat & 0x00080000) {
1255
		nvkm_error(subdev, "FECS watchdog timeout\n");
B
Ben Skeggs 已提交
1256
		gf100_gr_ctxctl_debug(gr);
1257
		nvkm_wr32(device, 0x409c20, 0x00080000);
1258 1259 1260 1261
		stat &= ~0x00080000;
	}

	if (stat) {
1262
		nvkm_error(subdev, "FECS %08x\n", stat);
B
Ben Skeggs 已提交
1263
		gf100_gr_ctxctl_debug(gr);
1264
		nvkm_wr32(device, 0x409c20, stat);
1265
	}
1266 1267
}

1268
static void
1269
gf100_gr_intr(struct nvkm_gr *base)
1270
{
1271 1272 1273
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1274 1275
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
1276 1277 1278
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
1279 1280
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
1281 1282
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
1283
	u32 class;
1284 1285
	const char *name = "unknown";
	int chid = -1;
1286

1287
	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
1288 1289 1290 1291
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}
1292

1293
	if (device->card_type < NV_E0 || subc < 4)
1294
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
1295 1296 1297
	else
		class = 0x0000;

1298 1299 1300 1301 1302
	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
1303
		nvkm_wr32(device, 0x400100, 0x00000001);
1304 1305 1306
		stat &= ~0x00000001;
	}

1307
	if (stat & 0x00000010) {
1308
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
1309 1310
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
1311 1312
				   chid, inst << 12, name, subc,
				   class, mthd, data);
1313
		}
1314
		nvkm_wr32(device, 0x400100, 0x00000010);
1315 1316 1317 1318
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
1319 1320
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
1321
			   chid, inst << 12, name, subc, class, mthd, data);
1322
		nvkm_wr32(device, 0x400100, 0x00000020);
1323 1324 1325 1326
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
1327 1328 1329 1330 1331
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
1332
			   name, subc, class, mthd, data);
1333
		nvkm_wr32(device, 0x400100, 0x00100000);
1334 1335 1336 1337
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
1338
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
1339
			   chid, inst << 12, name);
B
Ben Skeggs 已提交
1340
		gf100_gr_trap_intr(gr);
1341
		nvkm_wr32(device, 0x400100, 0x00200000);
1342 1343 1344 1345
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
B
Ben Skeggs 已提交
1346
		gf100_gr_ctxctl_isr(gr);
1347
		nvkm_wr32(device, 0x400100, 0x00080000);
1348 1349 1350 1351
		stat &= ~0x00080000;
	}

	if (stat) {
1352
		nvkm_error(subdev, "intr %08x\n", stat);
1353
		nvkm_wr32(device, 0x400100, stat);
1354 1355
	}

1356
	nvkm_wr32(device, 0x400500, 0x00010001);
1357
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
1358 1359
}

1360
void
B
Ben Skeggs 已提交
1361
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
1362
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
1363
{
1364
	struct nvkm_device *device = gr->base.engine.subdev.device;
1365
	int i;
1366

1367
	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
1368
	for (i = 0; i < data->size / 4; i++)
1369
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
1370

1371
	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
1372 1373
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
1374 1375
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
1376
	}
1377 1378 1379

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
1380
		nvkm_wr32(device, fuc_base + 0x0184, 0);
1381 1382
}

1383
static void
B
Ben Skeggs 已提交
1384
gf100_gr_init_csdata(struct gf100_gr *gr,
1385 1386
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
1387
{
1388
	struct nvkm_device *device = gr->base.engine.subdev.device;
1389 1390
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
1391
	u32 addr = ~0, prev = ~0, xfer = 0;
1392 1393
	u32 star, temp;

1394 1395 1396
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
1397 1398
	if (temp > star)
		star = temp;
1399
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
1400

1401 1402 1403 1404 1405 1406 1407
	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
1408
					nvkm_wr32(device, falcon + 0x01c4, data);
1409 1410 1411 1412
					star += 4;
				}
				addr = head;
				xfer = 0;
1413
			}
1414 1415 1416
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
1417
		}
1418
	}
1419

1420 1421 1422
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
1423 1424
}

1425
int
B
Ben Skeggs 已提交
1426
gf100_gr_init_ctxctl(struct gf100_gr *gr)
1427
{
1428
	const struct gf100_grctx_func *grctx = gr->func->grctx;
1429 1430
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1431
	int i;
1432

B
Ben Skeggs 已提交
1433
	if (gr->firmware) {
1434
		/* load fuc microcode */
1435
		nvkm_mc_unk260(device->mc, 0);
1436 1437
		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
1438
		nvkm_mc_unk260(device->mc, 1);
1439

1440
		/* start both of them running */
1441 1442 1443 1444 1445
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);
		nvkm_wr32(device, 0x41a100, 0x00000002);
		nvkm_wr32(device, 0x409100, 0x00000002);
1446 1447 1448 1449 1450
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;
B
Ben Skeggs 已提交
1451

1452 1453 1454
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);
B
Ben Skeggs 已提交
1455

1456 1457 1458
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
1459 1460 1461 1462
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
1463
			return -EBUSY;
1464

1465 1466 1467
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
1468 1469 1470 1471
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1472 1473
			return -EBUSY;

1474 1475 1476
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
1477 1478 1479 1480
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1481 1482
			return -EBUSY;

1483
		if (device->chipset >= 0xe0) {
1484 1485 1486
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
1487 1488 1489 1490
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1491 1492
				return -EBUSY;

1493 1494 1495 1496
			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
1497 1498 1499 1500
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1501 1502
				return -EBUSY;

1503 1504 1505 1506
			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
1507 1508 1509 1510
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1511 1512
				return -EBUSY;

1513 1514 1515
			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
1516 1517
		}

B
Ben Skeggs 已提交
1518 1519
		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
1520
			if (ret) {
1521
				nvkm_error(subdev, "failed to construct context\n");
1522 1523 1524 1525 1526
				return ret;
			}
		}

		return 0;
1527
	} else
1528
	if (!gr->func->fecs.ucode) {
1529
		return -ENOSYS;
1530
	}
1531

1532
	/* load HUB microcode */
1533
	nvkm_mc_unk260(device->mc, 0);
1534
	nvkm_wr32(device, 0x4091c0, 0x01000000);
1535 1536
	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
1537

1538
	nvkm_wr32(device, 0x409180, 0x01000000);
1539
	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
1540
		if ((i & 0x3f) == 0)
1541
			nvkm_wr32(device, 0x409188, i >> 6);
1542
		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
1543 1544 1545
	}

	/* load GPC microcode */
1546
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
1547 1548
	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
1549

1550
	nvkm_wr32(device, 0x41a180, 0x01000000);
1551
	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
1552
		if ((i & 0x3f) == 0)
1553
			nvkm_wr32(device, 0x41a188, i >> 6);
1554
		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
1555
	}
1556
	nvkm_mc_unk260(device->mc, 1);
1557

1558
	/* load register lists */
1559 1560 1561 1562
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
1563

1564
	/* start HUB ucode running, it'll init the GPCs */
1565 1566
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
1567 1568 1569 1570
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
B
Ben Skeggs 已提交
1571
		gf100_gr_ctxctl_debug(gr);
1572 1573 1574
		return -EBUSY;
	}

1575
	gr->size = nvkm_rd32(device, 0x409804);
B
Ben Skeggs 已提交
1576 1577
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
1578
		if (ret) {
1579
			nvkm_error(subdev, "failed to construct context\n");
1580 1581
			return ret;
		}
1582 1583 1584
	}

	return 0;
1585 1586
}

1587 1588 1589 1590 1591 1592 1593 1594 1595 1596 1597 1598 1599 1600 1601 1602 1603 1604 1605 1606 1607 1608 1609 1610 1611 1612 1613 1614 1615 1616 1617 1618 1619 1620 1621 1622 1623
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int ret, i, j;

	nvkm_pmu_pgob(device->pmu, false);

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b4);
	if (ret)
		return ret;

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b8);
	if (ret)
		return ret;

	nvkm_kmap(gr->unk4188b4);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b4, i, 0x00000010);
	nvkm_done(gr->unk4188b4);

	nvkm_kmap(gr->unk4188b8);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b8, i, 0x00000010);
	nvkm_done(gr->unk4188b8);

	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;
	for (i = 0; i < gr->gpc_nr; i++) {
		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
		gr->tpc_total += gr->tpc_nr[i];
		gr->ppc_nr[i]  = gr->func->ppc_nr;
		for (j = 0; j < gr->ppc_nr[i]; j++) {
			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
1624 1625
			if (mask)
				gr->ppc_mask[i] |= (1 << j);
1626 1627 1628 1629 1630 1631 1632 1633 1634 1635 1636 1637 1638 1639 1640 1641 1642 1643 1644 1645 1646 1647 1648 1649 1650 1651 1652 1653 1654 1655 1656 1657 1658 1659 1660 1661 1662 1663 1664 1665 1666 1667 1668 1669 1670 1671 1672 1673 1674 1675 1676 1677 1678 1679 1680 1681 1682 1683 1684 1685 1686
			gr->ppc_tpc_nr[i][j] = hweight8(mask);
		}
	}

	/*XXX: these need figuring out... though it might not even matter */
	switch (device->chipset) {
	case 0xc0:
		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
			gr->magic_not_rop_nr = 0x07;
		} else
		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
			gr->magic_not_rop_nr = 0x05;
		} else
		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
			gr->magic_not_rop_nr = 0x06;
		}
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc1: /* 2/0/0/0, 1 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc8: /* 4/4/3/4, 5 */
		gr->magic_not_rop_nr = 0x06;
		break;
	case 0xce: /* 4/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xcf: /* 4/0/0/0, 3 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xd7:
	case 0xd9: /* 1/0/0/0, 1 */
	case 0xea: /* gk20a */
	case 0x12b: /* gm20b */
		gr->magic_not_rop_nr = 0x01;
		break;
	}

	return 0;
}

/* Base-class init hook: drop PGOB protection, then run the
 * chipset-specific init routine.
 */
int
gf100_gr_init_(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);

	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
	return gr->func->init(gr);
}

/* Release a firmware image copy; safe on an already-released fuc
 * (kfree(NULL) is a no-op) and leaves it NULLed against double-free.
 */
void
gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
{
	kfree(fuc->data);
	fuc->data = NULL;
}

1687 1688 1689 1690 1691 1692
/* Free a parsed init pack (vmalloc'd; vfree(NULL) is a no-op). */
static void
gf100_gr_dtor_init(struct gf100_gr_pack *pack)
{
	vfree(pack);
}

1693 1694 1695 1696 1697 1698 1699 1700 1701 1702 1703 1704 1705 1706
/* Destructor: release per-chipset state, firmware copies, parsed init
 * packs, and the scratch buffers allocated in gf100_gr_oneinit.
 */
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);

	/* Chipset-specific teardown first, if provided. */
	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	gf100_gr_dtor_fw(&gr->fuc409c);
	gf100_gr_dtor_fw(&gr->fuc409d);
	gf100_gr_dtor_fw(&gr->fuc41ac);
	gf100_gr_dtor_fw(&gr->fuc41ad);

	gf100_gr_dtor_init(gr->fuc_bundle);
	gf100_gr_dtor_init(gr->fuc_method);
	gf100_gr_dtor_init(gr->fuc_sw_ctx);
	gf100_gr_dtor_init(gr->fuc_sw_nonctx);

	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);
	return gr;
}

/* Base-class vtable shared by all gf100-derived graphics engines. */
static const struct nvkm_gr_func
gf100_gr_ = {
	.dtor = gf100_gr_dtor,
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};

int
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
		 struct gf100_gr_fuc *fuc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	const struct firmware *fw;
	int ret;

1737
	ret = nvkm_firmware_get(device, fwname, &fw);
1738 1739 1740 1741 1742 1743 1744
	if (ret) {
		nvkm_error(subdev, "failed to load %s\n", fwname);
		return ret;
	}

	fuc->size = fw->size;
	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
1745
	nvkm_firmware_put(fw);
1746 1747 1748 1749 1750 1751 1752 1753 1754 1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767
	return (fuc->data != NULL) ? 0 : -ENOMEM;
}

/* Common constructor for gf100-family graphics engines.  External
 * firmware is used when forced by the "NvGrUseFW" option or when no
 * built-in FECS ucode exists for this chipset.
 */
int
gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct gf100_gr *gr)
{
	gr->func = func;
	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW",
				    func->fecs.ucode == NULL);

	/* Engine is enabled only if some form of ucode is available. */
	return nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000,
			    gr->firmware || func->fecs.ucode != NULL,
			    &gr->base);
}

1768
int
1769 1770 1771 1772
gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr *gr;
1773 1774
	int ret;

1775 1776 1777
	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;
1778 1779 1780 1781 1782 1783 1784 1785 1786 1787 1788 1789 1790 1791

	ret = gf100_gr_ctor(func, device, index, gr);
	if (ret)
		return ret;

	if (gr->firmware) {
		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
			return -ENODEV;
	}

	return 0;
1792 1793 1794 1795
}

int
gf100_gr_init(struct gf100_gr *gr)
1796
{
1797
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
1798
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
1799 1800 1801
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
1802
	int i;
1803

1804 1805 1806 1807 1808 1809
	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
1810 1811
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);
1812

1813
	gf100_gr_mmio(gr, gr->func->mmio);
1814

B
Ben Skeggs 已提交
1815 1816
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
1817
		do {
B
Ben Skeggs 已提交
1818
			gpc = (gpc + 1) % gr->gpc_nr;
1819
		} while (!tpcnr[gpc]);
B
Ben Skeggs 已提交
1820
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
1821 1822 1823 1824

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

1825 1826 1827 1828
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
1829

B
Ben Skeggs 已提交
1830
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1831
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
B
Ben Skeggs 已提交
1832
			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
1833
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
B
Ben Skeggs 已提交
1834
			gr->tpc_total);
1835
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
1836 1837
	}

1838
	if (device->chipset != 0xd7)
1839
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
M
Maarten Lankhorst 已提交
1840
	else
1841
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
B
Ben Skeggs 已提交
1842

1843
	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
B
Ben Skeggs 已提交
1844

1845
	nvkm_wr32(device, 0x400500, 0x00010001);
B
Ben Skeggs 已提交
1846

1847 1848
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
B
Ben Skeggs 已提交
1849

1850 1851 1852 1853 1854 1855 1856 1857 1858 1859 1860
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
B
Ben Skeggs 已提交
1861 1862

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1863 1864 1865 1866
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
B
Ben Skeggs 已提交
1867
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1868 1869 1870 1871 1872 1873 1874
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
1875
		}
1876 1877
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
1878 1879
	}

B
Ben Skeggs 已提交
1880
	for (rop = 0; rop < gr->rop_nr; rop++) {
1881 1882 1883 1884
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
1885
	}
1886

1887 1888 1889 1890 1891 1892
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);
1893

1894
	nvkm_wr32(device, 0x400054, 0x34ce3464);
1895

B
Ben Skeggs 已提交
1896
	gf100_gr_zbc_init(gr);
1897

B
Ben Skeggs 已提交
1898
	return gf100_gr_init_ctxctl(gr);
1899 1900
}

1901
#include "fuc/hubgf100.fuc3.h"

/* Built-in nouveau FECS (HUB) context-switch microcode for GF100. */
struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
};

1911
#include "fuc/gpcgf100.fuc3.h"

/* Built-in nouveau GPCCS context-switch microcode for GF100. */
struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
};

1921 1922
static const struct gf100_gr_func
gf100_gr = {
1923 1924 1925 1926
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
1927 1928 1929 1930 1931 1932 1933 1934 1935 1936
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};

1937 1938 1939 1940 1941
/* Public entry point: create the GF100 graphics engine instance. */
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gf100_gr_new_(&gf100_gr, device, index, pgr);
}