/*
 * Copyright 2012 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
24 25 26 27 28 29 30 31 32
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
#include <subdev/fb.h>
#include <subdev/mc.h>
#include <subdev/timer.h>
33
#include <engine/fifo.h>
34 35 36

#include <nvif/class.h>
#include <nvif/unpack.h>
37

38 39 40 41 42
/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
B
Ben Skeggs 已提交
43
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
44
{
45
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
46
	if (gr->zbc_color[zbc].format) {
47 48 49 50 51 52 53 54
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
55 56 57
}

static int
B
Ben Skeggs 已提交
58
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
59
		       const u32 ds[4], const u32 l2[4])
60
{
B
Ben Skeggs 已提交
61
	struct nvkm_ltc *ltc = nvkm_ltc(gr);
62 63 64
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
65 66
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
67
				continue;
B
Ben Skeggs 已提交
68 69
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
70
				continue;
B
Ben Skeggs 已提交
71 72
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
73 74 75 76 77 78 79 80 81
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

82 83 84
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
85 86 87
	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
88
	ltc->zbc_color_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
89
	gf100_gr_zbc_clear_color(gr, zbc);
90 91 92 93
	return zbc;
}

static void
B
Ben Skeggs 已提交
94
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
95
{
96
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
97
	if (gr->zbc_depth[zbc].format)
98 99 100 101
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
102 103 104
}

static int
B
Ben Skeggs 已提交
105
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
106
		       const u32 ds, const u32 l2)
107
{
B
Ben Skeggs 已提交
108
	struct nvkm_ltc *ltc = nvkm_ltc(gr);
109 110 111
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
112 113
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
114
				continue;
B
Ben Skeggs 已提交
115
			if (gr->zbc_depth[i].ds != ds)
116
				continue;
B
Ben Skeggs 已提交
117
			if (gr->zbc_depth[i].l2 != l2) {
118 119 120 121 122 123 124 125 126
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

127 128 129
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
130 131 132
	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
133
	ltc->zbc_depth_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
134
	gf100_gr_zbc_clear_depth(gr, zbc);
135 136 137
	return zbc;
}

138 139 140 141
/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/

142
static int
143
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
144
{
B
Ben Skeggs 已提交
145
	struct gf100_gr *gr = (void *)object->engine;
146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
B
Ben Skeggs 已提交
172
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
173 174
							   args->v0.ds,
							   args->v0.l2);
175 176 177 178 179 180 181 182 183 184 185 186 187 188
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
189
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
190
{
B
Ben Skeggs 已提交
191
	struct gf100_gr *gr = (void *)object->engine;
192 193 194 195 196 197 198 199
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
B
Ben Skeggs 已提交
200
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
201 202
							   args->v0.ds,
							   args->v0.l2);
203 204 205 206 207 208 209 210 211 212
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
213
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
214 215 216
{
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
217
		return gf100_fermi_mthd_zbc_color(object, data, size);
218
	case FERMI_A_ZBC_DEPTH:
219
		return gf100_fermi_mthd_zbc_depth(object, data, size);
220 221 222 223 224 225
	default:
		break;
	}
	return -EINVAL;
}

226 227
const struct nvkm_object_func
gf100_fermi = {
228
	.mthd = gf100_fermi_mthd,
229 230
};

231 232
/*
 * Write registers 0x419e44 and 0x419e4c with all-ones when @data is
 * non-zero, or with zero otherwise.
 */
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000);
	nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000);
}

238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256
/*
 * Handle a method in software.
 *
 * Only method 0x1528 on a class whose low byte is 0x97 or 0xc0 is
 * recognised; it is forwarded to gf100_gr_mthd_set_shader_exceptions().
 *
 * Returns true when the method was handled, false otherwise.
 */
static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	const u16 low = class & 0x00ff;

	if (low != 0x97 && low != 0xc0)
		return false;

	if (mthd != 0x1528)
		return false;

	gf100_gr_mthd_set_shader_exceptions(device, data);
	return true;
}
257

258 259 260 261 262 263 264 265 266 267 268 269 270 271 272
/*
 * Fetch entry @index of the chipset's object class list.
 *
 * The list (gr->func->sclass) ends at the first entry whose oclass is
 * zero.  When @index names a valid entry it is copied to *@sclass and
 * @index is returned; otherwise *@sclass is left untouched and the number
 * of entries in the list is returned.
 */
static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int nr;

	for (nr = 0; gr->func->sclass[nr].oclass; nr++) {
		if (nr != index)
			continue;

		*sclass = gr->func->sclass[nr];
		return index;
	}

	return nr;
}
273 274 275 276

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
277

278 279 280
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
281
{
282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

/*
 * Channel destructor: releases every per-channel data buffer and the mmio
 * list buffer.  A VMA is unmapped and released only if it has a node;
 * nvkm_memory_del() is called on each buffer regardless.
 *
 * Returns the channel pointer.
 */
static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	unsigned int n = 0;

	while (n < ARRAY_SIZE(chan->data)) {
		if (chan->data[n].vma.node) {
			nvkm_vm_unmap(&chan->data[n].vma);
			nvkm_vm_put(&chan->data[n].vma);
		}
		nvkm_memory_del(&chan->data[n].mem);
		n++;
	}

	if (chan->mmio_vma.node) {
		nvkm_vm_unmap(&chan->mmio_vma);
		nvkm_vm_put(&chan->mmio_vma);
	}
	nvkm_memory_del(&chan->mmio);

	return chan;
}

/* Object functions for a per-channel GR context. */
static const struct nvkm_object_func
gf100_gr_chan = {
	.bind = gf100_gr_chan_bind,
	.dtor = gf100_gr_chan_dtor,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
B
Ben Skeggs 已提交
347 348
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
349
	struct gf100_gr_chan *chan;
350
	struct nvkm_device *device = gr->base.engine.subdev.device;
351 352
	int ret, i;

353 354 355 356 357
	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;
358

359 360 361 362
	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
363 364
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
365 366 367
	if (ret)
		return ret;

368
	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
369
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
370 371 372
	if (ret)
		return ret;

373 374
	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

375
	/* allocate buffers referenced by mmio list */
B
Ben Skeggs 已提交
376
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
377 378 379
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
380 381
		if (ret)
			return ret;
382

383 384 385
		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
386 387
		if (ret)
			return ret;
388

389
		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
390
		data++;
391 392
	}

393
	/* finally, fill in the mmio list and point the context at it */
394
	nvkm_kmap(chan->mmio);
B
Ben Skeggs 已提交
395
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
396 397
		u32 addr = mmio->addr;
		u32 data = mmio->data;
398

399
		if (mmio->buffer >= 0) {
400
			u64 info = chan->data[mmio->buffer].vma.offset;
401 402
			data |= info >> mmio->shift;
		}
403

404 405
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
406 407
		mmio++;
	}
408
	nvkm_done(chan->mmio);
409
	return 0;
410 411
}

412
/*******************************************************************************
 * PGRAPH register lists
 ******************************************************************************/

416 417
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
	{ 0x400080,   1, 0x04, 0x003083c2 },
	{ 0x400088,   1, 0x04, 0x00006fe7 },
	{ 0x40008c,   1, 0x04, 0x00000000 },
	{ 0x400090,   1, 0x04, 0x00000030 },
	{ 0x40013c,   1, 0x04, 0x013901f7 },
	{ 0x400140,   1, 0x04, 0x00000100 },
	{ 0x400144,   1, 0x04, 0x00000000 },
	{ 0x400148,   1, 0x04, 0x00000110 },
	{ 0x400138,   1, 0x04, 0x00000000 },
	{ 0x400130,   2, 0x04, 0x00000000 },
	{ 0x400124,   1, 0x04, 0x00000002 },
	{}
};

432 433
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
	{ 0x40415c,   1, 0x04, 0x00000000 },
	{ 0x404170,   1, 0x04, 0x00000000 },
	{}
};

439 440
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
	{ 0x404488,   2, 0x04, 0x00000000 },
	{}
};

445 446
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
	{ 0x407808,   1, 0x04, 0x00000000 },
	{}
};

451 452
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
	{ 0x406024,   1, 0x04, 0x00000000 },
	{}
};

457 458
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
	{ 0x405844,   1, 0x04, 0x00ffffff },
	{ 0x405850,   1, 0x04, 0x00000000 },
	{ 0x405908,   1, 0x04, 0x00000000 },
	{}
};

465 466
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
	{ 0x40803c,   1, 0x04, 0x00000000 },
	{}
};

471 472
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
	{ 0x4184a0,   1, 0x04, 0x00000000 },
	{}
};

477 478
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
	{ 0x418604,   1, 0x04, 0x00000000 },
	{ 0x418680,   1, 0x04, 0x00000000 },
	{ 0x418714,   1, 0x04, 0x80000000 },
	{ 0x418384,   1, 0x04, 0x00000000 },
	{}
};

486 487
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
	{ 0x418814,   3, 0x04, 0x00000000 },
	{}
};

492 493
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
	{ 0x418b04,   1, 0x04, 0x00000000 },
	{}
};

498 499
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
	{ 0x4188c8,   1, 0x04, 0x80000000 },
	{ 0x4188cc,   1, 0x04, 0x00000000 },
	{ 0x4188d0,   1, 0x04, 0x00010000 },
	{ 0x4188d4,   1, 0x04, 0x00000001 },
	{}
};

507 508
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
	{ 0x418910,   1, 0x04, 0x00010001 },
	{ 0x418914,   1, 0x04, 0x00000301 },
	{ 0x418918,   1, 0x04, 0x00800000 },
	{ 0x418980,   1, 0x04, 0x77777770 },
	{ 0x418984,   3, 0x04, 0x77777777 },
	{}
};

517 518
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
	{ 0x418c04,   1, 0x04, 0x00000000 },
	{ 0x418c88,   1, 0x04, 0x00000000 },
	{}
};

524 525
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
	{ 0x418d00,   1, 0x04, 0x00000000 },
	{ 0x418f08,   1, 0x04, 0x00000000 },
	{ 0x418e00,   1, 0x04, 0x00000050 },
	{ 0x418e08,   1, 0x04, 0x00000000 },
	{}
};

533 534
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
	{ 0x41900c,   1, 0x04, 0x00000000 },
	{ 0x419018,   1, 0x04, 0x00000000 },
	{}
};

540 541
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
	{ 0x419d08,   2, 0x04, 0x00000000 },
	{ 0x419d10,   1, 0x04, 0x00000014 },
	{}
};

547 548
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
	{ 0x419ab0,   1, 0x04, 0x00000000 },
	{ 0x419ab8,   1, 0x04, 0x000000e7 },
	{ 0x419abc,   2, 0x04, 0x00000000 },
	{}
};

555 556
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
	{ 0x41980c,   3, 0x04, 0x00000000 },
	{ 0x419844,   1, 0x04, 0x00000000 },
	{ 0x41984c,   1, 0x04, 0x00005bc5 },
	{ 0x419850,   4, 0x04, 0x00000000 },
	{}
};

564 565
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
	{ 0x419c98,   1, 0x04, 0x00000000 },
	{ 0x419ca8,   1, 0x04, 0x80000000 },
	{ 0x419cb4,   1, 0x04, 0x00000000 },
	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
	{ 0x419cbc,   1, 0x04, 0x28137606 },
	{ 0x419cc0,   2, 0x04, 0x00000000 },
	{}
};

575 576
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
	{ 0x419bd4,   1, 0x04, 0x00800000 },
	{ 0x419bdc,   1, 0x04, 0x00000000 },
	{}
};

582 583
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
	{ 0x419d2c,   1, 0x04, 0x00000000 },
	{}
};

588 589
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
	{ 0x419c0c,   1, 0x04, 0x00000000 },
	{}
};

594 595
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
	{ 0x419e00,   1, 0x04, 0x00000000 },
	{ 0x419ea0,   1, 0x04, 0x00000000 },
	{ 0x419ea4,   1, 0x04, 0x00000100 },
	{ 0x419ea8,   1, 0x04, 0x00001100 },
	{ 0x419eac,   1, 0x04, 0x11100702 },
	{ 0x419eb0,   1, 0x04, 0x00000003 },
	{ 0x419eb4,   4, 0x04, 0x00000000 },
	{ 0x419ec8,   1, 0x04, 0x06060618 },
	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
	{ 0x419ed4,   1, 0x04, 0x011104f1 },
	{ 0x419edc,   1, 0x04, 0x00000000 },
	{ 0x419f00,   1, 0x04, 0x00000000 },
	{ 0x419f2c,   1, 0x04, 0x00000000 },
	{}
};

612 613
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_be_0[] = {
	{ 0x40880c,   1, 0x04, 0x00000000 },
	{ 0x408910,   9, 0x04, 0x00000000 },
	{ 0x408950,   1, 0x04, 0x00000000 },
	{ 0x408954,   1, 0x04, 0x0000ffff },
	{ 0x408984,   1, 0x04, 0x00000000 },
	{ 0x408988,   1, 0x04, 0x08040201 },
	{ 0x40898c,   1, 0x04, 0x80402010 },
	{}
};

624 625
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
	{ 0x4040f0,   1, 0x04, 0x00000000 },
	{}
};

630 631
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
	{ 0x419880,   1, 0x04, 0x00000002 },
	{}
};

636 637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664
/*
 * Pack that groups the init lists in this file, in the order listed;
 * terminated by an empty entry.
 */
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
	{}
};

668 669 670 671
/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/

672
void
B
Ben Skeggs 已提交
673
gf100_gr_zbc_init(struct gf100_gr *gr)
674 675 676 677 678 679 680 681 682
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
B
Ben Skeggs 已提交
683
	struct nvkm_ltc *ltc = nvkm_ltc(gr);
684 685
	int index;

B
Ben Skeggs 已提交
686 687 688 689 690 691 692
	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
693 694 695
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
696
		gf100_gr_zbc_clear_color(gr, index);
697
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
698
		gf100_gr_zbc_clear_depth(gr, index);
699 700
}

701 702 703 704 705 706
/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
B
Ben Skeggs 已提交
707
gf100_gr_wait_idle(struct gf100_gr *gr)
708
{
709 710
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
711 712 713 714 715 716 717 718
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
719
		nvkm_rd32(device, 0x400700);
720

721 722 723
		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
724 725 726 727 728

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

729 730 731
	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
732 733 734
	return -EAGAIN;
}

735
void
B
Ben Skeggs 已提交
736
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
737
{
738
	struct nvkm_device *device = gr->base.engine.subdev.device;
739 740
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
741 742 743 744 745

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
746
			nvkm_wr32(device, addr, init->data);
747 748 749
			addr += init->pitch;
		}
	}
750 751 752
}

void
B
Ben Skeggs 已提交
753
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
754
{
755
	struct nvkm_device *device = gr->base.engine.subdev.device;
756 757
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
758
	u32 data = 0;
759

760
	nvkm_wr32(device, 0x400208, 0x80000000);
761 762 763 764 765 766

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
767
			nvkm_wr32(device, 0x400204, init->data);
768 769
			data = init->data;
		}
770

771
		while (addr < next) {
772
			nvkm_wr32(device, 0x400200, addr);
773 774 775 776 777
			/**
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
B
Ben Skeggs 已提交
778
				gf100_gr_wait_idle(gr);
779 780 781 782
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
783 784 785
			addr += init->pitch;
		}
	}
786

787
	nvkm_wr32(device, 0x400208, 0x00000000);
788 789 790
}

void
B
Ben Skeggs 已提交
791
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
792
{
793
	struct nvkm_device *device = gr->base.engine.subdev.device;
794 795
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
796
	u32 data = 0;
797

798 799 800 801 802 803
	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
804
			nvkm_wr32(device, 0x40448c, init->data);
805 806 807 808
			data = init->data;
		}

		while (addr < next) {
809
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
810
			addr += init->pitch;
811 812 813 814 815
		}
	}
}

u64
B
Ben Skeggs 已提交
816
gf100_gr_units(struct nvkm_gr *obj)
817
{
B
Ben Skeggs 已提交
818
	struct gf100_gr *gr = container_of(obj, typeof(*gr), base);
819 820
	u64 cfg;

B
Ben Skeggs 已提交
821 822 823
	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
824 825

	return cfg;
826 827
}

828 829 830 831 832 833 834 835 836 837 838 839
/* Bit -> name table for SKED error bits; terminated by an empty entry. */
static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

843 844 845 846 847 848 849
/*
 * Bit -> name table used to decode the GPC PROP trap status; terminated by
 * an empty entry.
 */
static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

853
static void
B
Ben Skeggs 已提交
854
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
855
{
856 857 858
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
859
	u32 trap[4];
860

861
	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
862 863 864
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
865

866
	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
867

868 869 870 871
	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
872
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
873 874
}

875
/*
 * Value -> name table used to decode the MP warp error code; terminated by
 * an empty entry.
 */
static const struct nvkm_enum gf100_mp_warp_error[] = {
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
	{ 0x11, "INVALID_PARAM" },
	{}
};

888
/*
 * Bit -> name table used to decode the MP global error word; terminated by
 * an empty entry.
 */
static const struct nvkm_bitfield gf100_mp_global_error[] = {
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
B
Ben Skeggs 已提交
895
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
896
{
897 898
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
899 900
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
901 902
	const struct nvkm_enum *warp;
	char glob[128];
903

904 905 906 907 908 909
	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
910

911 912
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
913 914
}

915
static void
B
Ben Skeggs 已提交
916
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
917
{
918 919
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
920
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
921 922

	if (stat & 0x00000001) {
923
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
924
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
925
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
926 927 928 929
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
B
Ben Skeggs 已提交
930
		gf100_gr_trap_mp(gr, gpc, tpc);
931 932 933 934
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
935
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
936
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
937
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
938 939 940 941
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
942
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
943
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
944
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
945 946 947 948
		stat &= ~0x00000008;
	}

	if (stat) {
949
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
950 951 952 953
	}
}

static void
B
Ben Skeggs 已提交
954
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
955
{
956 957
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
958
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
959 960 961
	int tpc;

	if (stat & 0x00000001) {
B
Ben Skeggs 已提交
962
		gf100_gr_trap_gpc_rop(gr, gpc);
963 964 965 966
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
967
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
968
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
969
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
970 971 972 973
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
974
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
975
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
976
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
977 978 979 980
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
981
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
982
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
983
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
984 985 986
		stat &= ~0x00000009;
	}

B
Ben Skeggs 已提交
987
	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
988 989
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
B
Ben Skeggs 已提交
990
			gf100_gr_trap_tpc(gr, gpc, tpc);
991
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
992 993 994 995 996
			stat &= ~mask;
		}
	}

	if (stat) {
997
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
998 999 1000 1001
	}
}

static void
B
Ben Skeggs 已提交
1002
gf100_gr_trap_intr(struct gf100_gr *gr)
1003
{
1004 1005
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1006
	u32 trap = nvkm_rd32(device, 0x400108);
1007
	int rop, gpc;
1008 1009

	if (trap & 0x00000001) {
1010
		u32 stat = nvkm_rd32(device, 0x404000);
1011
		nvkm_error(subdev, "DISPATCH %08x\n", stat);
1012 1013
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
1014 1015 1016 1017
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
1018
		u32 stat = nvkm_rd32(device, 0x404600);
1019
		nvkm_error(subdev, "M2MF %08x\n", stat);
1020 1021
		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
1022 1023 1024 1025
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
1026
		u32 stat = nvkm_rd32(device, 0x408030);
1027
		nvkm_error(subdev, "CCACHE %08x\n", stat);
1028 1029
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
1030 1031 1032 1033
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
1034
		u32 stat = nvkm_rd32(device, 0x405840);
1035
		nvkm_error(subdev, "SHADER %08x\n", stat);
1036 1037
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
1038 1039 1040 1041
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
1042
		u32 stat = nvkm_rd32(device, 0x40601c);
1043
		nvkm_error(subdev, "UNK6 %08x\n", stat);
1044 1045
		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
1046 1047 1048 1049
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
1050
		u32 stat = nvkm_rd32(device, 0x404490);
1051
		nvkm_error(subdev, "MACRO %08x\n", stat);
1052 1053
		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
1054 1055 1056
		trap &= ~0x00000080;
	}

1057
	if (trap & 0x00000100) {
1058 1059
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
		char sked[128];
1060

1061 1062
		nvkm_snprintbf(sked, sizeof(sked), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, sked);
1063

1064
		if (stat)
1065 1066
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
1067 1068 1069
		trap &= ~0x00000100;
	}

1070
	if (trap & 0x01000000) {
1071
		u32 stat = nvkm_rd32(device, 0x400118);
B
Ben Skeggs 已提交
1072
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
1073 1074
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
B
Ben Skeggs 已提交
1075
				gf100_gr_trap_gpc(gr, gpc);
1076
				nvkm_wr32(device, 0x400118, mask);
1077 1078 1079
				stat &= ~mask;
			}
		}
1080
		nvkm_wr32(device, 0x400108, 0x01000000);
1081 1082 1083 1084
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
B
Ben Skeggs 已提交
1085
		for (rop = 0; rop < gr->rop_nr; rop++) {
1086 1087
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
1088
			nvkm_error(subdev, "ROP%d %08x %08x\n",
1089
				 rop, statz, statc);
1090 1091
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
1092
		}
1093
		nvkm_wr32(device, 0x400108, 0x02000000);
1094 1095 1096 1097
		trap &= ~0x02000000;
	}

	if (trap) {
1098
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
1099
		nvkm_wr32(device, 0x400108, trap);
1100 1101 1102
	}
}

1103
static void
B
Ben Skeggs 已提交
1104
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1105
{
1106 1107 1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
1120 1121 1122
}

void
B
Ben Skeggs 已提交
1123
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1124
{
1125 1126
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1127 1128
	u32 gpc;

B
Ben Skeggs 已提交
1129
	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1130
	for (gpc = 0; gpc < gpcnr; gpc++)
B
Ben Skeggs 已提交
1131
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1132 1133 1134
}

static void
B
Ben Skeggs 已提交
1135
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
1136
{
1137 1138
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1139
	u32 stat = nvkm_rd32(device, 0x409c18);
1140

1141
	if (stat & 0x00000001) {
1142
		u32 code = nvkm_rd32(device, 0x409814);
1143
		if (code == E_BAD_FWMTHD) {
1144 1145
			u32 class = nvkm_rd32(device, 0x409808);
			u32  addr = nvkm_rd32(device, 0x40980c);
1146 1147
			u32  subc = (addr & 0x00070000) >> 16;
			u32  mthd = (addr & 0x00003ffc);
1148
			u32  data = nvkm_rd32(device, 0x409810);
1149

1150 1151 1152
			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);
1153

1154
			nvkm_wr32(device, 0x409c20, 0x00000001);
1155 1156
			stat &= ~0x00000001;
		} else {
1157
			nvkm_error(subdev, "FECS ucode error %d\n", code);
1158 1159
		}
	}
1160

1161
	if (stat & 0x00080000) {
1162
		nvkm_error(subdev, "FECS watchdog timeout\n");
B
Ben Skeggs 已提交
1163
		gf100_gr_ctxctl_debug(gr);
1164
		nvkm_wr32(device, 0x409c20, 0x00080000);
1165 1166 1167 1168
		stat &= ~0x00080000;
	}

	if (stat) {
1169
		nvkm_error(subdev, "FECS %08x\n", stat);
B
Ben Skeggs 已提交
1170
		gf100_gr_ctxctl_debug(gr);
1171
		nvkm_wr32(device, 0x409c20, stat);
1172
	}
1173 1174
}

1175
static void
1176
gf100_gr_intr(struct nvkm_subdev *subdev)
1177
{
1178 1179
	struct gf100_gr *gr = (void *)subdev;
	struct nvkm_device *device = gr->base.engine.subdev.device;
1180 1181
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
1182 1183 1184
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
1185 1186
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
1187 1188
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
1189
	u32 class;
1190 1191
	const char *name = "unknown";
	int chid = -1;
1192

1193
	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
1194 1195 1196 1197
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}
1198

B
Ben Skeggs 已提交
1199
	if (nv_device(gr)->card_type < NV_E0 || subc < 4)
1200
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
1201 1202 1203
	else
		class = 0x0000;

1204 1205 1206 1207 1208
	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
1209
		nvkm_wr32(device, 0x400100, 0x00000001);
1210 1211 1212
		stat &= ~0x00000001;
	}

1213
	if (stat & 0x00000010) {
1214
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
1215 1216
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
1217 1218
				   chid, inst << 12, name, subc,
				   class, mthd, data);
1219
		}
1220
		nvkm_wr32(device, 0x400100, 0x00000010);
1221 1222 1223 1224
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
1225 1226
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
1227
			   chid, inst << 12, name, subc, class, mthd, data);
1228
		nvkm_wr32(device, 0x400100, 0x00000020);
1229 1230 1231 1232
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
1233 1234 1235 1236 1237
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
1238
			   name, subc, class, mthd, data);
1239
		nvkm_wr32(device, 0x400100, 0x00100000);
1240 1241 1242 1243
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
1244
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
1245
			   chid, inst << 12, name);
B
Ben Skeggs 已提交
1246
		gf100_gr_trap_intr(gr);
1247
		nvkm_wr32(device, 0x400100, 0x00200000);
1248 1249 1250 1251
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
B
Ben Skeggs 已提交
1252
		gf100_gr_ctxctl_isr(gr);
1253
		nvkm_wr32(device, 0x400100, 0x00080000);
1254 1255 1256 1257
		stat &= ~0x00080000;
	}

	if (stat) {
1258
		nvkm_error(subdev, "intr %08x\n", stat);
1259
		nvkm_wr32(device, 0x400100, stat);
1260 1261
	}

1262
	nvkm_wr32(device, 0x400500, 0x00010001);
1263
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
1264 1265
}

1266
void
B
Ben Skeggs 已提交
1267
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
1268
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
1269
{
1270
	struct nvkm_device *device = gr->base.engine.subdev.device;
1271
	int i;
1272

1273
	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
1274
	for (i = 0; i < data->size / 4; i++)
1275
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
1276

1277
	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
1278 1279
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
1280 1281
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
1282
	}
1283 1284 1285

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
1286
		nvkm_wr32(device, fuc_base + 0x0184, 0);
1287 1288
}

1289
static void
B
Ben Skeggs 已提交
1290
gf100_gr_init_csdata(struct gf100_gr *gr,
1291 1292
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
1293
{
1294
	struct nvkm_device *device = gr->base.engine.subdev.device;
1295 1296
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
1297
	u32 addr = ~0, prev = ~0, xfer = 0;
1298 1299
	u32 star, temp;

1300 1301 1302
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
1303 1304
	if (temp > star)
		star = temp;
1305
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
1306

1307 1308 1309 1310 1311 1312 1313
	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
1314
					nvkm_wr32(device, falcon + 0x01c4, data);
1315 1316 1317 1318
					star += 4;
				}
				addr = head;
				xfer = 0;
1319
			}
1320 1321 1322
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
1323
		}
1324
	}
1325

1326 1327 1328
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
1329 1330
}

1331
int
B
Ben Skeggs 已提交
1332
gf100_gr_init_ctxctl(struct gf100_gr *gr)
1333
{
1334
	const struct gf100_grctx_func *grctx = gr->func->grctx;
1335 1336
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
B
Ben Skeggs 已提交
1337
	struct gf100_gr_oclass *oclass = (void *)nv_object(gr)->oclass;
1338
	int i;
1339

B
Ben Skeggs 已提交
1340
	if (gr->firmware) {
1341
		/* load fuc microcode */
B
Ben Skeggs 已提交
1342 1343 1344 1345 1346 1347
		nvkm_mc(gr)->unk260(nvkm_mc(gr), 0);
		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c,
						 &gr->fuc409d);
		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac,
						 &gr->fuc41ad);
		nvkm_mc(gr)->unk260(nvkm_mc(gr), 1);
1348

1349
		/* start both of them running */
1350 1351 1352 1353 1354
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);
		nvkm_wr32(device, 0x41a100, 0x00000002);
		nvkm_wr32(device, 0x409100, 0x00000002);
1355 1356 1357 1358 1359
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;
B
Ben Skeggs 已提交
1360

1361 1362 1363
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);
B
Ben Skeggs 已提交
1364

1365 1366 1367
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
1368 1369 1370 1371
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
1372
			return -EBUSY;
1373

1374 1375 1376
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
1377 1378 1379 1380
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1381 1382
			return -EBUSY;

1383 1384 1385
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
1386 1387 1388 1389
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1390 1391
			return -EBUSY;

B
Ben Skeggs 已提交
1392
		if (nv_device(gr)->chipset >= 0xe0) {
1393 1394 1395
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
1396 1397 1398 1399
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1400 1401
				return -EBUSY;

1402 1403 1404 1405
			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
1406 1407 1408 1409
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1410 1411
				return -EBUSY;

1412 1413 1414 1415
			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
1416 1417 1418 1419
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1420 1421
				return -EBUSY;

1422 1423 1424
			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
1425 1426
		}

B
Ben Skeggs 已提交
1427 1428
		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
1429
			if (ret) {
1430
				nvkm_error(subdev, "failed to construct context\n");
1431 1432 1433 1434 1435
				return ret;
			}
		}

		return 0;
1436 1437 1438
	} else
	if (!oclass->fecs.ucode) {
		return -ENOSYS;
1439
	}
1440

1441
	/* load HUB microcode */
B
Ben Skeggs 已提交
1442
	nvkm_mc(gr)->unk260(nvkm_mc(gr), 0);
1443
	nvkm_wr32(device, 0x4091c0, 0x01000000);
1444
	for (i = 0; i < oclass->fecs.ucode->data.size / 4; i++)
1445
		nvkm_wr32(device, 0x4091c4, oclass->fecs.ucode->data.data[i]);
1446

1447
	nvkm_wr32(device, 0x409180, 0x01000000);
1448
	for (i = 0; i < oclass->fecs.ucode->code.size / 4; i++) {
1449
		if ((i & 0x3f) == 0)
1450 1451
			nvkm_wr32(device, 0x409188, i >> 6);
		nvkm_wr32(device, 0x409184, oclass->fecs.ucode->code.data[i]);
1452 1453 1454
	}

	/* load GPC microcode */
1455
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
1456
	for (i = 0; i < oclass->gpccs.ucode->data.size / 4; i++)
1457
		nvkm_wr32(device, 0x41a1c4, oclass->gpccs.ucode->data.data[i]);
1458

1459
	nvkm_wr32(device, 0x41a180, 0x01000000);
1460
	for (i = 0; i < oclass->gpccs.ucode->code.size / 4; i++) {
1461
		if ((i & 0x3f) == 0)
1462 1463
			nvkm_wr32(device, 0x41a188, i >> 6);
		nvkm_wr32(device, 0x41a184, oclass->gpccs.ucode->code.data[i]);
1464
	}
B
Ben Skeggs 已提交
1465
	nvkm_mc(gr)->unk260(nvkm_mc(gr), 1);
1466

1467
	/* load register lists */
1468 1469 1470 1471
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
1472

1473
	/* start HUB ucode running, it'll init the GPCs */
1474 1475
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
1476 1477 1478 1479
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
B
Ben Skeggs 已提交
1480
		gf100_gr_ctxctl_debug(gr);
1481 1482 1483
		return -EBUSY;
	}

1484
	gr->size = nvkm_rd32(device, 0x409804);
B
Ben Skeggs 已提交
1485 1486
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
1487
		if (ret) {
1488
			nvkm_error(subdev, "failed to construct context\n");
1489 1490
			return ret;
		}
1491 1492 1493
	}

	return 0;
1494 1495
}

1496
int
1497
gf100_gr_init(struct nvkm_object *object)
1498
{
B
Ben Skeggs 已提交
1499
	struct gf100_gr *gr = (void *)object;
1500 1501
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct gf100_gr_oclass *oclass = (void *)object->oclass;
B
Ben Skeggs 已提交
1502
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
1503 1504 1505 1506
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
	int ret, i;
1507

B
Ben Skeggs 已提交
1508
	ret = nvkm_gr_init(&gr->base);
1509 1510 1511
	if (ret)
		return ret;

1512 1513 1514 1515 1516 1517
	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
1518 1519
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);
1520

B
Ben Skeggs 已提交
1521
	gf100_gr_mmio(gr, oclass->mmio);
1522

B
Ben Skeggs 已提交
1523 1524
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
1525
		do {
B
Ben Skeggs 已提交
1526
			gpc = (gpc + 1) % gr->gpc_nr;
1527
		} while (!tpcnr[gpc]);
B
Ben Skeggs 已提交
1528
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
1529 1530 1531 1532

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

1533 1534 1535 1536
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
1537

B
Ben Skeggs 已提交
1538
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1539
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
B
Ben Skeggs 已提交
1540
			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
1541
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
B
Ben Skeggs 已提交
1542
			gr->tpc_total);
1543
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
1544 1545
	}

B
Ben Skeggs 已提交
1546
	if (nv_device(gr)->chipset != 0xd7)
1547
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
M
Maarten Lankhorst 已提交
1548
	else
1549
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
B
Ben Skeggs 已提交
1550

1551
	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
B
Ben Skeggs 已提交
1552

1553
	nvkm_wr32(device, 0x400500, 0x00010001);
B
Ben Skeggs 已提交
1554

1555 1556
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
B
Ben Skeggs 已提交
1557

1558 1559 1560 1561 1562 1563 1564 1565 1566 1567 1568
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
B
Ben Skeggs 已提交
1569 1570

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1571 1572 1573 1574
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
B
Ben Skeggs 已提交
1575
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1576 1577 1578 1579 1580 1581 1582
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
1583
		}
1584 1585
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
1586 1587
	}

B
Ben Skeggs 已提交
1588
	for (rop = 0; rop < gr->rop_nr; rop++) {
1589 1590 1591 1592
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
1593
	}
1594

1595 1596 1597 1598 1599 1600
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);
1601

1602
	nvkm_wr32(device, 0x400054, 0x34ce3464);
1603

B
Ben Skeggs 已提交
1604
	gf100_gr_zbc_init(gr);
1605

B
Ben Skeggs 已提交
1606
	return gf100_gr_init_ctxctl(gr);
1607 1608
}

1609
void
1610
gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
1611 1612 1613 1614 1615 1616
{
	kfree(fuc->data);
	fuc->data = NULL;
}

int
B
Ben Skeggs 已提交
1617
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
1618
		 struct gf100_gr_fuc *fuc)
1619
{
1620 1621
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1622
	const struct firmware *fw;
1623 1624
	char f[64];
	char cname[16];
1625
	int ret;
1626 1627 1628
	int i;

	/* Convert device name to lowercase */
1629
	strncpy(cname, device->chip->name, sizeof(cname));
1630 1631 1632 1633 1634 1635
	cname[sizeof(cname) - 1] = '\0';
	i = strlen(cname);
	while (i) {
		--i;
		cname[i] = tolower(cname[i]);
	}
1636

1637
	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
A
Alexandre Courbot 已提交
1638
	ret = request_firmware(&fw, f, nv_device_base(device));
1639
	if (ret) {
1640
		nvkm_error(subdev, "failed to load %s\n", fwname);
1641
		return ret;
1642 1643 1644 1645 1646 1647 1648 1649 1650
	}

	fuc->size = fw->size;
	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
	release_firmware(fw);
	return (fuc->data != NULL) ? 0 : -ENOMEM;
}

void
1651
gf100_gr_dtor(struct nvkm_object *object)
1652
{
B
Ben Skeggs 已提交
1653
	struct gf100_gr *gr = (void *)object;
1654

B
Ben Skeggs 已提交
1655
	kfree(gr->data);
1656

B
Ben Skeggs 已提交
1657 1658 1659 1660
	gf100_gr_dtor_fw(&gr->fuc409c);
	gf100_gr_dtor_fw(&gr->fuc409d);
	gf100_gr_dtor_fw(&gr->fuc41ac);
	gf100_gr_dtor_fw(&gr->fuc41ad);
1661

1662 1663
	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);
1664

B
Ben Skeggs 已提交
1665
	nvkm_gr_destroy(&gr->base);
1666 1667
}

1668 1669 1670 1671 1672 1673
/*
 * Base-engine function table assigned to gr->base.func in gf100_gr_ctor():
 * the channel constructor and the object lookup hook.
 */
static const struct nvkm_gr_func
gf100_gr_ = {
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};

1674
int
1675 1676 1677
gf100_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
	      struct nvkm_oclass *bclass, void *data, u32 size,
	      struct nvkm_object **pobject)
1678
{
1679
	struct gf100_gr_oclass *oclass = (void *)bclass;
1680
	struct nvkm_device *device = (void *)parent;
B
Ben Skeggs 已提交
1681
	struct gf100_gr *gr;
1682
	bool use_ext_fw, enable;
1683
	int ret, i, j;
1684

1685 1686
	use_ext_fw = nvkm_boolopt(device->cfgopt, "NvGrUseFW",
				  oclass->fecs.ucode == NULL);
1687 1688
	enable = use_ext_fw || oclass->fecs.ucode != NULL;

B
Ben Skeggs 已提交
1689 1690
	ret = nvkm_gr_create(parent, engine, bclass, enable, &gr);
	*pobject = nv_object(gr);
1691 1692 1693
	if (ret)
		return ret;

1694 1695
	gr->func = oclass->func;
	gr->base.func = &gf100_gr_;
B
Ben Skeggs 已提交
1696 1697
	nv_subdev(gr)->unit = 0x08001000;
	nv_subdev(gr)->intr = gf100_gr_intr;
1698

B
Ben Skeggs 已提交
1699
	gr->base.units = gf100_gr_units;
1700

1701
	if (use_ext_fw) {
1702
		nvkm_info(&gr->base.engine.subdev, "using external firmware\n");
B
Ben Skeggs 已提交
1703 1704 1705 1706
		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
1707
			return -ENODEV;
B
Ben Skeggs 已提交
1708
		gr->firmware = true;
1709 1710
	}

1711
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
B
Ben Skeggs 已提交
1712
			      &gr->unk4188b4);
1713 1714
	if (ret)
		return ret;
1715

1716
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
B
Ben Skeggs 已提交
1717
			      &gr->unk4188b8);
1718
	if (ret)
1719 1720
		return ret;

1721 1722 1723 1724 1725 1726 1727 1728 1729
	nvkm_kmap(gr->unk4188b4);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b4, i, 0x00000010);
	nvkm_done(gr->unk4188b4);

	nvkm_kmap(gr->unk4188b8);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b8, i, 0x00000010);
	nvkm_done(gr->unk4188b8);
B
Ben Skeggs 已提交
1730

1731 1732
	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;
B
Ben Skeggs 已提交
1733
	for (i = 0; i < gr->gpc_nr; i++) {
1734
		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
B
Ben Skeggs 已提交
1735 1736 1737
		gr->tpc_total += gr->tpc_nr[i];
		gr->ppc_nr[i]  = oclass->ppc_nr;
		for (j = 0; j < gr->ppc_nr[i]; j++) {
1738
			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
B
Ben Skeggs 已提交
1739
			gr->ppc_tpc_nr[i][j] = hweight8(mask);
1740
		}
1741 1742 1743
	}

	/*XXX: these need figuring out... though it might not even matter */
B
Ben Skeggs 已提交
1744
	switch (nv_device(gr)->chipset) {
1745
	case 0xc0:
B
Ben Skeggs 已提交
1746 1747
		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
			gr->magic_not_rop_nr = 0x07;
1748
		} else
B
Ben Skeggs 已提交
1749 1750
		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
			gr->magic_not_rop_nr = 0x05;
1751
		} else
B
Ben Skeggs 已提交
1752 1753
		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
			gr->magic_not_rop_nr = 0x06;
1754 1755 1756
		}
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
B
Ben Skeggs 已提交
1757
		gr->magic_not_rop_nr = 0x03;
1758 1759
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
B
Ben Skeggs 已提交
1760
		gr->magic_not_rop_nr = 0x01;
1761 1762
		break;
	case 0xc1: /* 2/0/0/0, 1 */
B
Ben Skeggs 已提交
1763
		gr->magic_not_rop_nr = 0x01;
1764 1765
		break;
	case 0xc8: /* 4/4/3/4, 5 */
B
Ben Skeggs 已提交
1766
		gr->magic_not_rop_nr = 0x06;
1767 1768
		break;
	case 0xce: /* 4/4/0/0, 4 */
B
Ben Skeggs 已提交
1769
		gr->magic_not_rop_nr = 0x03;
1770 1771
		break;
	case 0xcf: /* 4/0/0/0, 3 */
B
Ben Skeggs 已提交
1772
		gr->magic_not_rop_nr = 0x03;
1773
		break;
M
Maarten Lankhorst 已提交
1774
	case 0xd7:
1775
	case 0xd9: /* 1/0/0/0, 1 */
1776
	case 0xea: /* gk20a */
1777
	case 0x12b: /* gm20b */
B
Ben Skeggs 已提交
1778
		gr->magic_not_rop_nr = 0x01;
1779 1780 1781
		break;
	}

1782 1783 1784
	return 0;
}

1785
#include "fuc/hubgf100.fuc3.h"
1786

1787 1788 1789 1790 1791 1792
/*
 * Built-in FECS ucode descriptor: code and data images come from the
 * gf100_grhub_code / gf100_grhub_data arrays; sizes are in bytes.
 */
struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
};

1795
#include "fuc/gpcgf100.fuc3.h"
1796

1797 1798 1799 1800 1801 1802
/*
 * Built-in GPCCS ucode descriptor: code and data images come from the
 * gf100_grgpc_code / gf100_grgpc_data arrays; sizes are in bytes.
 */
struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
};

1805 1806 1807 1808 1809 1810 1811 1812 1813 1814 1815 1816
/*
 * GF100 function table referenced by gf100_gr_oclass: the context
 * generation functions plus the list of object classes, terminated by an
 * empty entry.  Only FERMI_A carries an extra per-class pointer
 * (gf100_fermi).
 * NOTE(review): the leading -1, -1 pair in each entry is presumably a
 * "don't care" version range - confirm against the sclass definition.
 */
static const struct gf100_gr_func
gf100_gr = {
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};

1817 1818
struct nvkm_oclass *
gf100_gr_oclass = &(struct gf100_gr_oclass) {
1819
	.base.handle = NV_ENGINE(GR, 0xc0),
1820 1821 1822 1823 1824
	.base.ofuncs = &(struct nvkm_ofuncs) {
		.ctor = gf100_gr_ctor,
		.dtor = gf100_gr_dtor,
		.init = gf100_gr_init,
		.fini = _nvkm_gr_fini,
1825
	},
1826
	.func = &gf100_gr,
1827 1828 1829
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
1830
}.base;