/*
 * Copyright 2012 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
24 25 26 27 28 29 30 31
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
#include <subdev/fb.h>
#include <subdev/mc.h>
32
#include <subdev/pmu.h>
33
#include <subdev/timer.h>
34
#include <engine/fifo.h>
35 36

#include <nvif/class.h>
37
#include <nvif/cl9097.h>
38
#include <nvif/unpack.h>
39

40 41 42 43 44
/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
B
Ben Skeggs 已提交
45
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
46
{
47
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
48
	if (gr->zbc_color[zbc].format) {
49 50 51 52 53 54 55 56
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
57 58 59
}

static int
B
Ben Skeggs 已提交
60
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
61
		       const u32 ds[4], const u32 l2[4])
62
{
63
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
64 65 66
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
67 68
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
69
				continue;
B
Ben Skeggs 已提交
70 71
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
72
				continue;
B
Ben Skeggs 已提交
73 74
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
75 76 77 78 79 80 81 82 83
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

84 85 86
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
87 88 89
	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
90
	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
91
	gf100_gr_zbc_clear_color(gr, zbc);
92 93 94 95
	return zbc;
}

static void
B
Ben Skeggs 已提交
96
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
97
{
98
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
99
	if (gr->zbc_depth[zbc].format)
100 101 102 103
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
104 105 106
}

static int
B
Ben Skeggs 已提交
107
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
108
		       const u32 ds, const u32 l2)
109
{
110
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
111 112 113
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
114 115
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
116
				continue;
B
Ben Skeggs 已提交
117
			if (gr->zbc_depth[i].ds != ds)
118
				continue;
B
Ben Skeggs 已提交
119
			if (gr->zbc_depth[i].l2 != l2) {
120 121 122 123 124 125 126 127 128
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

129 130 131
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
132 133 134
	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
135
	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
136
	gf100_gr_zbc_clear_depth(gr, zbc);
137 138 139
	return zbc;
}

140 141 142 143
/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/

144
static int
145
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
146
{
147
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
148 149 150
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
151
	int ret = -ENOSYS;
152

153
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
B
Ben Skeggs 已提交
174
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
175 176
							   args->v0.ds,
							   args->v0.l2);
177 178 179 180 181 182 183 184 185 186 187 188 189 190
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
191
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
192
{
193
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
194 195 196
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
197
	int ret = -ENOSYS;
198

199
	if (!(ret = nvif_unpack(ret, &data, &size, args->v0, 0, 0, false))) {
200 201
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
B
Ben Skeggs 已提交
202
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
203 204
							   args->v0.ds,
							   args->v0.l2);
205 206 207 208 209 210 211 212 213 214
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
215
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
216
{
217
	nvif_ioctl(object, "fermi mthd %08x\n", mthd);
218 219
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
220
		return gf100_fermi_mthd_zbc_color(object, data, size);
221
	case FERMI_A_ZBC_DEPTH:
222
		return gf100_fermi_mthd_zbc_depth(object, data, size);
223 224 225 226 227 228
	default:
		break;
	}
	return -EINVAL;
}

229 230
const struct nvkm_object_func
gf100_fermi = {
231
	.mthd = gf100_fermi_mthd,
232 233
};

234 235
/*
 * Write all-ones (when @data is non-zero) or all-zeroes to registers
 * 0x419e44 and 0x419e4c.
 */
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	const u32 mask = data ? 0xffffffff : 0x00000000;

	nvkm_wr32(device, 0x419e44, mask);
	nvkm_wr32(device, 0x419e4c, mask);
}

241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259
/*
 * Software handling of a method.
 *
 * Only method 0x1528 on classes whose low byte is 0x97 or 0xc0 is handled;
 * it is forwarded to gf100_gr_mthd_set_shader_exceptions().
 *
 * Returns true if the method was handled, false otherwise.
 */
static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	const u16 oclass = class & 0x00ff;

	if (oclass != 0x97 && oclass != 0xc0)
		return false;
	if (mthd != 0x1528)
		return false;

	gf100_gr_mthd_set_shader_exceptions(device, data);
	return true;
}
260

261 262 263 264 265 266 267 268 269 270 271 272 273 274 275
/*
 * Fetch entry @index of the engine's object class list.
 *
 * The list (gr->func->sclass) ends at the first entry with a zero oclass.
 * When @index names a valid entry it is copied to @sclass and @index is
 * returned; otherwise @sclass is left untouched and the number of entries
 * in the list is returned.
 */
static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int n;

	for (n = 0; gr->func->sclass[n].oclass; n++) {
		if (n == index) {
			*sclass = gr->func->sclass[n];
			return n;
		}
	}

	return n;
}
276 277 278 279

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
280

281 282 283
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
284
{
285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

/*
 * Channel destructor: unmap and release every per-channel data buffer and
 * the mmio list buffer.  A VMA is only unmapped/put when it has a node.
 *
 * Returns the gf100_gr_chan pointer; this function does not free it.
 */
static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	int n;

	for (n = 0; n < ARRAY_SIZE(chan->data); n++) {
		if (chan->data[n].vma.node) {
			nvkm_vm_unmap(&chan->data[n].vma);
			nvkm_vm_put(&chan->data[n].vma);
		}
		nvkm_memory_del(&chan->data[n].mem);
	}

	if (chan->mmio_vma.node) {
		nvkm_vm_unmap(&chan->mmio_vma);
		nvkm_vm_put(&chan->mmio_vma);
	}
	nvkm_memory_del(&chan->mmio);

	return chan;
}

/* Object functions for a per-channel GR context object. */
static const struct nvkm_object_func
gf100_gr_chan = {
	.dtor = gf100_gr_chan_dtor,
	.bind = gf100_gr_chan_bind,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
B
Ben Skeggs 已提交
350 351
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
352
	struct gf100_gr_chan *chan;
353
	struct nvkm_device *device = gr->base.engine.subdev.device;
354 355
	int ret, i;

356 357 358 359 360
	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;
361

362 363 364 365
	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
366 367
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
368 369 370
	if (ret)
		return ret;

371
	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
372
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
373 374 375
	if (ret)
		return ret;

376 377
	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

378
	/* allocate buffers referenced by mmio list */
B
Ben Skeggs 已提交
379
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
380 381 382
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
383 384
		if (ret)
			return ret;
385

386 387 388
		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
389 390
		if (ret)
			return ret;
391

392
		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
393
		data++;
394 395
	}

396
	/* finally, fill in the mmio list and point the context at it */
397
	nvkm_kmap(chan->mmio);
B
Ben Skeggs 已提交
398
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
399 400
		u32 addr = mmio->addr;
		u32 data = mmio->data;
401

402
		if (mmio->buffer >= 0) {
403
			u64 info = chan->data[mmio->buffer].vma.offset;
404 405
			data |= info >> mmio->shift;
		}
406

407 408
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
409 410
		mmio++;
	}
411
	nvkm_done(chan->mmio);
412
	return 0;
413 414
}

415
/*******************************************************************************
416
 * PGRAPH register lists
417 418
 ******************************************************************************/

419 420
/*
 * Register init list "main_0"; referenced from gf100_gr_pack_mmio[].
 * The empty last entry closes the list.
 */
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
	{ 0x400080, 1, 0x04, 0x003083c2 },
	{ 0x400088, 1, 0x04, 0x00006fe7 },
	{ 0x40008c, 1, 0x04, 0x00000000 },
	{ 0x400090, 1, 0x04, 0x00000030 },
	{ 0x40013c, 1, 0x04, 0x013901f7 },
	{ 0x400140, 1, 0x04, 0x00000100 },
	{ 0x400144, 1, 0x04, 0x00000000 },
	{ 0x400148, 1, 0x04, 0x00000110 },
	{ 0x400138, 1, 0x04, 0x00000000 },
	{ 0x400130, 2, 0x04, 0x00000000 },
	{ 0x400124, 1, 0x04, 0x00000002 },
	{}
};

435 436
/* Register init list "fe_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
	{ 0x40415c, 1, 0x04, 0x00000000 },
	{ 0x404170, 1, 0x04, 0x00000000 },
	{}
};

442 443
/* Register init list "pri_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
	{ 0x404488, 2, 0x04, 0x00000000 },
	{}
};

448 449
/* Register init list "rstr2d_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
	{ 0x407808, 1, 0x04, 0x00000000 },
	{}
};

454 455
/* Register init list "pd_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
	{ 0x406024, 1, 0x04, 0x00000000 },
	{}
};

460 461
/* Register init list "ds_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
	{ 0x405844, 1, 0x04, 0x00ffffff },
	{ 0x405850, 1, 0x04, 0x00000000 },
	{ 0x405908, 1, 0x04, 0x00000000 },
	{}
};

468 469
/* Register init list "scc_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
	{ 0x40803c, 1, 0x04, 0x00000000 },
	{}
};

474 475
/* Register init list "prop_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
	{ 0x4184a0, 1, 0x04, 0x00000000 },
	{}
};

480 481
/* Register init list "gpc_unk_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
	{ 0x418604, 1, 0x04, 0x00000000 },
	{ 0x418680, 1, 0x04, 0x00000000 },
	{ 0x418714, 1, 0x04, 0x80000000 },
	{ 0x418384, 1, 0x04, 0x00000000 },
	{}
};

489 490
/* Register init list "setup_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
	{ 0x418814, 3, 0x04, 0x00000000 },
	{}
};

495 496
/* Register init list "crstr_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
	{ 0x418b04, 1, 0x04, 0x00000000 },
	{}
};

501 502
/* Register init list "setup_1"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
	{ 0x4188c8, 1, 0x04, 0x80000000 },
	{ 0x4188cc, 1, 0x04, 0x00000000 },
	{ 0x4188d0, 1, 0x04, 0x00010000 },
	{ 0x4188d4, 1, 0x04, 0x00000001 },
	{}
};

510 511
/* Register init list "zcull_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
	{ 0x418910, 1, 0x04, 0x00010001 },
	{ 0x418914, 1, 0x04, 0x00000301 },
	{ 0x418918, 1, 0x04, 0x00800000 },
	{ 0x418980, 1, 0x04, 0x77777770 },
	{ 0x418984, 3, 0x04, 0x77777777 },
	{}
};

520 521
/* Register init list "gpm_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
	{ 0x418c04, 1, 0x04, 0x00000000 },
	{ 0x418c88, 1, 0x04, 0x00000000 },
	{}
};

527 528
/* Register init list "gpc_unk_1"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
	{ 0x418d00, 1, 0x04, 0x00000000 },
	{ 0x418f08, 1, 0x04, 0x00000000 },
	{ 0x418e00, 1, 0x04, 0x00000050 },
	{ 0x418e08, 1, 0x04, 0x00000000 },
	{}
};

536 537
/* Register init list "gcc_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
	{ 0x41900c, 1, 0x04, 0x00000000 },
	{ 0x419018, 1, 0x04, 0x00000000 },
	{}
};

543 544
/* Register init list "tpccs_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
	{ 0x419d08, 2, 0x04, 0x00000000 },
	{ 0x419d10, 1, 0x04, 0x00000014 },
	{}
};

550 551
/* Register init list "tex_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
	{ 0x419ab0, 1, 0x04, 0x00000000 },
	{ 0x419ab8, 1, 0x04, 0x000000e7 },
	{ 0x419abc, 2, 0x04, 0x00000000 },
	{}
};

558 559
/* Register init list "pe_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
	{ 0x41980c, 3, 0x04, 0x00000000 },
	{ 0x419844, 1, 0x04, 0x00000000 },
	{ 0x41984c, 1, 0x04, 0x00005bc5 },
	{ 0x419850, 4, 0x04, 0x00000000 },
	{}
};

567 568
/* Register init list "l1c_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
	{ 0x419c98, 1, 0x04, 0x00000000 },
	{ 0x419ca8, 1, 0x04, 0x80000000 },
	{ 0x419cb4, 1, 0x04, 0x00000000 },
	{ 0x419cb8, 1, 0x04, 0x00008bf4 },
	{ 0x419cbc, 1, 0x04, 0x28137606 },
	{ 0x419cc0, 2, 0x04, 0x00000000 },
	{}
};

578 579
/* Register init list "wwdx_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
	{ 0x419bd4, 1, 0x04, 0x00800000 },
	{ 0x419bdc, 1, 0x04, 0x00000000 },
	{}
};

585 586
/* Register init list "tpccs_1"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
	{ 0x419d2c, 1, 0x04, 0x00000000 },
	{}
};

591 592
/* Register init list "mpc_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
	{ 0x419c0c, 1, 0x04, 0x00000000 },
	{}
};

597 598
/*
 * Register init list "sm_0"; referenced from gf100_gr_pack_mmio[].
 * Unlike its neighbours this list is file-local (static).
 */
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
	{ 0x419e00, 1, 0x04, 0x00000000 },
	{ 0x419ea0, 1, 0x04, 0x00000000 },
	{ 0x419ea4, 1, 0x04, 0x00000100 },
	{ 0x419ea8, 1, 0x04, 0x00001100 },
	{ 0x419eac, 1, 0x04, 0x11100702 },
	{ 0x419eb0, 1, 0x04, 0x00000003 },
	{ 0x419eb4, 4, 0x04, 0x00000000 },
	{ 0x419ec8, 1, 0x04, 0x06060618 },
	{ 0x419ed0, 1, 0x04, 0x0eff0e38 },
	{ 0x419ed4, 1, 0x04, 0x011104f1 },
	{ 0x419edc, 1, 0x04, 0x00000000 },
	{ 0x419f00, 1, 0x04, 0x00000000 },
	{ 0x419f2c, 1, 0x04, 0x00000000 },
	{}
};

615 616
/* Register init list "be_0"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_be_0[] = {
	{ 0x40880c, 1, 0x04, 0x00000000 },
	{ 0x408910, 9, 0x04, 0x00000000 },
	{ 0x408950, 1, 0x04, 0x00000000 },
	{ 0x408954, 1, 0x04, 0x0000ffff },
	{ 0x408984, 1, 0x04, 0x00000000 },
	{ 0x408988, 1, 0x04, 0x08040201 },
	{ 0x40898c, 1, 0x04, 0x80402010 },
	{}
};

627 628
/* Register init list "fe_1"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
	{ 0x4040f0, 1, 0x04, 0x00000000 },
	{}
};

633 634
/* Register init list "pe_1"; referenced from gf100_gr_pack_mmio[]. */
const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
	{ 0x419880, 1, 0x04, 0x00000002 },
	{}
};

639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666 667
/*
 * The GF100 mmio pack: an ordered collection of the register init lists
 * defined above.  The empty last entry closes the pack.
 */
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
	{}
};

671 672 673 674
/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/

675
void
B
Ben Skeggs 已提交
676
gf100_gr_zbc_init(struct gf100_gr *gr)
677 678 679 680 681 682 683 684 685
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
686
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
687 688
	int index;

B
Ben Skeggs 已提交
689 690 691 692 693 694 695
	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
696 697 698
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
699
		gf100_gr_zbc_clear_color(gr, index);
700
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
701
		gf100_gr_zbc_clear_depth(gr, index);
702 703
}

704 705 706 707 708 709
/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
B
Ben Skeggs 已提交
710
gf100_gr_wait_idle(struct gf100_gr *gr)
711
{
712 713
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
714 715 716 717 718 719 720 721
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
722
		nvkm_rd32(device, 0x400700);
723

724 725 726
		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
727 728 729 730 731

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

732 733 734
	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
735 736 737
	return -EAGAIN;
}

738
void
B
Ben Skeggs 已提交
739
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
740
{
741
	struct nvkm_device *device = gr->base.engine.subdev.device;
742 743
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
744 745 746 747 748

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
749
			nvkm_wr32(device, addr, init->data);
750 751 752
			addr += init->pitch;
		}
	}
753 754 755
}

void
B
Ben Skeggs 已提交
756
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
757
{
758
	struct nvkm_device *device = gr->base.engine.subdev.device;
759 760
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
761
	u32 data = 0;
762

763
	nvkm_wr32(device, 0x400208, 0x80000000);
764 765 766 767 768 769

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
770
			nvkm_wr32(device, 0x400204, init->data);
771 772
			data = init->data;
		}
773

774
		while (addr < next) {
775
			nvkm_wr32(device, 0x400200, addr);
776 777 778 779 780
			/**
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
B
Ben Skeggs 已提交
781
				gf100_gr_wait_idle(gr);
782 783 784 785
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
786 787 788
			addr += init->pitch;
		}
	}
789

790
	nvkm_wr32(device, 0x400208, 0x00000000);
791 792 793
}

void
B
Ben Skeggs 已提交
794
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
795
{
796
	struct nvkm_device *device = gr->base.engine.subdev.device;
797 798
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
799
	u32 data = 0;
800

801 802 803 804 805 806
	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
807
			nvkm_wr32(device, 0x40448c, init->data);
808 809 810 811
			data = init->data;
		}

		while (addr < next) {
812
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
813
			addr += init->pitch;
814 815 816 817 818
		}
	}
}

u64
819
gf100_gr_units(struct nvkm_gr *base)
820
{
821
	struct gf100_gr *gr = gf100_gr(base);
822 823
	u64 cfg;

B
Ben Skeggs 已提交
824 825 826
	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
827 828

	return cfg;
829 830
}

831 832 833 834 835 836 837 838 839 840 841 842
/* Bit-to-name table used to decode the SKED trap status for logging. */
static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

846 847 848 849 850 851 852
/* Bit-to-name table used by gf100_gr_trap_gpc_rop() to decode trap bits. */
static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

856
static void
B
Ben Skeggs 已提交
857
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
858
{
859 860 861
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
862
	u32 trap[4];
863

864
	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
865 866 867
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
868

869
	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
870

871 872 873 874
	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
875
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
876 877
}

878
/* Value-to-name table used by gf100_gr_trap_mp() to decode the warp error. */
static const struct nvkm_enum gf100_mp_warp_error[] = {
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
	{ 0x10, "INVALID_ADDR_SPACE" },
	{ 0x11, "INVALID_PARAM" },
	{}
};

892
/* Bit-to-name table used by gf100_gr_trap_mp() to decode the global error. */
static const struct nvkm_bitfield gf100_mp_global_error[] = {
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
B
Ben Skeggs 已提交
899
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
900
{
901 902
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
903 904
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
905 906
	const struct nvkm_enum *warp;
	char glob[128];
907

908 909 910 911 912 913
	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
914

915 916
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
917 918
}

919
static void
B
Ben Skeggs 已提交
920
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
921
{
922 923
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
924
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
925 926

	if (stat & 0x00000001) {
927
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
928
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
929
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
930 931 932 933
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
B
Ben Skeggs 已提交
934
		gf100_gr_trap_mp(gr, gpc, tpc);
935 936 937 938
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
939
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
940
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
941
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
942 943 944 945
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
946
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
947
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
948
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
949 950 951 952
		stat &= ~0x00000008;
	}

	if (stat) {
953
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
954 955 956 957
	}
}

static void
B
Ben Skeggs 已提交
958
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
959
{
960 961
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
962
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
963 964 965
	int tpc;

	if (stat & 0x00000001) {
B
Ben Skeggs 已提交
966
		gf100_gr_trap_gpc_rop(gr, gpc);
967 968 969 970
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
971
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
972
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
973
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
974 975 976 977
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
978
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
979
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
980
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
981 982 983 984
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
985
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
986
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
987
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
988 989 990
		stat &= ~0x00000009;
	}

B
Ben Skeggs 已提交
991
	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
992 993
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
B
Ben Skeggs 已提交
994
			gf100_gr_trap_tpc(gr, gpc, tpc);
995
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
996 997 998 999 1000
			stat &= ~mask;
		}
	}

	if (stat) {
1001
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
1002 1003 1004 1005
	}
}

static void
B
Ben Skeggs 已提交
1006
gf100_gr_trap_intr(struct gf100_gr *gr)
1007
{
1008 1009
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1010
	u32 trap = nvkm_rd32(device, 0x400108);
1011
	int rop, gpc;
1012 1013

	if (trap & 0x00000001) {
1014
		u32 stat = nvkm_rd32(device, 0x404000);
1015
		nvkm_error(subdev, "DISPATCH %08x\n", stat);
1016 1017
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
1018 1019 1020 1021
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
1022
		u32 stat = nvkm_rd32(device, 0x404600);
1023
		nvkm_error(subdev, "M2MF %08x\n", stat);
1024 1025
		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
1026 1027 1028 1029
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
1030
		u32 stat = nvkm_rd32(device, 0x408030);
1031
		nvkm_error(subdev, "CCACHE %08x\n", stat);
1032 1033
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
1034 1035 1036 1037
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
1038
		u32 stat = nvkm_rd32(device, 0x405840);
1039
		nvkm_error(subdev, "SHADER %08x\n", stat);
1040 1041
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
1042 1043 1044 1045
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
1046
		u32 stat = nvkm_rd32(device, 0x40601c);
1047
		nvkm_error(subdev, "UNK6 %08x\n", stat);
1048 1049
		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
1050 1051 1052 1053
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
1054
		u32 stat = nvkm_rd32(device, 0x404490);
1055
		nvkm_error(subdev, "MACRO %08x\n", stat);
1056 1057
		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
1058 1059 1060
		trap &= ~0x00000080;
	}

1061
	if (trap & 0x00000100) {
1062 1063
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
		char sked[128];
1064

1065 1066
		nvkm_snprintbf(sked, sizeof(sked), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, sked);
1067

1068
		if (stat)
1069 1070
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
1071 1072 1073
		trap &= ~0x00000100;
	}

1074
	if (trap & 0x01000000) {
1075
		u32 stat = nvkm_rd32(device, 0x400118);
B
Ben Skeggs 已提交
1076
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
1077 1078
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
B
Ben Skeggs 已提交
1079
				gf100_gr_trap_gpc(gr, gpc);
1080
				nvkm_wr32(device, 0x400118, mask);
1081 1082 1083
				stat &= ~mask;
			}
		}
1084
		nvkm_wr32(device, 0x400108, 0x01000000);
1085 1086 1087 1088
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
B
Ben Skeggs 已提交
1089
		for (rop = 0; rop < gr->rop_nr; rop++) {
1090 1091
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
1092
			nvkm_error(subdev, "ROP%d %08x %08x\n",
1093
				 rop, statz, statc);
1094 1095
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
1096
		}
1097
		nvkm_wr32(device, 0x400108, 0x02000000);
1098 1099 1100 1101
		trap &= ~0x02000000;
	}

	if (trap) {
1102
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
1103
		nvkm_wr32(device, 0x400108, trap);
1104 1105 1106
	}
}

1107
static void
B
Ben Skeggs 已提交
1108
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1109
{
1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122 1123
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
1124 1125 1126
}

void
B
Ben Skeggs 已提交
1127
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1128
{
1129 1130
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1131 1132
	u32 gpc;

B
Ben Skeggs 已提交
1133
	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1134
	for (gpc = 0; gpc < gpcnr; gpc++)
B
Ben Skeggs 已提交
1135
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1136 1137 1138
}

static void
B
Ben Skeggs 已提交
1139
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
1140
{
1141 1142
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1143
	u32 stat = nvkm_rd32(device, 0x409c18);
1144

1145
	if (stat & 0x00000001) {
1146
		u32 code = nvkm_rd32(device, 0x409814);
1147
		if (code == E_BAD_FWMTHD) {
1148 1149
			u32 class = nvkm_rd32(device, 0x409808);
			u32  addr = nvkm_rd32(device, 0x40980c);
1150 1151
			u32  subc = (addr & 0x00070000) >> 16;
			u32  mthd = (addr & 0x00003ffc);
1152
			u32  data = nvkm_rd32(device, 0x409810);
1153

1154 1155 1156
			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);
1157

1158
			nvkm_wr32(device, 0x409c20, 0x00000001);
1159 1160
			stat &= ~0x00000001;
		} else {
1161
			nvkm_error(subdev, "FECS ucode error %d\n", code);
1162 1163
		}
	}
1164

1165
	if (stat & 0x00080000) {
1166
		nvkm_error(subdev, "FECS watchdog timeout\n");
B
Ben Skeggs 已提交
1167
		gf100_gr_ctxctl_debug(gr);
1168
		nvkm_wr32(device, 0x409c20, 0x00080000);
1169 1170 1171 1172
		stat &= ~0x00080000;
	}

	if (stat) {
1173
		nvkm_error(subdev, "FECS %08x\n", stat);
B
Ben Skeggs 已提交
1174
		gf100_gr_ctxctl_debug(gr);
1175
		nvkm_wr32(device, 0x409c20, stat);
1176
	}
1177 1178
}

1179
static void
1180
gf100_gr_intr(struct nvkm_gr *base)
1181
{
1182 1183 1184
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1185 1186
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
1187 1188 1189
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
1190 1191
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
1192 1193
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
1194
	u32 class;
1195 1196
	const char *name = "unknown";
	int chid = -1;
1197

1198
	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
1199 1200 1201 1202
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}
1203

1204
	if (device->card_type < NV_E0 || subc < 4)
1205
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
1206 1207 1208
	else
		class = 0x0000;

1209 1210 1211 1212 1213
	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
1214
		nvkm_wr32(device, 0x400100, 0x00000001);
1215 1216 1217
		stat &= ~0x00000001;
	}

1218
	if (stat & 0x00000010) {
1219
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
1220 1221
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
1222 1223
				   chid, inst << 12, name, subc,
				   class, mthd, data);
1224
		}
1225
		nvkm_wr32(device, 0x400100, 0x00000010);
1226 1227 1228 1229
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
1230 1231
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
1232
			   chid, inst << 12, name, subc, class, mthd, data);
1233
		nvkm_wr32(device, 0x400100, 0x00000020);
1234 1235 1236 1237
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
1238 1239 1240 1241 1242
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
1243
			   name, subc, class, mthd, data);
1244
		nvkm_wr32(device, 0x400100, 0x00100000);
1245 1246 1247 1248
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
1249
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
1250
			   chid, inst << 12, name);
B
Ben Skeggs 已提交
1251
		gf100_gr_trap_intr(gr);
1252
		nvkm_wr32(device, 0x400100, 0x00200000);
1253 1254 1255 1256
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
B
Ben Skeggs 已提交
1257
		gf100_gr_ctxctl_isr(gr);
1258
		nvkm_wr32(device, 0x400100, 0x00080000);
1259 1260 1261 1262
		stat &= ~0x00080000;
	}

	if (stat) {
1263
		nvkm_error(subdev, "intr %08x\n", stat);
1264
		nvkm_wr32(device, 0x400100, stat);
1265 1266
	}

1267
	nvkm_wr32(device, 0x400500, 0x00010001);
1268
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
1269 1270
}

1271
void
B
Ben Skeggs 已提交
1272
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
1273
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
1274
{
1275
	struct nvkm_device *device = gr->base.engine.subdev.device;
1276
	int i;
1277

1278
	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
1279
	for (i = 0; i < data->size / 4; i++)
1280
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
1281

1282
	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
1283 1284
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
1285 1286
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
1287
	}
1288 1289 1290

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
1291
		nvkm_wr32(device, fuc_base + 0x0184, 0);
1292 1293
}

1294
static void
B
Ben Skeggs 已提交
1295
gf100_gr_init_csdata(struct gf100_gr *gr,
1296 1297
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
1298
{
1299
	struct nvkm_device *device = gr->base.engine.subdev.device;
1300 1301
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
1302
	u32 addr = ~0, prev = ~0, xfer = 0;
1303 1304
	u32 star, temp;

1305 1306 1307
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
1308 1309
	if (temp > star)
		star = temp;
1310
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
1311

1312 1313 1314 1315 1316 1317 1318
	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
1319
					nvkm_wr32(device, falcon + 0x01c4, data);
1320 1321 1322 1323
					star += 4;
				}
				addr = head;
				xfer = 0;
1324
			}
1325 1326 1327
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
1328
		}
1329
	}
1330

1331 1332 1333
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
1334 1335
}

1336
int
B
Ben Skeggs 已提交
1337
gf100_gr_init_ctxctl(struct gf100_gr *gr)
1338
{
1339
	const struct gf100_grctx_func *grctx = gr->func->grctx;
1340 1341
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1342
	int i;
1343

B
Ben Skeggs 已提交
1344
	if (gr->firmware) {
1345
		/* load fuc microcode */
1346
		nvkm_mc_unk260(device->mc, 0);
1347 1348
		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
1349
		nvkm_mc_unk260(device->mc, 1);
1350

1351
		/* start both of them running */
1352 1353 1354 1355 1356
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);
		nvkm_wr32(device, 0x41a100, 0x00000002);
		nvkm_wr32(device, 0x409100, 0x00000002);
1357 1358 1359 1360 1361
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;
B
Ben Skeggs 已提交
1362

1363 1364 1365
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);
B
Ben Skeggs 已提交
1366

1367 1368 1369
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
1370 1371 1372 1373
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
1374
			return -EBUSY;
1375

1376 1377 1378
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
1379 1380 1381 1382
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1383 1384
			return -EBUSY;

1385 1386 1387
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
1388 1389 1390 1391
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1392 1393
			return -EBUSY;

1394
		if (device->chipset >= 0xe0) {
1395 1396 1397
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
1398 1399 1400 1401
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1402 1403
				return -EBUSY;

1404 1405 1406 1407
			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
1408 1409 1410 1411
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1412 1413
				return -EBUSY;

1414 1415 1416 1417
			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
1418 1419 1420 1421
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1422 1423
				return -EBUSY;

1424 1425 1426
			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
1427 1428
		}

B
Ben Skeggs 已提交
1429 1430
		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
1431
			if (ret) {
1432
				nvkm_error(subdev, "failed to construct context\n");
1433 1434 1435 1436 1437
				return ret;
			}
		}

		return 0;
1438
	} else
1439
	if (!gr->func->fecs.ucode) {
1440
		return -ENOSYS;
1441
	}
1442

1443
	/* load HUB microcode */
1444
	nvkm_mc_unk260(device->mc, 0);
1445
	nvkm_wr32(device, 0x4091c0, 0x01000000);
1446 1447
	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
1448

1449
	nvkm_wr32(device, 0x409180, 0x01000000);
1450
	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
1451
		if ((i & 0x3f) == 0)
1452
			nvkm_wr32(device, 0x409188, i >> 6);
1453
		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
1454 1455 1456
	}

	/* load GPC microcode */
1457
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
1458 1459
	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
1460

1461
	nvkm_wr32(device, 0x41a180, 0x01000000);
1462
	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
1463
		if ((i & 0x3f) == 0)
1464
			nvkm_wr32(device, 0x41a188, i >> 6);
1465
		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
1466
	}
1467
	nvkm_mc_unk260(device->mc, 1);
1468

1469
	/* load register lists */
1470 1471 1472 1473
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
1474

1475
	/* start HUB ucode running, it'll init the GPCs */
1476 1477
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
1478 1479 1480 1481
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
B
Ben Skeggs 已提交
1482
		gf100_gr_ctxctl_debug(gr);
1483 1484 1485
		return -EBUSY;
	}

1486
	gr->size = nvkm_rd32(device, 0x409804);
B
Ben Skeggs 已提交
1487 1488
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
1489
		if (ret) {
1490
			nvkm_error(subdev, "failed to construct context\n");
1491 1492
			return ret;
		}
1493 1494 1495
	}

	return 0;
1496 1497
}

1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533 1534
/*
 * One-time setup.
 *
 * Allocates the two 0x1000-byte buffers unk4188b4/unk4188b8 and fills them
 * with 0x00000010, reads the ROP and GPC counts from 0x409604, collects
 * per-GPC TPC/PPC information, and picks the chipset-specific
 * magic_not_rop_nr value.  Returns 0 or the nvkm_memory_new() error.
 */
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int ret, gpc, ppc, off;

	nvkm_pmu_pgob(device->pmu, false);

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b4);
	if (ret)
		return ret;

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b8);
	if (ret)
		return ret;

	nvkm_kmap(gr->unk4188b4);
	for (off = 0; off < 0x1000; off += 4)
		nvkm_wo32(gr->unk4188b4, off, 0x00000010);
	nvkm_done(gr->unk4188b4);

	nvkm_kmap(gr->unk4188b8);
	for (off = 0; off < 0x1000; off += 4)
		nvkm_wo32(gr->unk4188b8, off, 0x00000010);
	nvkm_done(gr->unk4188b8);

	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
		gr->tpc_nr[gpc]  = nvkm_rd32(device, GPC_UNIT(gpc, 0x2608));
		gr->tpc_total   += gr->tpc_nr[gpc];
		gr->ppc_nr[gpc]  = gr->func->ppc_nr;

		for (ppc = 0; ppc < gr->ppc_nr[gpc]; ppc++) {
			/* only the low 8 bits of the register are kept */
			u8 tpcs = nvkm_rd32(device,
					    GPC_UNIT(gpc, 0x0c30 + (ppc * 4)));

			if (tpcs)
				gr->ppc_mask[gpc] |= (1 << ppc);
			gr->ppc_tpc_nr[gpc][ppc] = hweight8(tpcs);
		}
	}

	/*XXX: these need figuring out... though it might not even matter */
	switch (device->chipset) {
	case 0xc0:
		switch (gr->tpc_total) {
		case 11: /* 465, 3/4/4/0, 4 */
			gr->magic_not_rop_nr = 0x07;
			break;
		case 14: /* 470, 3/3/4/4, 5 */
			gr->magic_not_rop_nr = 0x05;
			break;
		case 15: /* 480, 3/4/4/4, 6 */
			gr->magic_not_rop_nr = 0x06;
			break;
		default:
			break;
		}
		break;
	case 0xc8: /* 4/4/3/4, 5 */
		gr->magic_not_rop_nr = 0x06;
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
	case 0xce: /* 4/4/0/0, 4 */
	case 0xcf: /* 4/0/0/0, 3 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
	case 0xc1: /* 2/0/0/0, 1 */
	case 0xd7:
	case 0xd9: /* 1/0/0/0, 1 */
	case 0xea: /* gk20a */
	case 0x12b: /* gm20b */
		gr->magic_not_rop_nr = 0x01;
		break;
	default:
		break;
	}

	return 0;
}

int
gf100_gr_init_(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
	return gr->func->init(gr);
}

/*
 * Free the buffer held by @fuc and leave its data pointer NULL so that a
 * repeated call is harmless.
 */
void
gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
{
	void *blob = fuc->data;

	fuc->data = NULL;
	kfree(blob);
}

/*
 * nvkm_gr destructor hook.
 *
 * Runs the chipset-specific dtor if there is one, then frees gr->data, the
 * four firmware buffers and the two unk4188b* memory objects.  Returns the
 * gf100_gr pointer itself; it is not freed here.
 */
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct gf100_gr_fuc *const fucs[] = {
		&gr->fuc409c, &gr->fuc409d, &gr->fuc41ac, &gr->fuc41ad,
	};
	unsigned int n;

	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	for (n = 0; n < sizeof(fucs) / sizeof(fucs[0]); n++)
		gf100_gr_dtor_fw(fucs[n]);

	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);
	return gr;
}

/*
 * nvkm_gr hook table handed to nvkm_gr_ctor() by gf100_gr_ctor().  The
 * .init hook is the thin gf100_gr_init_() wrapper, which in turn calls the
 * chipset-specific init from the gf100_gr_func table.
 */
static const struct nvkm_gr_func
gf100_gr_ = {
	.dtor = gf100_gr_dtor,
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};

/*
 * Load the firmware file "nvidia/<chip>/<fwname>.bin" into @fuc, where
 * <chip> is the lowercased device chip name (truncated to 15 characters).
 *
 * On success @fuc owns a kmemdup()'d copy of the image and 0 is returned.
 * Returns the request_firmware() error if the file cannot be loaded, or
 * -ENOMEM if the copy cannot be allocated.
 */
int
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
		 struct gf100_gr_fuc *fuc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	const struct firmware *fw;
	char path[64];
	char chip[16];
	char *c;
	int ret;

	/* Convert device name to lowercase */
	strncpy(chip, device->chip->name, sizeof(chip));
	chip[sizeof(chip) - 1] = '\0';
	for (c = chip; *c; c++)
		*c = tolower(*c);

	snprintf(path, sizeof(path), "nvidia/%s/%s.bin", chip, fwname);
	ret = request_firmware(&fw, path, device->dev);
	if (ret) {
		nvkm_error(subdev, "failed to load %s\n", fwname);
		return ret;
	}

	/* keep a private copy so the firmware object can be released now */
	fuc->size = fw->size;
	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
	release_firmware(fw);

	if (!fuc->data)
		return -ENOMEM;
	return 0;
}

int
gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct gf100_gr *gr)
{
	int ret;

	gr->func = func;
	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW",
				    func->fecs.ucode == NULL);

	ret = nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000,
			   gr->firmware || func->fecs.ucode != NULL,
			   &gr->base);
	if (ret)
		return ret;

	if (gr->firmware) {
		nvkm_info(&gr->base.engine.subdev, "using external firmware\n");
		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
			return -ENODEV;
	}

	return 0;
}

1690
int
1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701 1702
gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr *gr;
	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;
	return gf100_gr_ctor(func, device, index, gr);
}

int
gf100_gr_init(struct gf100_gr *gr)
1703
{
1704
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
1705
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
1706 1707 1708
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
1709
	int i;
1710

1711 1712 1713 1714 1715 1716
	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
1717 1718
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);
1719

1720
	gf100_gr_mmio(gr, gr->func->mmio);
1721

B
Ben Skeggs 已提交
1722 1723
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
1724
		do {
B
Ben Skeggs 已提交
1725
			gpc = (gpc + 1) % gr->gpc_nr;
1726
		} while (!tpcnr[gpc]);
B
Ben Skeggs 已提交
1727
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
1728 1729 1730 1731

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

1732 1733 1734 1735
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
1736

B
Ben Skeggs 已提交
1737
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1738
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
B
Ben Skeggs 已提交
1739
			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
1740
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
B
Ben Skeggs 已提交
1741
			gr->tpc_total);
1742
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
1743 1744
	}

1745
	if (device->chipset != 0xd7)
1746
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
M
Maarten Lankhorst 已提交
1747
	else
1748
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
B
Ben Skeggs 已提交
1749

1750
	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
B
Ben Skeggs 已提交
1751

1752
	nvkm_wr32(device, 0x400500, 0x00010001);
B
Ben Skeggs 已提交
1753

1754 1755
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
B
Ben Skeggs 已提交
1756

1757 1758 1759 1760 1761 1762 1763 1764 1765 1766 1767
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
B
Ben Skeggs 已提交
1768 1769

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1770 1771 1772 1773
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
B
Ben Skeggs 已提交
1774
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1775 1776 1777 1778 1779 1780 1781
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
1782
		}
1783 1784
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
1785 1786
	}

B
Ben Skeggs 已提交
1787
	for (rop = 0; rop < gr->rop_nr; rop++) {
1788 1789 1790 1791
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
1792
	}
1793

1794 1795 1796 1797 1798 1799
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);
1800

1801
	nvkm_wr32(device, 0x400054, 0x34ce3464);
1802

B
Ben Skeggs 已提交
1803
	gf100_gr_zbc_init(gr);
1804

B
Ben Skeggs 已提交
1805
	return gf100_gr_init_ctxctl(gr);
1806 1807
}

1808
#include "fuc/hubgf100.fuc3.h"
1809

1810 1811 1812 1813 1814 1815
struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
1816 1817
};

1818
#include "fuc/gpcgf100.fuc3.h"
1819

1820 1821 1822 1823 1824 1825
struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
1826 1827
};

1828 1829
static const struct gf100_gr_func
gf100_gr = {
1830 1831 1832 1833
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
1834 1835 1836 1837 1838 1839 1840 1841 1842 1843
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};

1844 1845 1846 1847 1848
/*
 * Create a graphics engine instance using the gf100_gr function table.
 * Returns whatever gf100_gr_new_() returns; the new object is stored in
 * *pgr.
 */
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	return gf100_gr_new_(&gf100_gr, device, index, pgr);
}