/*
 * Copyright 2012 Red Hat Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
#include <subdev/fb.h>
#include <subdev/mc.h>
#include <subdev/pmu.h>
#include <subdev/timer.h>
#include <engine/fifo.h>

#include <nvif/class.h>
#include <nvif/unpack.h>

/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
B
Ben Skeggs 已提交
44
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
45
{
46
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
47
	if (gr->zbc_color[zbc].format) {
48 49 50 51 52 53 54 55
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
56 57 58
}

static int
B
Ben Skeggs 已提交
59
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
60
		       const u32 ds[4], const u32 l2[4])
61
{
62
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
63 64 65
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
66 67
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
68
				continue;
B
Ben Skeggs 已提交
69 70
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
71
				continue;
B
Ben Skeggs 已提交
72 73
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
74 75 76 77 78 79 80 81 82
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

83 84 85
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
86 87 88
	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
89
	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
90
	gf100_gr_zbc_clear_color(gr, zbc);
91 92 93 94
	return zbc;
}

static void
B
Ben Skeggs 已提交
95
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
96
{
97
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
98
	if (gr->zbc_depth[zbc].format)
99 100 101 102
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
103 104 105
}

static int
B
Ben Skeggs 已提交
106
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
107
		       const u32 ds, const u32 l2)
108
{
109
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
110 111 112
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
113 114
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
115
				continue;
B
Ben Skeggs 已提交
116
			if (gr->zbc_depth[i].ds != ds)
117
				continue;
B
Ben Skeggs 已提交
118
			if (gr->zbc_depth[i].l2 != l2) {
119 120 121 122 123 124 125 126 127
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

128 129 130
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
131 132 133
	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
134
	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
135
	gf100_gr_zbc_clear_depth(gr, zbc);
136 137 138
	return zbc;
}

139 140 141 142
/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/

143
static int
144
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
145
{
146
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
B
Ben Skeggs 已提交
173
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
174 175
							   args->v0.ds,
							   args->v0.l2);
176 177 178 179 180 181 182 183 184 185 186 187 188 189
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
190
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
191
{
192
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
193 194 195 196 197 198 199 200
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
B
Ben Skeggs 已提交
201
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
202 203
							   args->v0.ds,
							   args->v0.l2);
204 205 206 207 208 209 210 211 212 213
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
214
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
215 216 217
{
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
218
		return gf100_fermi_mthd_zbc_color(object, data, size);
219
	case FERMI_A_ZBC_DEPTH:
220
		return gf100_fermi_mthd_zbc_depth(object, data, size);
221 222 223 224 225 226
	default:
		break;
	}
	return -EINVAL;
}

227 228
const struct nvkm_object_func
gf100_fermi = {
229
	.mthd = gf100_fermi_mthd,
230 231
};

232 233
/*
 * Write all-ones to registers 0x419e44 and 0x419e4c when 'data' is non-zero,
 * and all-zeroes otherwise.
 */
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	nvkm_wr32(device, 0x419e44, data ? 0xffffffff : 0x00000000);
	nvkm_wr32(device, 0x419e4c, data ? 0xffffffff : 0x00000000);
}

239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257
/*
 * Software method handler.
 *
 * Only method 0x1528 on classes whose low byte is 0x97 or 0xc0 is handled;
 * it is forwarded to gf100_gr_mthd_set_shader_exceptions().
 *
 * Returns true when the method was handled here, false otherwise.
 */
static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	const u16 low = class & 0x00ff;

	/* no other class has a software method */
	if (low != 0x97 && low != 0xc0)
		return false;

	/* no other method is emulated */
	if (mthd != 0x1528)
		return false;

	gf100_gr_mthd_set_shader_exceptions(device, data);
	return true;
}
258

259 260 261 262 263 264 265 266 267 268 269 270 271 272 273
/*
 * Look up entry 'index' of gr->func->sclass[], a list terminated by an entry
 * with a zero 'oclass'.
 *
 * When the entry exists it is copied to *sclass and 'index' is returned.
 * Otherwise *sclass is left untouched and the number of entries in the list
 * is returned.
 */
static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int nr;

	for (nr = 0; gr->func->sclass[nr].oclass; nr++) {
		if (nr == index) {
			*sclass = gr->func->sclass[nr];
			return index;
		}
	}

	return nr;
}
274 275 276 277

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
278

279 280 281
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
282
{
283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

/*
 * Channel destructor: releases every per-channel data buffer and then the
 * mmio-list buffer.  A mapping is unmapped and released only when its
 * vma.node is set; the backing memory is always passed to
 * nvkm_memory_del().
 *
 * Returns the gf100_gr_chan pointer derived from 'object'.
 */
static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	int n = 0;

	while (n < ARRAY_SIZE(chan->data)) {
		if (chan->data[n].vma.node) {
			nvkm_vm_unmap(&chan->data[n].vma);
			nvkm_vm_put(&chan->data[n].vma);
		}
		nvkm_memory_del(&chan->data[n].mem);
		n++;
	}

	if (chan->mmio_vma.node) {
		nvkm_vm_unmap(&chan->mmio_vma);
		nvkm_vm_put(&chan->mmio_vma);
	}
	nvkm_memory_del(&chan->mmio);

	return chan;
}

/* Object hooks for a GR channel: context binding and teardown. */
static const struct nvkm_object_func
gf100_gr_chan = {
	.bind = gf100_gr_chan_bind,
	.dtor = gf100_gr_chan_dtor,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
B
Ben Skeggs 已提交
348 349
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
350
	struct gf100_gr_chan *chan;
351
	struct nvkm_device *device = gr->base.engine.subdev.device;
352 353
	int ret, i;

354 355 356 357 358
	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;
359

360 361 362 363
	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
364 365
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
366 367 368
	if (ret)
		return ret;

369
	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
370
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
371 372 373
	if (ret)
		return ret;

374 375
	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

376
	/* allocate buffers referenced by mmio list */
B
Ben Skeggs 已提交
377
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
378 379 380
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
381 382
		if (ret)
			return ret;
383

384 385 386
		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
387 388
		if (ret)
			return ret;
389

390
		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
391
		data++;
392 393
	}

394
	/* finally, fill in the mmio list and point the context at it */
395
	nvkm_kmap(chan->mmio);
B
Ben Skeggs 已提交
396
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
397 398
		u32 addr = mmio->addr;
		u32 data = mmio->data;
399

400
		if (mmio->buffer >= 0) {
401
			u64 info = chan->data[mmio->buffer].vma.offset;
402 403
			data |= info >> mmio->shift;
		}
404

405 406
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
407 408
		mmio++;
	}
409
	nvkm_done(chan->mmio);
410
	return 0;
411 412
}

413
/*******************************************************************************
414
 * PGRAPH register lists
415 416
 ******************************************************************************/

417 418
/*
 * Init list referenced from gf100_gr_pack_mmio.  Entry fields are presumably
 * { addr, count, pitch, data } (order set by struct gf100_gr_init, declared
 * elsewhere); the trailing empty entry presumably terminates the list.
 */
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
	{ 0x400080,   1, 0x04, 0x003083c2 },
	{ 0x400088,   1, 0x04, 0x00006fe7 },
	{ 0x40008c,   1, 0x04, 0x00000000 },
	{ 0x400090,   1, 0x04, 0x00000030 },
	{ 0x40013c,   1, 0x04, 0x013901f7 },
	{ 0x400140,   1, 0x04, 0x00000100 },
	{ 0x400144,   1, 0x04, 0x00000000 },
	{ 0x400148,   1, 0x04, 0x00000110 },
	{ 0x400138,   1, 0x04, 0x00000000 },
	{ 0x400130,   2, 0x04, 0x00000000 },
	{ 0x400124,   1, 0x04, 0x00000002 },
	{}
};

433 434
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
	{ 0x40415c,   1, 0x04, 0x00000000 },
	{ 0x404170,   1, 0x04, 0x00000000 },
	{}
};

440 441
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
	{ 0x404488,   2, 0x04, 0x00000000 },
	{}
};

446 447
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
	{ 0x407808,   1, 0x04, 0x00000000 },
	{}
};

452 453
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
	{ 0x406024,   1, 0x04, 0x00000000 },
	{}
};

458 459
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
	{ 0x405844,   1, 0x04, 0x00ffffff },
	{ 0x405850,   1, 0x04, 0x00000000 },
	{ 0x405908,   1, 0x04, 0x00000000 },
	{}
};

466 467
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
	{ 0x40803c,   1, 0x04, 0x00000000 },
	{}
};

472 473
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
	{ 0x4184a0,   1, 0x04, 0x00000000 },
	{}
};

478 479
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
	{ 0x418604,   1, 0x04, 0x00000000 },
	{ 0x418680,   1, 0x04, 0x00000000 },
	{ 0x418714,   1, 0x04, 0x80000000 },
	{ 0x418384,   1, 0x04, 0x00000000 },
	{}
};

487 488
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
	{ 0x418814,   3, 0x04, 0x00000000 },
	{}
};

493 494
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
	{ 0x418b04,   1, 0x04, 0x00000000 },
	{}
};

499 500
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
	{ 0x4188c8,   1, 0x04, 0x80000000 },
	{ 0x4188cc,   1, 0x04, 0x00000000 },
	{ 0x4188d0,   1, 0x04, 0x00010000 },
	{ 0x4188d4,   1, 0x04, 0x00000001 },
	{}
};

508 509
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
	{ 0x418910,   1, 0x04, 0x00010001 },
	{ 0x418914,   1, 0x04, 0x00000301 },
	{ 0x418918,   1, 0x04, 0x00800000 },
	{ 0x418980,   1, 0x04, 0x77777770 },
	{ 0x418984,   3, 0x04, 0x77777777 },
	{}
};

518 519
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
	{ 0x418c04,   1, 0x04, 0x00000000 },
	{ 0x418c88,   1, 0x04, 0x00000000 },
	{}
};

525 526
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
	{ 0x418d00,   1, 0x04, 0x00000000 },
	{ 0x418f08,   1, 0x04, 0x00000000 },
	{ 0x418e00,   1, 0x04, 0x00000050 },
	{ 0x418e08,   1, 0x04, 0x00000000 },
	{}
};

534 535
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
	{ 0x41900c,   1, 0x04, 0x00000000 },
	{ 0x419018,   1, 0x04, 0x00000000 },
	{}
};

541 542
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
	{ 0x419d08,   2, 0x04, 0x00000000 },
	{ 0x419d10,   1, 0x04, 0x00000014 },
	{}
};

548 549
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
	{ 0x419ab0,   1, 0x04, 0x00000000 },
	{ 0x419ab8,   1, 0x04, 0x000000e7 },
	{ 0x419abc,   2, 0x04, 0x00000000 },
	{}
};

556 557
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
	{ 0x41980c,   3, 0x04, 0x00000000 },
	{ 0x419844,   1, 0x04, 0x00000000 },
	{ 0x41984c,   1, 0x04, 0x00005bc5 },
	{ 0x419850,   4, 0x04, 0x00000000 },
	{}
};

565 566
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
	{ 0x419c98,   1, 0x04, 0x00000000 },
	{ 0x419ca8,   1, 0x04, 0x80000000 },
	{ 0x419cb4,   1, 0x04, 0x00000000 },
	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
	{ 0x419cbc,   1, 0x04, 0x28137606 },
	{ 0x419cc0,   2, 0x04, 0x00000000 },
	{}
};

576 577
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
	{ 0x419bd4,   1, 0x04, 0x00800000 },
	{ 0x419bdc,   1, 0x04, 0x00000000 },
	{}
};

583 584
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
	{ 0x419d2c,   1, 0x04, 0x00000000 },
	{}
};

589 590
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
	{ 0x419c0c,   1, 0x04, 0x00000000 },
	{}
};

595 596
/*
 * Init list referenced from gf100_gr_pack_mmio; ends with an empty entry.
 * Unlike the neighbouring lists this one is file-local (static).
 */
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
	{ 0x419e00,   1, 0x04, 0x00000000 },
	{ 0x419ea0,   1, 0x04, 0x00000000 },
	{ 0x419ea4,   1, 0x04, 0x00000100 },
	{ 0x419ea8,   1, 0x04, 0x00001100 },
	{ 0x419eac,   1, 0x04, 0x11100702 },
	{ 0x419eb0,   1, 0x04, 0x00000003 },
	{ 0x419eb4,   4, 0x04, 0x00000000 },
	{ 0x419ec8,   1, 0x04, 0x06060618 },
	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
	{ 0x419ed4,   1, 0x04, 0x011104f1 },
	{ 0x419edc,   1, 0x04, 0x00000000 },
	{ 0x419f00,   1, 0x04, 0x00000000 },
	{ 0x419f2c,   1, 0x04, 0x00000000 },
	{}
};

613 614
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_be_0[] = {
	{ 0x40880c,   1, 0x04, 0x00000000 },
	{ 0x408910,   9, 0x04, 0x00000000 },
	{ 0x408950,   1, 0x04, 0x00000000 },
	{ 0x408954,   1, 0x04, 0x0000ffff },
	{ 0x408984,   1, 0x04, 0x00000000 },
	{ 0x408988,   1, 0x04, 0x08040201 },
	{ 0x40898c,   1, 0x04, 0x80402010 },
	{}
};

625 626
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
	{ 0x4040f0,   1, 0x04, 0x00000000 },
	{}
};

631 632
/* Init list referenced from gf100_gr_pack_mmio; ends with an empty entry. */
const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
	{ 0x419880,   1, 0x04, 0x00000002 },
	{}
};

637 638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665
/*
 * Pack collecting the gf100 init lists, in the order they are listed here.
 * The trailing empty entry presumably terminates the pack for
 * pack_for_each_init() (macro defined elsewhere - confirm).
 */
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
	{}
};

669 670 671 672
/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/

673
void
B
Ben Skeggs 已提交
674
gf100_gr_zbc_init(struct gf100_gr *gr)
675 676 677 678 679 680 681 682 683
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
684
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
685 686
	int index;

B
Ben Skeggs 已提交
687 688 689 690 691 692 693
	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
694 695 696
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
697
		gf100_gr_zbc_clear_color(gr, index);
698
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
699
		gf100_gr_zbc_clear_depth(gr, index);
700 701
}

702 703 704 705 706 707
/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
B
Ben Skeggs 已提交
708
gf100_gr_wait_idle(struct gf100_gr *gr)
709
{
710 711
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
712 713 714 715 716 717 718 719
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
720
		nvkm_rd32(device, 0x400700);
721

722 723 724
		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
725 726 727 728 729

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

730 731 732
	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
733 734 735
	return -EAGAIN;
}

736
void
B
Ben Skeggs 已提交
737
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
738
{
739
	struct nvkm_device *device = gr->base.engine.subdev.device;
740 741
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
742 743 744 745 746

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
747
			nvkm_wr32(device, addr, init->data);
748 749 750
			addr += init->pitch;
		}
	}
751 752 753
}

void
B
Ben Skeggs 已提交
754
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
755
{
756
	struct nvkm_device *device = gr->base.engine.subdev.device;
757 758
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
759
	u32 data = 0;
760

761
	nvkm_wr32(device, 0x400208, 0x80000000);
762 763 764 765 766 767

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
768
			nvkm_wr32(device, 0x400204, init->data);
769 770
			data = init->data;
		}
771

772
		while (addr < next) {
773
			nvkm_wr32(device, 0x400200, addr);
774 775 776 777 778
			/**
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
B
Ben Skeggs 已提交
779
				gf100_gr_wait_idle(gr);
780 781 782 783
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
784 785 786
			addr += init->pitch;
		}
	}
787

788
	nvkm_wr32(device, 0x400208, 0x00000000);
789 790 791
}

void
B
Ben Skeggs 已提交
792
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
793
{
794
	struct nvkm_device *device = gr->base.engine.subdev.device;
795 796
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
797
	u32 data = 0;
798

799 800 801 802 803 804
	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
805
			nvkm_wr32(device, 0x40448c, init->data);
806 807 808 809
			data = init->data;
		}

		while (addr < next) {
810
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
811
			addr += init->pitch;
812 813 814 815 816
		}
	}
}

u64
817
gf100_gr_units(struct nvkm_gr *base)
818
{
819
	struct gf100_gr *gr = gf100_gr(base);
820 821
	u64 cfg;

B
Ben Skeggs 已提交
822 823 824
	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
825 826

	return cfg;
827 828
}

829 830 831 832 833 834 835 836 837 838 839 840
/* Bit names used to decode the status read from 0x407020 in the trap handler. */
static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

844 845 846 847 848 849 850
/* Bit names used by gf100_gr_trap_gpc_rop() to decode the trap status. */
static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

854
static void
B
Ben Skeggs 已提交
855
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
856
{
857 858 859
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
860
	u32 trap[4];
861

862
	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
863 864 865
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
866

867
	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
868

869 870 871 872
	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
873
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
874 875
}

876
/* Warp error codes looked up by gf100_gr_trap_mp() via nvkm_enum_find(). */
static const struct nvkm_enum gf100_mp_warp_error[] = {
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
	{ 0x10, "INVALID_ADDR_SPACE" },
	{ 0x11, "INVALID_PARAM" },
	{}
};

890
/* Bit names used by gf100_gr_trap_mp() to decode the global error word. */
static const struct nvkm_bitfield gf100_mp_global_error[] = {
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
B
Ben Skeggs 已提交
897
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
898
{
899 900
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
901 902
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
903 904
	const struct nvkm_enum *warp;
	char glob[128];
905

906 907 908 909 910 911
	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
912

913 914
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
915 916
}

917
static void
B
Ben Skeggs 已提交
918
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
919
{
920 921
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
922
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
923 924

	if (stat & 0x00000001) {
925
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
926
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
927
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
928 929 930 931
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
B
Ben Skeggs 已提交
932
		gf100_gr_trap_mp(gr, gpc, tpc);
933 934 935 936
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
937
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
938
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
939
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
940 941 942 943
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
944
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
945
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
946
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
947 948 949 950
		stat &= ~0x00000008;
	}

	if (stat) {
951
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
952 953 954 955
	}
}

static void
B
Ben Skeggs 已提交
956
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
957
{
958 959
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
960
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
961 962 963
	int tpc;

	if (stat & 0x00000001) {
B
Ben Skeggs 已提交
964
		gf100_gr_trap_gpc_rop(gr, gpc);
965 966 967 968
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
969
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
970
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
971
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
972 973 974 975
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
976
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
977
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
978
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
979 980 981 982
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
983
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
984
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
985
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
986 987 988
		stat &= ~0x00000009;
	}

B
Ben Skeggs 已提交
989
	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
990 991
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
B
Ben Skeggs 已提交
992
			gf100_gr_trap_tpc(gr, gpc, tpc);
993
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
994 995 996 997 998
			stat &= ~mask;
		}
	}

	if (stat) {
999
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
1000 1001 1002 1003
	}
}

static void
B
Ben Skeggs 已提交
1004
gf100_gr_trap_intr(struct gf100_gr *gr)
1005
{
1006 1007
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1008
	u32 trap = nvkm_rd32(device, 0x400108);
1009
	int rop, gpc;
1010 1011

	if (trap & 0x00000001) {
1012
		u32 stat = nvkm_rd32(device, 0x404000);
1013
		nvkm_error(subdev, "DISPATCH %08x\n", stat);
1014 1015
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
1016 1017 1018 1019
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
1020
		u32 stat = nvkm_rd32(device, 0x404600);
1021
		nvkm_error(subdev, "M2MF %08x\n", stat);
1022 1023
		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
1024 1025 1026 1027
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
1028
		u32 stat = nvkm_rd32(device, 0x408030);
1029
		nvkm_error(subdev, "CCACHE %08x\n", stat);
1030 1031
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
1032 1033 1034 1035
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
1036
		u32 stat = nvkm_rd32(device, 0x405840);
1037
		nvkm_error(subdev, "SHADER %08x\n", stat);
1038 1039
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
1040 1041 1042 1043
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
1044
		u32 stat = nvkm_rd32(device, 0x40601c);
1045
		nvkm_error(subdev, "UNK6 %08x\n", stat);
1046 1047
		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
1048 1049 1050 1051
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
1052
		u32 stat = nvkm_rd32(device, 0x404490);
1053
		nvkm_error(subdev, "MACRO %08x\n", stat);
1054 1055
		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
1056 1057 1058
		trap &= ~0x00000080;
	}

1059
	if (trap & 0x00000100) {
1060 1061
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
		char sked[128];
1062

1063 1064
		nvkm_snprintbf(sked, sizeof(sked), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, sked);
1065

1066
		if (stat)
1067 1068
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
1069 1070 1071
		trap &= ~0x00000100;
	}

1072
	if (trap & 0x01000000) {
1073
		u32 stat = nvkm_rd32(device, 0x400118);
B
Ben Skeggs 已提交
1074
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
1075 1076
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
B
Ben Skeggs 已提交
1077
				gf100_gr_trap_gpc(gr, gpc);
1078
				nvkm_wr32(device, 0x400118, mask);
1079 1080 1081
				stat &= ~mask;
			}
		}
1082
		nvkm_wr32(device, 0x400108, 0x01000000);
1083 1084 1085 1086
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
B
Ben Skeggs 已提交
1087
		for (rop = 0; rop < gr->rop_nr; rop++) {
1088 1089
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
1090
			nvkm_error(subdev, "ROP%d %08x %08x\n",
1091
				 rop, statz, statc);
1092 1093
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
1094
		}
1095
		nvkm_wr32(device, 0x400108, 0x02000000);
1096 1097 1098 1099
		trap &= ~0x02000000;
	}

	if (trap) {
1100
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
1101
		nvkm_wr32(device, 0x400108, trap);
1102 1103 1104
	}
}

1105
static void
B
Ben Skeggs 已提交
1106
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1107
{
1108 1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
1122 1123 1124
}

void
B
Ben Skeggs 已提交
1125
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1126
{
1127 1128
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1129 1130
	u32 gpc;

B
Ben Skeggs 已提交
1131
	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1132
	for (gpc = 0; gpc < gpcnr; gpc++)
B
Ben Skeggs 已提交
1133
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1134 1135 1136
}

static void
B
Ben Skeggs 已提交
1137
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
1138
{
1139 1140
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1141
	u32 stat = nvkm_rd32(device, 0x409c18);
1142

1143
	if (stat & 0x00000001) {
1144
		u32 code = nvkm_rd32(device, 0x409814);
1145
		if (code == E_BAD_FWMTHD) {
1146 1147
			u32 class = nvkm_rd32(device, 0x409808);
			u32  addr = nvkm_rd32(device, 0x40980c);
1148 1149
			u32  subc = (addr & 0x00070000) >> 16;
			u32  mthd = (addr & 0x00003ffc);
1150
			u32  data = nvkm_rd32(device, 0x409810);
1151

1152 1153 1154
			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);
1155

1156
			nvkm_wr32(device, 0x409c20, 0x00000001);
1157 1158
			stat &= ~0x00000001;
		} else {
1159
			nvkm_error(subdev, "FECS ucode error %d\n", code);
1160 1161
		}
	}
1162

1163
	if (stat & 0x00080000) {
1164
		nvkm_error(subdev, "FECS watchdog timeout\n");
B
Ben Skeggs 已提交
1165
		gf100_gr_ctxctl_debug(gr);
1166
		nvkm_wr32(device, 0x409c20, 0x00080000);
1167 1168 1169 1170
		stat &= ~0x00080000;
	}

	if (stat) {
1171
		nvkm_error(subdev, "FECS %08x\n", stat);
B
Ben Skeggs 已提交
1172
		gf100_gr_ctxctl_debug(gr);
1173
		nvkm_wr32(device, 0x409c20, stat);
1174
	}
1175 1176
}

1177
static void
1178
gf100_gr_intr(struct nvkm_gr *base)
1179
{
1180 1181 1182
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1183 1184
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
1185 1186 1187
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
1188 1189
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
1190 1191
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
1192
	u32 class;
1193 1194
	const char *name = "unknown";
	int chid = -1;
1195

1196
	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
1197 1198 1199 1200
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}
1201

1202
	if (device->card_type < NV_E0 || subc < 4)
1203
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
1204 1205 1206
	else
		class = 0x0000;

1207 1208 1209 1210 1211
	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
1212
		nvkm_wr32(device, 0x400100, 0x00000001);
1213 1214 1215
		stat &= ~0x00000001;
	}

1216
	if (stat & 0x00000010) {
1217
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
1218 1219
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
1220 1221
				   chid, inst << 12, name, subc,
				   class, mthd, data);
1222
		}
1223
		nvkm_wr32(device, 0x400100, 0x00000010);
1224 1225 1226 1227
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
1228 1229
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
1230
			   chid, inst << 12, name, subc, class, mthd, data);
1231
		nvkm_wr32(device, 0x400100, 0x00000020);
1232 1233 1234 1235
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
1236 1237 1238 1239 1240
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
1241
			   name, subc, class, mthd, data);
1242
		nvkm_wr32(device, 0x400100, 0x00100000);
1243 1244 1245 1246
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
1247
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
1248
			   chid, inst << 12, name);
B
Ben Skeggs 已提交
1249
		gf100_gr_trap_intr(gr);
1250
		nvkm_wr32(device, 0x400100, 0x00200000);
1251 1252 1253 1254
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
B
Ben Skeggs 已提交
1255
		gf100_gr_ctxctl_isr(gr);
1256
		nvkm_wr32(device, 0x400100, 0x00080000);
1257 1258 1259 1260
		stat &= ~0x00080000;
	}

	if (stat) {
1261
		nvkm_error(subdev, "intr %08x\n", stat);
1262
		nvkm_wr32(device, 0x400100, stat);
1263 1264
	}

1265
	nvkm_wr32(device, 0x400500, 0x00010001);
1266
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
1267 1268
}

1269
void
B
Ben Skeggs 已提交
1270
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
1271
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
1272
{
1273
	struct nvkm_device *device = gr->base.engine.subdev.device;
1274
	int i;
1275

1276
	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
1277
	for (i = 0; i < data->size / 4; i++)
1278
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
1279

1280
	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
1281 1282
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
1283 1284
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
1285
	}
1286 1287 1288

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
1289
		nvkm_wr32(device, fuc_base + 0x0184, 0);
1290 1291
}

1292
static void
B
Ben Skeggs 已提交
1293
gf100_gr_init_csdata(struct gf100_gr *gr,
1294 1295
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
1296
{
1297
	struct nvkm_device *device = gr->base.engine.subdev.device;
1298 1299
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
1300
	u32 addr = ~0, prev = ~0, xfer = 0;
1301 1302
	u32 star, temp;

1303 1304 1305
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
1306 1307
	if (temp > star)
		star = temp;
1308
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
1309

1310 1311 1312 1313 1314 1315 1316
	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
1317
					nvkm_wr32(device, falcon + 0x01c4, data);
1318 1319 1320 1321
					star += 4;
				}
				addr = head;
				xfer = 0;
1322
			}
1323 1324 1325
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
1326
		}
1327
	}
1328

1329 1330 1331
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
1332 1333
}

1334
int
B
Ben Skeggs 已提交
1335
gf100_gr_init_ctxctl(struct gf100_gr *gr)
1336
{
1337
	const struct gf100_grctx_func *grctx = gr->func->grctx;
1338 1339
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1340
	int i;
1341

B
Ben Skeggs 已提交
1342
	if (gr->firmware) {
1343
		/* load fuc microcode */
1344
		nvkm_mc_unk260(device->mc, 0);
1345 1346
		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
1347
		nvkm_mc_unk260(device->mc, 1);
1348

1349
		/* start both of them running */
1350 1351 1352 1353 1354
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);
		nvkm_wr32(device, 0x41a100, 0x00000002);
		nvkm_wr32(device, 0x409100, 0x00000002);
1355 1356 1357 1358 1359
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;
B
Ben Skeggs 已提交
1360

1361 1362 1363
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);
B
Ben Skeggs 已提交
1364

1365 1366 1367
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
1368 1369 1370 1371
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
1372
			return -EBUSY;
1373

1374 1375 1376
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
1377 1378 1379 1380
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1381 1382
			return -EBUSY;

1383 1384 1385
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
1386 1387 1388 1389
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1390 1391
			return -EBUSY;

1392
		if (device->chipset >= 0xe0) {
1393 1394 1395
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
1396 1397 1398 1399
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1400 1401
				return -EBUSY;

1402 1403 1404 1405
			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
1406 1407 1408 1409
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1410 1411
				return -EBUSY;

1412 1413 1414 1415
			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
1416 1417 1418 1419
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1420 1421
				return -EBUSY;

1422 1423 1424
			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
1425 1426
		}

B
Ben Skeggs 已提交
1427 1428
		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
1429
			if (ret) {
1430
				nvkm_error(subdev, "failed to construct context\n");
1431 1432 1433 1434 1435
				return ret;
			}
		}

		return 0;
1436
	} else
1437
	if (!gr->func->fecs.ucode) {
1438
		return -ENOSYS;
1439
	}
1440

1441
	/* load HUB microcode */
1442
	nvkm_mc_unk260(device->mc, 0);
1443
	nvkm_wr32(device, 0x4091c0, 0x01000000);
1444 1445
	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
1446

1447
	nvkm_wr32(device, 0x409180, 0x01000000);
1448
	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
1449
		if ((i & 0x3f) == 0)
1450
			nvkm_wr32(device, 0x409188, i >> 6);
1451
		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
1452 1453 1454
	}

	/* load GPC microcode */
1455
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
1456 1457
	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
1458

1459
	nvkm_wr32(device, 0x41a180, 0x01000000);
1460
	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
1461
		if ((i & 0x3f) == 0)
1462
			nvkm_wr32(device, 0x41a188, i >> 6);
1463
		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
1464
	}
1465
	nvkm_mc_unk260(device->mc, 1);
1466

1467
	/* load register lists */
1468 1469 1470 1471
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
1472

1473
	/* start HUB ucode running, it'll init the GPCs */
1474 1475
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
1476 1477 1478 1479
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
B
Ben Skeggs 已提交
1480
		gf100_gr_ctxctl_debug(gr);
1481 1482 1483
		return -EBUSY;
	}

1484
	gr->size = nvkm_rd32(device, 0x409804);
B
Ben Skeggs 已提交
1485 1486
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
1487
		if (ret) {
1488
			nvkm_error(subdev, "failed to construct context\n");
1489 1490
			return ret;
		}
1491 1492 1493
	}

	return 0;
1494 1495
}

1496 1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532
/*
 * One-time setup for the graphics engine.
 *
 * Allocates the two 0x1000-byte buffers unk4188b4/unk4188b8 and fills them
 * with 0x00000010, reads the ROP/GPC/TPC/PPC topology from the hardware,
 * and picks the per-chipset magic_not_rop_nr value.
 *
 * Returns 0 on success or the error from nvkm_memory_new().
 */
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_device *device = gr->base.engine.subdev.device;
	struct nvkm_memory **bufs[2] = { &gr->unk4188b4, &gr->unk4188b8 };
	int ret, i, j;

	nvkm_pmu_pgob(device->pmu, false);

	/* allocate both buffers before touching either */
	for (j = 0; j < 2; j++) {
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000,
				      256, false, bufs[j]);
		if (ret)
			return ret;
	}

	for (j = 0; j < 2; j++) {
		struct nvkm_memory *mem = *bufs[j];

		nvkm_kmap(mem);
		for (i = 0; i < 0x1000; i += 4)
			nvkm_wo32(mem, i, 0x00000010);
		nvkm_done(mem);
	}

	/* unit counts: ROPs in bits 16-20, GPCs in bits 0-4 of 0x409604 */
	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;

	for (i = 0; i < gr->gpc_nr; i++) {
		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
		gr->tpc_total += gr->tpc_nr[i];
		gr->ppc_nr[i]  = gr->func->ppc_nr;

		for (j = 0; j < gr->ppc_nr[i]; j++) {
			u8 tpcs = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));

			/* a PPC counts as present when its mask is non-zero */
			if (tpcs)
				gr->ppc_mask[i] |= (1 << j);
			gr->ppc_tpc_nr[i][j] = hweight8(tpcs);
		}
	}

	/*XXX: these need figuring out... though it might not even matter */
	switch (device->chipset) {
	case 0xc0:
		switch (gr->tpc_total) {
		case 11: /* 465, 3/4/4/0, 4 */
			gr->magic_not_rop_nr = 0x07;
			break;
		case 14: /* 470, 3/3/4/4, 5 */
			gr->magic_not_rop_nr = 0x05;
			break;
		case 15: /* 480, 3/4/4/4, 6 */
			gr->magic_not_rop_nr = 0x06;
			break;
		}
		break;
	case 0xc8: /* 4/4/3/4, 5 */
		gr->magic_not_rop_nr = 0x06;
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
	case 0xce: /* 4/4/0/0, 4 */
	case 0xcf: /* 4/0/0/0, 3 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
	case 0xc1: /* 2/0/0/0, 1 */
	case 0xd7:
	case 0xd9: /* 1/0/0/0, 1 */
	case 0xea: /* gk20a */
	case 0x12b: /* gm20b */
		gr->magic_not_rop_nr = 0x01;
		break;
	}

	return 0;
}

int
gf100_gr_init_(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
	return gr->func->init(gr);
}

/*
 * Free the buffer held by @fuc and leave its data pointer NULL so that a
 * repeated call is harmless.
 */
void
gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
{
	void *blob = fuc->data;

	fuc->data = NULL;
	kfree(blob);
}

/*
 * Destructor: runs the chipset-specific dtor when one is provided, then
 * frees gr->data, the four firmware buffers and the two memory objects.
 * Returns the gf100_gr pointer.
 */
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct gf100_gr_fuc *blobs[] = {
		&gr->fuc409c, &gr->fuc409d, &gr->fuc41ac, &gr->fuc41ad,
	};
	unsigned int n;

	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	for (n = 0; n < ARRAY_SIZE(blobs); n++)
		gf100_gr_dtor_fw(blobs[n]);

	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);
	return gr;
}

/* nvkm_gr hooks shared by every gf100-style graphics engine. */
static const struct nvkm_gr_func
gf100_gr_ = {
	/* lifetime */
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.dtor = gf100_gr_dtor,
	/* runtime */
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	/* objects */
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};

/*
 * Load the firmware file "nvidia/<chip>/<fwname>.bin" into @fuc.
 *
 * <chip> is the lower-cased device chip name.  The firmware contents are
 * duplicated into a kernel buffer and the firmware handle is released
 * before returning.
 *
 * Returns 0 on success, the request_firmware() error if the file cannot be
 * loaded, or -ENOMEM if the copy cannot be allocated.
 */
int
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
		 struct gf100_gr_fuc *fuc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	const struct firmware *fw;
	char path[64];
	char chip[16];
	char *c;
	int ret;

	/* Convert device name to lowercase */
	strncpy(chip, device->chip->name, sizeof(chip));
	chip[sizeof(chip) - 1] = '\0';
	for (c = chip; *c != '\0'; c++)
		*c = tolower(*c);

	snprintf(path, sizeof(path), "nvidia/%s/%s.bin", chip, fwname);
	ret = request_firmware(&fw, path, device->dev);
	if (ret) {
		nvkm_error(subdev, "failed to load %s\n", fwname);
		return ret;
	}

	fuc->size = fw->size;
	fuc->data = kmemdup(fw->data, fuc->size, GFP_KERNEL);
	release_firmware(fw);

	if (!fuc->data)
		return -ENOMEM;
	return 0;
}

int
gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct gf100_gr *gr)
{
	int ret;

	gr->func = func;
	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW",
				    func->fecs.ucode == NULL);

	ret = nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000,
			   gr->firmware || func->fecs.ucode != NULL,
			   &gr->base);
	if (ret)
		return ret;

	if (gr->firmware) {
		nvkm_info(&gr->base.engine.subdev, "using external firmware\n");
		if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c) ||
		    gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d) ||
		    gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac) ||
		    gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
			return -ENODEV;
	}

	return 0;
}

1688
int
1689 1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700
gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr *gr;
	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;
	return gf100_gr_ctor(func, device, index, gr);
}

int
gf100_gr_init(struct gf100_gr *gr)
1701
{
1702
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
1703
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
1704 1705 1706
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
1707
	int i;
1708

1709 1710 1711 1712 1713 1714
	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
1715 1716
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);
1717

1718
	gf100_gr_mmio(gr, gr->func->mmio);
1719

B
Ben Skeggs 已提交
1720 1721
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
1722
		do {
B
Ben Skeggs 已提交
1723
			gpc = (gpc + 1) % gr->gpc_nr;
1724
		} while (!tpcnr[gpc]);
B
Ben Skeggs 已提交
1725
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
1726 1727 1728 1729

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

1730 1731 1732 1733
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
1734

B
Ben Skeggs 已提交
1735
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1736
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
B
Ben Skeggs 已提交
1737
			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
1738
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
B
Ben Skeggs 已提交
1739
			gr->tpc_total);
1740
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
1741 1742
	}

1743
	if (device->chipset != 0xd7)
1744
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
M
Maarten Lankhorst 已提交
1745
	else
1746
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
B
Ben Skeggs 已提交
1747

1748
	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
B
Ben Skeggs 已提交
1749

1750
	nvkm_wr32(device, 0x400500, 0x00010001);
B
Ben Skeggs 已提交
1751

1752 1753
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
B
Ben Skeggs 已提交
1754

1755 1756 1757 1758 1759 1760 1761 1762 1763 1764 1765
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
B
Ben Skeggs 已提交
1766 1767

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1768 1769 1770 1771
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
B
Ben Skeggs 已提交
1772
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1773 1774 1775 1776 1777 1778 1779
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
1780
		}
1781 1782
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
1783 1784
	}

B
Ben Skeggs 已提交
1785
	for (rop = 0; rop < gr->rop_nr; rop++) {
1786 1787 1788 1789
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
1790
	}
1791

1792 1793 1794 1795 1796 1797
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);
1798

1799
	nvkm_wr32(device, 0x400054, 0x34ce3464);
1800

B
Ben Skeggs 已提交
1801
	gf100_gr_zbc_init(gr);
1802

B
Ben Skeggs 已提交
1803
	return gf100_gr_init_ctxctl(gr);
1804 1805
}

1806
#include "fuc/hubgf100.fuc3.h"
1807

1808 1809 1810 1811 1812 1813
struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
1814 1815
};

1816
#include "fuc/gpcgf100.fuc3.h"
1817

1818 1819 1820 1821 1822 1823
struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
1824 1825
};

1826 1827
static const struct gf100_gr_func
gf100_gr = {
1828 1829 1830 1831
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
1832 1833 1834 1835 1836 1837 1838 1839 1840 1841
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};

1842 1843 1844 1845 1846
/* Create a graphics engine instance described by the gf100 function table. */
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	const struct gf100_gr_func *func = &gf100_gr;

	return gf100_gr_new_(func, device, index, pgr);
}