gf100.c 49.1 KB
Newer Older
1
/*
2
 * Copyright 2012 Red Hat Inc.
3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: Ben Skeggs
 */
24 25 26 27 28 29 30 31
#include "gf100.h"
#include "ctxgf100.h"
#include "fuc/os.h"

#include <core/client.h>
#include <core/option.h>
#include <subdev/fb.h>
#include <subdev/mc.h>
32
#include <subdev/pmu.h>
33
#include <subdev/timer.h>
34
#include <engine/fifo.h>
35 36

#include <nvif/class.h>
37
#include <nvif/cl9097.h>
38
#include <nvif/unpack.h>
39

40 41 42 43 44
/*******************************************************************************
 * Zero Bandwidth Clear
 ******************************************************************************/

static void
B
Ben Skeggs 已提交
45
gf100_gr_zbc_clear_color(struct gf100_gr *gr, int zbc)
46
{
47
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
48
	if (gr->zbc_color[zbc].format) {
49 50 51 52 53 54 55 56
		nvkm_wr32(device, 0x405804, gr->zbc_color[zbc].ds[0]);
		nvkm_wr32(device, 0x405808, gr->zbc_color[zbc].ds[1]);
		nvkm_wr32(device, 0x40580c, gr->zbc_color[zbc].ds[2]);
		nvkm_wr32(device, 0x405810, gr->zbc_color[zbc].ds[3]);
	}
	nvkm_wr32(device, 0x405814, gr->zbc_color[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000004); /* TRIGGER | WRITE | COLOR */
57 58 59
}

static int
B
Ben Skeggs 已提交
60
gf100_gr_zbc_color_get(struct gf100_gr *gr, int format,
61
		       const u32 ds[4], const u32 l2[4])
62
{
63
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
64 65 66
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
67 68
		if (gr->zbc_color[i].format) {
			if (gr->zbc_color[i].format != format)
69
				continue;
B
Ben Skeggs 已提交
70 71
			if (memcmp(gr->zbc_color[i].ds, ds, sizeof(
				   gr->zbc_color[i].ds)))
72
				continue;
B
Ben Skeggs 已提交
73 74
			if (memcmp(gr->zbc_color[i].l2, l2, sizeof(
				   gr->zbc_color[i].l2))) {
75 76 77 78 79 80 81 82 83
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

84 85 86
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
87 88 89
	memcpy(gr->zbc_color[zbc].ds, ds, sizeof(gr->zbc_color[zbc].ds));
	memcpy(gr->zbc_color[zbc].l2, l2, sizeof(gr->zbc_color[zbc].l2));
	gr->zbc_color[zbc].format = format;
90
	nvkm_ltc_zbc_color_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
91
	gf100_gr_zbc_clear_color(gr, zbc);
92 93 94 95
	return zbc;
}

static void
B
Ben Skeggs 已提交
96
gf100_gr_zbc_clear_depth(struct gf100_gr *gr, int zbc)
97
{
98
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
99
	if (gr->zbc_depth[zbc].format)
100 101 102 103
		nvkm_wr32(device, 0x405818, gr->zbc_depth[zbc].ds);
	nvkm_wr32(device, 0x40581c, gr->zbc_depth[zbc].format);
	nvkm_wr32(device, 0x405820, zbc);
	nvkm_wr32(device, 0x405824, 0x00000005); /* TRIGGER | WRITE | DEPTH */
104 105 106
}

static int
B
Ben Skeggs 已提交
107
gf100_gr_zbc_depth_get(struct gf100_gr *gr, int format,
108
		       const u32 ds, const u32 l2)
109
{
110
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
111 112 113
	int zbc = -ENOSPC, i;

	for (i = ltc->zbc_min; i <= ltc->zbc_max; i++) {
B
Ben Skeggs 已提交
114 115
		if (gr->zbc_depth[i].format) {
			if (gr->zbc_depth[i].format != format)
116
				continue;
B
Ben Skeggs 已提交
117
			if (gr->zbc_depth[i].ds != ds)
118
				continue;
B
Ben Skeggs 已提交
119
			if (gr->zbc_depth[i].l2 != l2) {
120 121 122 123 124 125 126 127 128
				WARN_ON(1);
				return -EINVAL;
			}
			return i;
		} else {
			zbc = (zbc < 0) ? i : zbc;
		}
	}

129 130 131
	if (zbc < 0)
		return zbc;

B
Ben Skeggs 已提交
132 133 134
	gr->zbc_depth[zbc].format = format;
	gr->zbc_depth[zbc].ds = ds;
	gr->zbc_depth[zbc].l2 = l2;
135
	nvkm_ltc_zbc_depth_get(ltc, zbc, l2);
B
Ben Skeggs 已提交
136
	gf100_gr_zbc_clear_depth(gr, zbc);
137 138 139
	return zbc;
}

140 141 142 143
/*******************************************************************************
 * Graphics object classes
 ******************************************************************************/

144
static int
145
gf100_fermi_mthd_zbc_color(struct nvkm_object *object, void *data, u32 size)
146
{
147
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173
	union {
		struct fermi_a_zbc_color_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_COLOR_V0_FMT_ZERO:
		case FERMI_A_ZBC_COLOR_V0_FMT_UNORM_ONE:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF32_GF32_BF32_AF32:
		case FERMI_A_ZBC_COLOR_V0_FMT_R16_G16_B16_A16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RN16_GN16_BN16_AN16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RS16_GS16_BS16_AS16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RU16_GU16_BU16_AU16:
		case FERMI_A_ZBC_COLOR_V0_FMT_RF16_GF16_BF16_AF16:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8R8G8B8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8RL8GL8BL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2B10G10R10:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU2BU10GU10RU10:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8B8G8R8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A8BL8GL8RL8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AN8BN8GN8RN8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AS8BS8GS8RS8:
		case FERMI_A_ZBC_COLOR_V0_FMT_AU8BU8GU8RU8:
		case FERMI_A_ZBC_COLOR_V0_FMT_A2R10G10B10:
		case FERMI_A_ZBC_COLOR_V0_FMT_BF10GF11RF11:
B
Ben Skeggs 已提交
174
			ret = gf100_gr_zbc_color_get(gr, args->v0.format,
175 176
							   args->v0.ds,
							   args->v0.l2);
177 178 179 180 181 182 183 184 185 186 187 188 189 190
			if (ret >= 0) {
				args->v0.index = ret;
				return 0;
			}
			break;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
191
gf100_fermi_mthd_zbc_depth(struct nvkm_object *object, void *data, u32 size)
192
{
193
	struct gf100_gr *gr = gf100_gr(nvkm_gr(object->engine));
194 195 196 197 198 199 200 201
	union {
		struct fermi_a_zbc_depth_v0 v0;
	} *args = data;
	int ret;

	if (nvif_unpack(args->v0, 0, 0, false)) {
		switch (args->v0.format) {
		case FERMI_A_ZBC_DEPTH_V0_FMT_FP32:
B
Ben Skeggs 已提交
202
			ret = gf100_gr_zbc_depth_get(gr, args->v0.format,
203 204
							   args->v0.ds,
							   args->v0.l2);
205 206 207 208 209 210 211 212 213 214
			return (ret >= 0) ? 0 : -ENOSPC;
		default:
			return -EINVAL;
		}
	}

	return ret;
}

static int
215
gf100_fermi_mthd(struct nvkm_object *object, u32 mthd, void *data, u32 size)
216 217 218
{
	switch (mthd) {
	case FERMI_A_ZBC_COLOR:
219
		return gf100_fermi_mthd_zbc_color(object, data, size);
220
	case FERMI_A_ZBC_DEPTH:
221
		return gf100_fermi_mthd_zbc_depth(object, data, size);
222 223 224 225 226 227
	default:
		break;
	}
	return -EINVAL;
}

228 229
const struct nvkm_object_func
gf100_fermi = {
230
	.mthd = gf100_fermi_mthd,
231 232
};

233 234
/*
 * Write all-ones (when @data is non-zero) or zero to registers 0x419e44
 * and 0x419e4c.
 */
static void
gf100_gr_mthd_set_shader_exceptions(struct nvkm_device *device, u32 data)
{
	const u32 value = data ? 0xffffffff : 0x00000000;

	nvkm_wr32(device, 0x419e44, value);
	nvkm_wr32(device, 0x419e4c, value);
}

240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258
/*
 * Software method handler.  For classes whose low byte is 0x97 or 0xc0,
 * method 0x1528 is forwarded to gf100_gr_mthd_set_shader_exceptions().
 *
 * Returns true when the method was handled here, false otherwise.
 */
static bool
gf100_gr_mthd_sw(struct nvkm_device *device, u16 class, u32 mthd, u32 data)
{
	const u16 type = class & 0x00ff;

	if (type != 0x97 && type != 0xc0)
		return false;

	if (mthd != 0x1528)
		return false;

	gf100_gr_mthd_set_shader_exceptions(device, data);
	return true;
}
259

260 261 262 263 264 265 266 267 268 269 270 271 272 273 274
/*
 * Look up entry @index of gr->func->sclass[], a list that ends at the
 * first entry with a zero oclass.
 *
 * When @index is inside the list the entry is copied to *sclass and
 * @index is returned; otherwise the number of entries is returned.
 */
static int
gf100_gr_object_get(struct nvkm_gr *base, int index, struct nvkm_sclass *sclass)
{
	struct gf100_gr *gr = gf100_gr(base);
	int nr;

	for (nr = 0; gr->func->sclass[nr].oclass; nr++) {
		if (nr == index) {
			*sclass = gr->func->sclass[nr];
			return index;
		}
	}

	return nr;
}
275 276 277 278

/*******************************************************************************
 * PGRAPH context
 ******************************************************************************/
279

280 281 282
static int
gf100_gr_chan_bind(struct nvkm_object *object, struct nvkm_gpuobj *parent,
		   int align, struct nvkm_gpuobj **pgpuobj)
283
{
284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	struct gf100_gr *gr = chan->gr;
	int ret, i;

	ret = nvkm_gpuobj_new(gr->base.engine.subdev.device, gr->size,
			      align, false, parent, pgpuobj);
	if (ret)
		return ret;

	nvkm_kmap(*pgpuobj);
	for (i = 0; i < gr->size; i += 4)
		nvkm_wo32(*pgpuobj, i, gr->data[i / 4]);

	if (!gr->firmware) {
		nvkm_wo32(*pgpuobj, 0x00, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x04, chan->mmio_vma.offset >> 8);
	} else {
		nvkm_wo32(*pgpuobj, 0xf4, 0);
		nvkm_wo32(*pgpuobj, 0xf8, 0);
		nvkm_wo32(*pgpuobj, 0x10, chan->mmio_nr / 2);
		nvkm_wo32(*pgpuobj, 0x14, lower_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x18, upper_32_bits(chan->mmio_vma.offset));
		nvkm_wo32(*pgpuobj, 0x1c, 1);
		nvkm_wo32(*pgpuobj, 0x20, 0);
		nvkm_wo32(*pgpuobj, 0x28, 0);
		nvkm_wo32(*pgpuobj, 0x2c, 0);
	}
	nvkm_done(*pgpuobj);
	return 0;
}

/*
 * Release everything gf100_gr_chan_new() set up: the per-channel data
 * buffers first, then the mmio list.  A mapping is only torn down when
 * its vma has a node.  Returns the channel pointer.
 */
static void *
gf100_gr_chan_dtor(struct nvkm_object *object)
{
	struct gf100_gr_chan *chan = gf100_gr_chan(object);
	int idx;

	for (idx = 0; idx < ARRAY_SIZE(chan->data); idx++) {
		if (chan->data[idx].vma.node) {
			nvkm_vm_unmap(&chan->data[idx].vma);
			nvkm_vm_put(&chan->data[idx].vma);
		}

		nvkm_memory_del(&chan->data[idx].mem);
	}

	if (chan->mmio_vma.node) {
		nvkm_vm_unmap(&chan->mmio_vma);
		nvkm_vm_put(&chan->mmio_vma);
	}

	nvkm_memory_del(&chan->mmio);

	return chan;
}

/* Object functions for the per-channel GR context object. */
static const struct nvkm_object_func
gf100_gr_chan = {
	.bind = gf100_gr_chan_bind,
	.dtor = gf100_gr_chan_dtor,
};

static int
gf100_gr_chan_new(struct nvkm_gr *base, struct nvkm_fifo_chan *fifoch,
		  const struct nvkm_oclass *oclass,
		  struct nvkm_object **pobject)
{
	struct gf100_gr *gr = gf100_gr(base);
B
Ben Skeggs 已提交
349 350
	struct gf100_gr_data *data = gr->mmio_data;
	struct gf100_gr_mmio *mmio = gr->mmio_list;
351
	struct gf100_gr_chan *chan;
352
	struct nvkm_device *device = gr->base.engine.subdev.device;
353 354
	int ret, i;

355 356 357 358 359
	if (!(chan = kzalloc(sizeof(*chan), GFP_KERNEL)))
		return -ENOMEM;
	nvkm_object_ctor(&gf100_gr_chan, oclass, &chan->object);
	chan->gr = gr;
	*pobject = &chan->object;
360

361 362 363 364
	/* allocate memory for a "mmio list" buffer that's used by the HUB
	 * fuc to modify some per-context register settings on first load
	 * of the context.
	 */
365 366
	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 0x100,
			      false, &chan->mmio);
367 368 369
	if (ret)
		return ret;

370
	ret = nvkm_vm_get(fifoch->vm, 0x1000, 12, NV_MEM_ACCESS_RW |
371
			  NV_MEM_ACCESS_SYS, &chan->mmio_vma);
372 373 374
	if (ret)
		return ret;

375 376
	nvkm_memory_map(chan->mmio, &chan->mmio_vma, 0);

377
	/* allocate buffers referenced by mmio list */
B
Ben Skeggs 已提交
378
	for (i = 0; data->size && i < ARRAY_SIZE(gr->mmio_data); i++) {
379 380 381
		ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST,
				      data->size, data->align, false,
				      &chan->data[i].mem);
382 383
		if (ret)
			return ret;
384

385 386 387
		ret = nvkm_vm_get(fifoch->vm,
				  nvkm_memory_size(chan->data[i].mem), 12,
				  data->access, &chan->data[i].vma);
388 389
		if (ret)
			return ret;
390

391
		nvkm_memory_map(chan->data[i].mem, &chan->data[i].vma, 0);
392
		data++;
393 394
	}

395
	/* finally, fill in the mmio list and point the context at it */
396
	nvkm_kmap(chan->mmio);
B
Ben Skeggs 已提交
397
	for (i = 0; mmio->addr && i < ARRAY_SIZE(gr->mmio_list); i++) {
398 399
		u32 addr = mmio->addr;
		u32 data = mmio->data;
400

401
		if (mmio->buffer >= 0) {
402
			u64 info = chan->data[mmio->buffer].vma.offset;
403 404
			data |= info >> mmio->shift;
		}
405

406 407
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, addr);
		nvkm_wo32(chan->mmio, chan->mmio_nr++ * 4, data);
408 409
		mmio++;
	}
410
	nvkm_done(chan->mmio);
411
	return 0;
412 413
}

414
/*******************************************************************************
415
 * PGRAPH register lists
416 417
 ******************************************************************************/

418 419
/*
 * Register init list, first entry of gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }, as read by
 * gf100_gr_mmio(); the empty entry closes the list.
 */
const struct gf100_gr_init
gf100_gr_init_main_0[] = {
	{ 0x400080,   1, 0x04, 0x003083c2 },
	{ 0x400088,   1, 0x04, 0x00006fe7 },
	{ 0x40008c,   1, 0x04, 0x00000000 },
	{ 0x400090,   1, 0x04, 0x00000030 },
	{ 0x40013c,   1, 0x04, 0x013901f7 },
	{ 0x400140,   1, 0x04, 0x00000100 },
	{ 0x400144,   1, 0x04, 0x00000000 },
	{ 0x400148,   1, 0x04, 0x00000110 },
	{ 0x400138,   1, 0x04, 0x00000000 },
	{ 0x400130,   2, 0x04, 0x00000000 },
	{ 0x400124,   1, 0x04, 0x00000002 },
	{}
};

434 435
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_fe_0[] = {
	{ 0x40415c,   1, 0x04, 0x00000000 },
	{ 0x404170,   1, 0x04, 0x00000000 },
	{}
};

441 442
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_pri_0[] = {
	{ 0x404488,   2, 0x04, 0x00000000 },
	{}
};

447 448
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_rstr2d_0[] = {
	{ 0x407808,   1, 0x04, 0x00000000 },
	{}
};

453 454
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_pd_0[] = {
	{ 0x406024,   1, 0x04, 0x00000000 },
	{}
};

459 460
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_ds_0[] = {
	{ 0x405844,   1, 0x04, 0x00ffffff },
	{ 0x405850,   1, 0x04, 0x00000000 },
	{ 0x405908,   1, 0x04, 0x00000000 },
	{}
};

467 468
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_scc_0[] = {
	{ 0x40803c,   1, 0x04, 0x00000000 },
	{}
};

473 474
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_prop_0[] = {
	{ 0x4184a0,   1, 0x04, 0x00000000 },
	{}
};

479 480
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_0[] = {
	{ 0x418604,   1, 0x04, 0x00000000 },
	{ 0x418680,   1, 0x04, 0x00000000 },
	{ 0x418714,   1, 0x04, 0x80000000 },
	{ 0x418384,   1, 0x04, 0x00000000 },
	{}
};

488 489
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_setup_0[] = {
	{ 0x418814,   3, 0x04, 0x00000000 },
	{}
};

494 495
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_crstr_0[] = {
	{ 0x418b04,   1, 0x04, 0x00000000 },
	{}
};

500 501
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_setup_1[] = {
	{ 0x4188c8,   1, 0x04, 0x80000000 },
	{ 0x4188cc,   1, 0x04, 0x00000000 },
	{ 0x4188d0,   1, 0x04, 0x00010000 },
	{ 0x4188d4,   1, 0x04, 0x00000001 },
	{}
};

509 510
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_zcull_0[] = {
	{ 0x418910,   1, 0x04, 0x00010001 },
	{ 0x418914,   1, 0x04, 0x00000301 },
	{ 0x418918,   1, 0x04, 0x00800000 },
	{ 0x418980,   1, 0x04, 0x77777770 },
	{ 0x418984,   3, 0x04, 0x77777777 },
	{}
};

519 520
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_gpm_0[] = {
	{ 0x418c04,   1, 0x04, 0x00000000 },
	{ 0x418c88,   1, 0x04, 0x00000000 },
	{}
};

526 527
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_gpc_unk_1[] = {
	{ 0x418d00,   1, 0x04, 0x00000000 },
	{ 0x418f08,   1, 0x04, 0x00000000 },
	{ 0x418e00,   1, 0x04, 0x00000050 },
	{ 0x418e08,   1, 0x04, 0x00000000 },
	{}
};

535 536
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_gcc_0[] = {
	{ 0x41900c,   1, 0x04, 0x00000000 },
	{ 0x419018,   1, 0x04, 0x00000000 },
	{}
};

542 543
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_tpccs_0[] = {
	{ 0x419d08,   2, 0x04, 0x00000000 },
	{ 0x419d10,   1, 0x04, 0x00000014 },
	{}
};

549 550
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_tex_0[] = {
	{ 0x419ab0,   1, 0x04, 0x00000000 },
	{ 0x419ab8,   1, 0x04, 0x000000e7 },
	{ 0x419abc,   2, 0x04, 0x00000000 },
	{}
};

557 558
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_pe_0[] = {
	{ 0x41980c,   3, 0x04, 0x00000000 },
	{ 0x419844,   1, 0x04, 0x00000000 },
	{ 0x41984c,   1, 0x04, 0x00005bc5 },
	{ 0x419850,   4, 0x04, 0x00000000 },
	{}
};

566 567
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_l1c_0[] = {
	{ 0x419c98,   1, 0x04, 0x00000000 },
	{ 0x419ca8,   1, 0x04, 0x80000000 },
	{ 0x419cb4,   1, 0x04, 0x00000000 },
	{ 0x419cb8,   1, 0x04, 0x00008bf4 },
	{ 0x419cbc,   1, 0x04, 0x28137606 },
	{ 0x419cc0,   2, 0x04, 0x00000000 },
	{}
};

577 578
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_wwdx_0[] = {
	{ 0x419bd4,   1, 0x04, 0x00800000 },
	{ 0x419bdc,   1, 0x04, 0x00000000 },
	{}
};

584 585
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_tpccs_1[] = {
	{ 0x419d2c,   1, 0x04, 0x00000000 },
	{}
};

590 591
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_mpc_0[] = {
	{ 0x419c0c,   1, 0x04, 0x00000000 },
	{}
};

596 597
/*
 * File-local register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
static const struct gf100_gr_init
gf100_gr_init_sm_0[] = {
	{ 0x419e00,   1, 0x04, 0x00000000 },
	{ 0x419ea0,   1, 0x04, 0x00000000 },
	{ 0x419ea4,   1, 0x04, 0x00000100 },
	{ 0x419ea8,   1, 0x04, 0x00001100 },
	{ 0x419eac,   1, 0x04, 0x11100702 },
	{ 0x419eb0,   1, 0x04, 0x00000003 },
	{ 0x419eb4,   4, 0x04, 0x00000000 },
	{ 0x419ec8,   1, 0x04, 0x06060618 },
	{ 0x419ed0,   1, 0x04, 0x0eff0e38 },
	{ 0x419ed4,   1, 0x04, 0x011104f1 },
	{ 0x419edc,   1, 0x04, 0x00000000 },
	{ 0x419f00,   1, 0x04, 0x00000000 },
	{ 0x419f2c,   1, 0x04, 0x00000000 },
	{}
};

614 615
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_be_0[] = {
	{ 0x40880c,   1, 0x04, 0x00000000 },
	{ 0x408910,   9, 0x04, 0x00000000 },
	{ 0x408950,   1, 0x04, 0x00000000 },
	{ 0x408954,   1, 0x04, 0x0000ffff },
	{ 0x408984,   1, 0x04, 0x00000000 },
	{ 0x408988,   1, 0x04, 0x08040201 },
	{ 0x40898c,   1, 0x04, 0x80402010 },
	{}
};

626 627
/*
 * Register init list referenced by gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_fe_1[] = {
	{ 0x4040f0,   1, 0x04, 0x00000000 },
	{}
};

632 633
/*
 * Register init list, last entry of gf100_gr_pack_mmio.
 * Fields are presumably { addr, count, pitch, data }.
 */
const struct gf100_gr_init
gf100_gr_init_pe_1[] = {
	{ 0x419880,   1, 0x04, 0x00000002 },
	{}
};

638 639 640 641 642 643 644 645 646 647 648 649 650 651 652 653 654 655 656 657 658 659 660 661 662 663 664 665 666
/*
 * Ordered collection of the register init lists above; the empty entry
 * closes the pack.  Walkers such as gf100_gr_mmio() take a pack like this
 * and visit every init entry in order.
 */
static const struct gf100_gr_pack
gf100_gr_pack_mmio[] = {
	{ gf100_gr_init_main_0 },
	{ gf100_gr_init_fe_0 },
	{ gf100_gr_init_pri_0 },
	{ gf100_gr_init_rstr2d_0 },
	{ gf100_gr_init_pd_0 },
	{ gf100_gr_init_ds_0 },
	{ gf100_gr_init_scc_0 },
	{ gf100_gr_init_prop_0 },
	{ gf100_gr_init_gpc_unk_0 },
	{ gf100_gr_init_setup_0 },
	{ gf100_gr_init_crstr_0 },
	{ gf100_gr_init_setup_1 },
	{ gf100_gr_init_zcull_0 },
	{ gf100_gr_init_gpm_0 },
	{ gf100_gr_init_gpc_unk_1 },
	{ gf100_gr_init_gcc_0 },
	{ gf100_gr_init_tpccs_0 },
	{ gf100_gr_init_tex_0 },
	{ gf100_gr_init_pe_0 },
	{ gf100_gr_init_l1c_0 },
	{ gf100_gr_init_wwdx_0 },
	{ gf100_gr_init_tpccs_1 },
	{ gf100_gr_init_mpc_0 },
	{ gf100_gr_init_sm_0 },
	{ gf100_gr_init_be_0 },
	{ gf100_gr_init_fe_1 },
	{ gf100_gr_init_pe_1 },
	{}
};

670 671 672 673
/*******************************************************************************
 * PGRAPH engine/subdev functions
 ******************************************************************************/

674
void
B
Ben Skeggs 已提交
675
gf100_gr_zbc_init(struct gf100_gr *gr)
676 677 678 679 680 681 682 683 684
{
	const u32  zero[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32   one[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0xffffffff, 0xffffffff, 0xffffffff, 0xffffffff };
	const u32 f32_0[] = { 0x00000000, 0x00000000, 0x00000000, 0x00000000,
			      0x00000000, 0x00000000, 0x00000000, 0x00000000 };
	const u32 f32_1[] = { 0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000,
			      0x3f800000, 0x3f800000, 0x3f800000, 0x3f800000 };
685
	struct nvkm_ltc *ltc = gr->base.engine.subdev.device->ltc;
686 687
	int index;

B
Ben Skeggs 已提交
688 689 690 691 692 693 694
	if (!gr->zbc_color[0].format) {
		gf100_gr_zbc_color_get(gr, 1,  & zero[0],   &zero[4]);
		gf100_gr_zbc_color_get(gr, 2,  &  one[0],    &one[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_0[0],  &f32_0[4]);
		gf100_gr_zbc_color_get(gr, 4,  &f32_1[0],  &f32_1[4]);
		gf100_gr_zbc_depth_get(gr, 1, 0x00000000, 0x00000000);
		gf100_gr_zbc_depth_get(gr, 1, 0x3f800000, 0x3f800000);
695 696 697
	}

	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
698
		gf100_gr_zbc_clear_color(gr, index);
699
	for (index = ltc->zbc_min; index <= ltc->zbc_max; index++)
B
Ben Skeggs 已提交
700
		gf100_gr_zbc_clear_depth(gr, index);
701 702
}

703 704 705 706 707 708
/**
 * Wait until GR goes idle. GR is considered idle if it is disabled by the
 * MC (0x200) register, or GR is not busy and a context switch is not in
 * progress.
 */
int
B
Ben Skeggs 已提交
709
gf100_gr_wait_idle(struct gf100_gr *gr)
710
{
711 712
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
713 714 715 716 717 718 719 720
	unsigned long end_jiffies = jiffies + msecs_to_jiffies(2000);
	bool gr_enabled, ctxsw_active, gr_busy;

	do {
		/*
		 * required to make sure FIFO_ENGINE_STATUS (0x2640) is
		 * up-to-date
		 */
721
		nvkm_rd32(device, 0x400700);
722

723 724 725
		gr_enabled = nvkm_rd32(device, 0x200) & 0x1000;
		ctxsw_active = nvkm_rd32(device, 0x2640) & 0x8000;
		gr_busy = nvkm_rd32(device, 0x40060c) & 0x1;
726 727 728 729 730

		if (!gr_enabled || (!gr_busy && !ctxsw_active))
			return 0;
	} while (time_before(jiffies, end_jiffies));

731 732 733
	nvkm_error(subdev,
		   "wait for idle timeout (en: %d, ctxsw: %d, busy: %d)\n",
		   gr_enabled, ctxsw_active, gr_busy);
734 735 736
	return -EAGAIN;
}

737
void
B
Ben Skeggs 已提交
738
gf100_gr_mmio(struct gf100_gr *gr, const struct gf100_gr_pack *p)
739
{
740
	struct nvkm_device *device = gr->base.engine.subdev.device;
741 742
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
743 744 745 746 747

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;
		while (addr < next) {
748
			nvkm_wr32(device, addr, init->data);
749 750 751
			addr += init->pitch;
		}
	}
752 753 754
}

void
B
Ben Skeggs 已提交
755
gf100_gr_icmd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
756
{
757
	struct nvkm_device *device = gr->base.engine.subdev.device;
758 759
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
760
	u32 data = 0;
761

762
	nvkm_wr32(device, 0x400208, 0x80000000);
763 764 765 766 767 768

	pack_for_each_init(init, pack, p) {
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
769
			nvkm_wr32(device, 0x400204, init->data);
770 771
			data = init->data;
		}
772

773
		while (addr < next) {
774
			nvkm_wr32(device, 0x400200, addr);
775 776 777 778 779
			/**
			 * Wait for GR to go idle after submitting a
			 * GO_IDLE bundle
			 */
			if ((addr & 0xffff) == 0xe100)
B
Ben Skeggs 已提交
780
				gf100_gr_wait_idle(gr);
781 782 783 784
			nvkm_msec(device, 2000,
				if (!(nvkm_rd32(device, 0x400700) & 0x00000004))
					break;
			);
785 786 787
			addr += init->pitch;
		}
	}
788

789
	nvkm_wr32(device, 0x400208, 0x00000000);
790 791 792
}

void
B
Ben Skeggs 已提交
793
gf100_gr_mthd(struct gf100_gr *gr, const struct gf100_gr_pack *p)
794
{
795
	struct nvkm_device *device = gr->base.engine.subdev.device;
796 797
	const struct gf100_gr_pack *pack;
	const struct gf100_gr_init *init;
798
	u32 data = 0;
799

800 801 802 803 804 805
	pack_for_each_init(init, pack, p) {
		u32 ctrl = 0x80000000 | pack->type;
		u32 next = init->addr + init->count * init->pitch;
		u32 addr = init->addr;

		if ((pack == p && init == p->init) || data != init->data) {
806
			nvkm_wr32(device, 0x40448c, init->data);
807 808 809 810
			data = init->data;
		}

		while (addr < next) {
811
			nvkm_wr32(device, 0x404488, ctrl | (addr << 14));
812
			addr += init->pitch;
813 814 815 816 817
		}
	}
}

u64
818
gf100_gr_units(struct nvkm_gr *base)
819
{
820
	struct gf100_gr *gr = gf100_gr(base);
821 822
	u64 cfg;

B
Ben Skeggs 已提交
823 824 825
	cfg  = (u32)gr->gpc_nr;
	cfg |= (u32)gr->tpc_total << 8;
	cfg |= (u64)gr->rop_nr << 32;
826 827

	return cfg;
828 829
}

830 831 832 833 834 835 836 837 838 839 840 841
/* Bit names used to decode the SKED trap status with nvkm_snprintbf(). */
static const struct nvkm_bitfield gk104_sked_error[] = {
	{ 0x00000080, "CONSTANT_BUFFER_SIZE" },
	{ 0x00000200, "LOCAL_MEMORY_SIZE_POS" },
	{ 0x00000400, "LOCAL_MEMORY_SIZE_NEG" },
	{ 0x00000800, "WARP_CSTACK_SIZE" },
	{ 0x00001000, "TOTAL_TEMP_SIZE" },
	{ 0x00002000, "REGISTER_COUNT" },
	{ 0x00040000, "TOTAL_THREADS" },
	{ 0x00100000, "PROGRAM_OFFSET" },
	{ 0x00200000, "SHARED_MEMORY_SIZE" },
	{ 0x02000000, "SHARED_CONFIG_TOO_SMALL" },
	{ 0x04000000, "TOTAL_REGISTER_COUNT" },
	{}
};

845 846 847 848 849 850 851
/* Bit names used by gf100_gr_trap_gpc_rop() to decode the trap status. */
static const struct nvkm_bitfield gf100_gpc_rop_error[] = {
	{ 0x00000002, "RT_PITCH_OVERRUN" },
	{ 0x00000010, "RT_WIDTH_OVERRUN" },
	{ 0x00000020, "RT_HEIGHT_OVERRUN" },
	{ 0x00000080, "ZETA_STORAGE_TYPE_MISMATCH" },
	{ 0x00000100, "RT_STORAGE_TYPE_MISMATCH" },
	{ 0x00000400, "RT_LINEAR_MISMATCH" },
	{}
};

855
static void
B
Ben Skeggs 已提交
856
gf100_gr_trap_gpc_rop(struct gf100_gr *gr, int gpc)
857
{
858 859 860
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	char error[128];
861
	u32 trap[4];
862

863
	trap[0] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0420)) & 0x3fffffff;
864 865 866
	trap[1] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0434));
	trap[2] = nvkm_rd32(device, GPC_UNIT(gpc, 0x0438));
	trap[3] = nvkm_rd32(device, GPC_UNIT(gpc, 0x043c));
867

868
	nvkm_snprintbf(error, sizeof(error), gf100_gpc_rop_error, trap[0]);
869

870 871 872 873
	nvkm_error(subdev, "GPC%d/PROP trap: %08x [%s] x = %u, y = %u, "
			   "format = %x, storage type = %x\n",
		   gpc, trap[0], error, trap[1] & 0xffff, trap[1] >> 16,
		   (trap[2] >> 8) & 0x3f, trap[3] & 0xff);
874
	nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
875 876
}

877
/* Warp error codes, looked up by gf100_gr_trap_mp() via nvkm_enum_find(). */
static const struct nvkm_enum gf100_mp_warp_error[] = {
	{ 0x00, "NO_ERROR" },
	{ 0x01, "STACK_MISMATCH" },
	{ 0x05, "MISALIGNED_PC" },
	{ 0x08, "MISALIGNED_GPR" },
	{ 0x09, "INVALID_OPCODE" },
	{ 0x0d, "GPR_OUT_OF_BOUNDS" },
	{ 0x0e, "MEM_OUT_OF_BOUNDS" },
	{ 0x0f, "UNALIGNED_MEM_ACCESS" },
	{ 0x10, "INVALID_ADDR_SPACE" },
	{ 0x11, "INVALID_PARAM" },
	{}
};

891
/* Bit names used by gf100_gr_trap_mp() to decode the global error word. */
static const struct nvkm_bitfield gf100_mp_global_error[] = {
	{ 0x00000004, "MULTIPLE_WARP_ERRORS" },
	{ 0x00000008, "OUT_OF_STACK_SPACE" },
	{}
};

static void
B
Ben Skeggs 已提交
898
gf100_gr_trap_mp(struct gf100_gr *gr, int gpc, int tpc)
899
{
900 901
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
902 903
	u32 werr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x648));
	u32 gerr = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x650));
904 905
	const struct nvkm_enum *warp;
	char glob[128];
906

907 908 909 910 911 912
	nvkm_snprintbf(glob, sizeof(glob), gf100_mp_global_error, gerr);
	warp = nvkm_enum_find(gf100_mp_warp_error, werr & 0xffff);

	nvkm_error(subdev, "GPC%i/TPC%i/MP trap: "
			   "global %08x [%s] warp %04x [%s]\n",
		   gpc, tpc, gerr, glob, werr, warp ? warp->name : "");
913

914 915
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x648), 0x00000000);
	nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x650), gerr);
916 917
}

918
static void
B
Ben Skeggs 已提交
919
gf100_gr_trap_tpc(struct gf100_gr *gr, int gpc, int tpc)
920
{
921 922
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
923
	u32 stat = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0508));
924 925

	if (stat & 0x00000001) {
926
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0224));
927
		nvkm_error(subdev, "GPC%d/TPC%d/TEX: %08x\n", gpc, tpc, trap);
928
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0224), 0xc0000000);
929 930 931 932
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
B
Ben Skeggs 已提交
933
		gf100_gr_trap_mp(gr, gpc, tpc);
934 935 936 937
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
938
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x0084));
939
		nvkm_error(subdev, "GPC%d/TPC%d/POLY: %08x\n", gpc, tpc, trap);
940
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x0084), 0xc0000000);
941 942 943 944
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
945
		u32 trap = nvkm_rd32(device, TPC_UNIT(gpc, tpc, 0x048c));
946
		nvkm_error(subdev, "GPC%d/TPC%d/L1C: %08x\n", gpc, tpc, trap);
947
		nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x048c), 0xc0000000);
948 949 950 951
		stat &= ~0x00000008;
	}

	if (stat) {
952
		nvkm_error(subdev, "GPC%d/TPC%d/%08x: unknown\n", gpc, tpc, stat);
953 954 955 956
	}
}

static void
B
Ben Skeggs 已提交
957
gf100_gr_trap_gpc(struct gf100_gr *gr, int gpc)
958
{
959 960
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
961
	u32 stat = nvkm_rd32(device, GPC_UNIT(gpc, 0x2c90));
962 963 964
	int tpc;

	if (stat & 0x00000001) {
B
Ben Skeggs 已提交
965
		gf100_gr_trap_gpc_rop(gr, gpc);
966 967 968 969
		stat &= ~0x00000001;
	}

	if (stat & 0x00000002) {
970
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0900));
971
		nvkm_error(subdev, "GPC%d/ZCULL: %08x\n", gpc, trap);
972
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
973 974 975 976
		stat &= ~0x00000002;
	}

	if (stat & 0x00000004) {
977
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x1028));
978
		nvkm_error(subdev, "GPC%d/CCACHE: %08x\n", gpc, trap);
979
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
980 981 982 983
		stat &= ~0x00000004;
	}

	if (stat & 0x00000008) {
984
		u32 trap = nvkm_rd32(device, GPC_UNIT(gpc, 0x0824));
985
		nvkm_error(subdev, "GPC%d/ESETUP: %08x\n", gpc, trap);
986
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
987 988 989
		stat &= ~0x00000009;
	}

B
Ben Skeggs 已提交
990
	for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
991 992
		u32 mask = 0x00010000 << tpc;
		if (stat & mask) {
B
Ben Skeggs 已提交
993
			gf100_gr_trap_tpc(gr, gpc, tpc);
994
			nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), mask);
995 996 997 998 999
			stat &= ~mask;
		}
	}

	if (stat) {
1000
		nvkm_error(subdev, "GPC%d/%08x: unknown\n", gpc, stat);
1001 1002 1003 1004
	}
}

static void
B
Ben Skeggs 已提交
1005
gf100_gr_trap_intr(struct gf100_gr *gr)
1006
{
1007 1008
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1009
	u32 trap = nvkm_rd32(device, 0x400108);
1010
	int rop, gpc;
1011 1012

	if (trap & 0x00000001) {
1013
		u32 stat = nvkm_rd32(device, 0x404000);
1014
		nvkm_error(subdev, "DISPATCH %08x\n", stat);
1015 1016
		nvkm_wr32(device, 0x404000, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000001);
1017 1018 1019 1020
		trap &= ~0x00000001;
	}

	if (trap & 0x00000002) {
1021
		u32 stat = nvkm_rd32(device, 0x404600);
1022
		nvkm_error(subdev, "M2MF %08x\n", stat);
1023 1024
		nvkm_wr32(device, 0x404600, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000002);
1025 1026 1027 1028
		trap &= ~0x00000002;
	}

	if (trap & 0x00000008) {
1029
		u32 stat = nvkm_rd32(device, 0x408030);
1030
		nvkm_error(subdev, "CCACHE %08x\n", stat);
1031 1032
		nvkm_wr32(device, 0x408030, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000008);
1033 1034 1035 1036
		trap &= ~0x00000008;
	}

	if (trap & 0x00000010) {
1037
		u32 stat = nvkm_rd32(device, 0x405840);
1038
		nvkm_error(subdev, "SHADER %08x\n", stat);
1039 1040
		nvkm_wr32(device, 0x405840, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000010);
1041 1042 1043 1044
		trap &= ~0x00000010;
	}

	if (trap & 0x00000040) {
1045
		u32 stat = nvkm_rd32(device, 0x40601c);
1046
		nvkm_error(subdev, "UNK6 %08x\n", stat);
1047 1048
		nvkm_wr32(device, 0x40601c, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000040);
1049 1050 1051 1052
		trap &= ~0x00000040;
	}

	if (trap & 0x00000080) {
1053
		u32 stat = nvkm_rd32(device, 0x404490);
1054
		nvkm_error(subdev, "MACRO %08x\n", stat);
1055 1056
		nvkm_wr32(device, 0x404490, 0xc0000000);
		nvkm_wr32(device, 0x400108, 0x00000080);
1057 1058 1059
		trap &= ~0x00000080;
	}

1060
	if (trap & 0x00000100) {
1061 1062
		u32 stat = nvkm_rd32(device, 0x407020) & 0x3fffffff;
		char sked[128];
1063

1064 1065
		nvkm_snprintbf(sked, sizeof(sked), gk104_sked_error, stat);
		nvkm_error(subdev, "SKED: %08x [%s]\n", stat, sked);
1066

1067
		if (stat)
1068 1069
			nvkm_wr32(device, 0x407020, 0x40000000);
		nvkm_wr32(device, 0x400108, 0x00000100);
1070 1071 1072
		trap &= ~0x00000100;
	}

1073
	if (trap & 0x01000000) {
1074
		u32 stat = nvkm_rd32(device, 0x400118);
B
Ben Skeggs 已提交
1075
		for (gpc = 0; stat && gpc < gr->gpc_nr; gpc++) {
1076 1077
			u32 mask = 0x00000001 << gpc;
			if (stat & mask) {
B
Ben Skeggs 已提交
1078
				gf100_gr_trap_gpc(gr, gpc);
1079
				nvkm_wr32(device, 0x400118, mask);
1080 1081 1082
				stat &= ~mask;
			}
		}
1083
		nvkm_wr32(device, 0x400108, 0x01000000);
1084 1085 1086 1087
		trap &= ~0x01000000;
	}

	if (trap & 0x02000000) {
B
Ben Skeggs 已提交
1088
		for (rop = 0; rop < gr->rop_nr; rop++) {
1089 1090
			u32 statz = nvkm_rd32(device, ROP_UNIT(rop, 0x070));
			u32 statc = nvkm_rd32(device, ROP_UNIT(rop, 0x144));
1091
			nvkm_error(subdev, "ROP%d %08x %08x\n",
1092
				 rop, statz, statc);
1093 1094
			nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
			nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
1095
		}
1096
		nvkm_wr32(device, 0x400108, 0x02000000);
1097 1098 1099 1100
		trap &= ~0x02000000;
	}

	if (trap) {
1101
		nvkm_error(subdev, "TRAP UNHANDLED %08x\n", trap);
1102
		nvkm_wr32(device, 0x400108, trap);
1103 1104 1105
	}
}

1106
static void
B
Ben Skeggs 已提交
1107
gf100_gr_ctxctl_debug_unit(struct gf100_gr *gr, u32 base)
1108
{
1109 1110 1111 1112 1113 1114 1115 1116 1117 1118 1119 1120 1121 1122
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	nvkm_error(subdev, "%06x - done %08x\n", base,
		   nvkm_rd32(device, base + 0x400));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x800),
		   nvkm_rd32(device, base + 0x804),
		   nvkm_rd32(device, base + 0x808),
		   nvkm_rd32(device, base + 0x80c));
	nvkm_error(subdev, "%06x - stat %08x %08x %08x %08x\n", base,
		   nvkm_rd32(device, base + 0x810),
		   nvkm_rd32(device, base + 0x814),
		   nvkm_rd32(device, base + 0x818),
		   nvkm_rd32(device, base + 0x81c));
1123 1124 1125
}

void
B
Ben Skeggs 已提交
1126
gf100_gr_ctxctl_debug(struct gf100_gr *gr)
1127
{
1128 1129
	struct nvkm_device *device = gr->base.engine.subdev.device;
	u32 gpcnr = nvkm_rd32(device, 0x409604) & 0xffff;
1130 1131
	u32 gpc;

B
Ben Skeggs 已提交
1132
	gf100_gr_ctxctl_debug_unit(gr, 0x409000);
1133
	for (gpc = 0; gpc < gpcnr; gpc++)
B
Ben Skeggs 已提交
1134
		gf100_gr_ctxctl_debug_unit(gr, 0x502000 + (gpc * 0x8000));
1135 1136 1137
}

static void
B
Ben Skeggs 已提交
1138
gf100_gr_ctxctl_isr(struct gf100_gr *gr)
1139
{
1140 1141
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1142
	u32 stat = nvkm_rd32(device, 0x409c18);
1143

1144
	if (stat & 0x00000001) {
1145
		u32 code = nvkm_rd32(device, 0x409814);
1146
		if (code == E_BAD_FWMTHD) {
1147 1148
			u32 class = nvkm_rd32(device, 0x409808);
			u32  addr = nvkm_rd32(device, 0x40980c);
1149 1150
			u32  subc = (addr & 0x00070000) >> 16;
			u32  mthd = (addr & 0x00003ffc);
1151
			u32  data = nvkm_rd32(device, 0x409810);
1152

1153 1154 1155
			nvkm_error(subdev, "FECS MTHD subc %d class %04x "
					   "mthd %04x data %08x\n",
				   subc, class, mthd, data);
1156

1157
			nvkm_wr32(device, 0x409c20, 0x00000001);
1158 1159
			stat &= ~0x00000001;
		} else {
1160
			nvkm_error(subdev, "FECS ucode error %d\n", code);
1161 1162
		}
	}
1163

1164
	if (stat & 0x00080000) {
1165
		nvkm_error(subdev, "FECS watchdog timeout\n");
B
Ben Skeggs 已提交
1166
		gf100_gr_ctxctl_debug(gr);
1167
		nvkm_wr32(device, 0x409c20, 0x00080000);
1168 1169 1170 1171
		stat &= ~0x00080000;
	}

	if (stat) {
1172
		nvkm_error(subdev, "FECS %08x\n", stat);
B
Ben Skeggs 已提交
1173
		gf100_gr_ctxctl_debug(gr);
1174
		nvkm_wr32(device, 0x409c20, stat);
1175
	}
1176 1177
}

1178
static void
1179
gf100_gr_intr(struct nvkm_gr *base)
1180
{
1181 1182 1183
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1184 1185
	struct nvkm_fifo_chan *chan;
	unsigned long flags;
1186 1187 1188
	u64 inst = nvkm_rd32(device, 0x409b00) & 0x0fffffff;
	u32 stat = nvkm_rd32(device, 0x400100);
	u32 addr = nvkm_rd32(device, 0x400704);
1189 1190
	u32 mthd = (addr & 0x00003ffc);
	u32 subc = (addr & 0x00070000) >> 16;
1191 1192
	u32 data = nvkm_rd32(device, 0x400708);
	u32 code = nvkm_rd32(device, 0x400110);
1193
	u32 class;
1194 1195
	const char *name = "unknown";
	int chid = -1;
1196

1197
	chan = nvkm_fifo_chan_inst(device->fifo, (u64)inst << 12, &flags);
1198 1199 1200 1201
	if (chan) {
		name = chan->object.client->name;
		chid = chan->chid;
	}
1202

1203
	if (device->card_type < NV_E0 || subc < 4)
1204
		class = nvkm_rd32(device, 0x404200 + (subc * 4));
1205 1206 1207
	else
		class = 0x0000;

1208 1209 1210 1211 1212
	if (stat & 0x00000001) {
		/*
		 * notifier interrupt, only needed for cyclestats
		 * can be safely ignored
		 */
1213
		nvkm_wr32(device, 0x400100, 0x00000001);
1214 1215 1216
		stat &= ~0x00000001;
	}

1217
	if (stat & 0x00000010) {
1218
		if (!gf100_gr_mthd_sw(device, class, mthd, data)) {
1219 1220
			nvkm_error(subdev, "ILLEGAL_MTHD ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
1221 1222
				   chid, inst << 12, name, subc,
				   class, mthd, data);
1223
		}
1224
		nvkm_wr32(device, 0x400100, 0x00000010);
1225 1226 1227 1228
		stat &= ~0x00000010;
	}

	if (stat & 0x00000020) {
1229 1230
		nvkm_error(subdev, "ILLEGAL_CLASS ch %d [%010llx %s] "
			   "subc %d class %04x mthd %04x data %08x\n",
1231
			   chid, inst << 12, name, subc, class, mthd, data);
1232
		nvkm_wr32(device, 0x400100, 0x00000020);
1233 1234 1235 1236
		stat &= ~0x00000020;
	}

	if (stat & 0x00100000) {
1237 1238 1239 1240 1241
		const struct nvkm_enum *en =
			nvkm_enum_find(nv50_data_error_names, code);
		nvkm_error(subdev, "DATA_ERROR %08x [%s] ch %d [%010llx %s] "
				   "subc %d class %04x mthd %04x data %08x\n",
			   code, en ? en->name : "", chid, inst << 12,
1242
			   name, subc, class, mthd, data);
1243
		nvkm_wr32(device, 0x400100, 0x00100000);
1244 1245 1246 1247
		stat &= ~0x00100000;
	}

	if (stat & 0x00200000) {
1248
		nvkm_error(subdev, "TRAP ch %d [%010llx %s]\n",
1249
			   chid, inst << 12, name);
B
Ben Skeggs 已提交
1250
		gf100_gr_trap_intr(gr);
1251
		nvkm_wr32(device, 0x400100, 0x00200000);
1252 1253 1254 1255
		stat &= ~0x00200000;
	}

	if (stat & 0x00080000) {
B
Ben Skeggs 已提交
1256
		gf100_gr_ctxctl_isr(gr);
1257
		nvkm_wr32(device, 0x400100, 0x00080000);
1258 1259 1260 1261
		stat &= ~0x00080000;
	}

	if (stat) {
1262
		nvkm_error(subdev, "intr %08x\n", stat);
1263
		nvkm_wr32(device, 0x400100, stat);
1264 1265
	}

1266
	nvkm_wr32(device, 0x400500, 0x00010001);
1267
	nvkm_fifo_chan_put(device->fifo, flags, &chan);
1268 1269
}

1270
void
B
Ben Skeggs 已提交
1271
gf100_gr_init_fw(struct gf100_gr *gr, u32 fuc_base,
1272
		 struct gf100_gr_fuc *code, struct gf100_gr_fuc *data)
1273
{
1274
	struct nvkm_device *device = gr->base.engine.subdev.device;
1275
	int i;
1276

1277
	nvkm_wr32(device, fuc_base + 0x01c0, 0x01000000);
1278
	for (i = 0; i < data->size / 4; i++)
1279
		nvkm_wr32(device, fuc_base + 0x01c4, data->data[i]);
1280

1281
	nvkm_wr32(device, fuc_base + 0x0180, 0x01000000);
1282 1283
	for (i = 0; i < code->size / 4; i++) {
		if ((i & 0x3f) == 0)
1284 1285
			nvkm_wr32(device, fuc_base + 0x0188, i >> 6);
		nvkm_wr32(device, fuc_base + 0x0184, code->data[i]);
1286
	}
1287 1288 1289

	/* code must be padded to 0x40 words */
	for (; i & 0x3f; i++)
1290
		nvkm_wr32(device, fuc_base + 0x0184, 0);
1291 1292
}

1293
static void
B
Ben Skeggs 已提交
1294
gf100_gr_init_csdata(struct gf100_gr *gr,
1295 1296
		     const struct gf100_gr_pack *pack,
		     u32 falcon, u32 starstar, u32 base)
1297
{
1298
	struct nvkm_device *device = gr->base.engine.subdev.device;
1299 1300
	const struct gf100_gr_pack *iter;
	const struct gf100_gr_init *init;
1301
	u32 addr = ~0, prev = ~0, xfer = 0;
1302 1303
	u32 star, temp;

1304 1305 1306
	nvkm_wr32(device, falcon + 0x01c0, 0x02000000 + starstar);
	star = nvkm_rd32(device, falcon + 0x01c4);
	temp = nvkm_rd32(device, falcon + 0x01c4);
1307 1308
	if (temp > star)
		star = temp;
1309
	nvkm_wr32(device, falcon + 0x01c0, 0x01000000 + star);
1310

1311 1312 1313 1314 1315 1316 1317
	pack_for_each_init(init, iter, pack) {
		u32 head = init->addr - base;
		u32 tail = head + init->count * init->pitch;
		while (head < tail) {
			if (head != prev + 4 || xfer >= 32) {
				if (xfer) {
					u32 data = ((--xfer << 26) | addr);
1318
					nvkm_wr32(device, falcon + 0x01c4, data);
1319 1320 1321 1322
					star += 4;
				}
				addr = head;
				xfer = 0;
1323
			}
1324 1325 1326
			prev = head;
			xfer = xfer + 1;
			head = head + init->pitch;
1327
		}
1328
	}
1329

1330 1331 1332
	nvkm_wr32(device, falcon + 0x01c4, (--xfer << 26) | addr);
	nvkm_wr32(device, falcon + 0x01c0, 0x01000004 + starstar);
	nvkm_wr32(device, falcon + 0x01c4, star + 4);
1333 1334
}

1335
int
B
Ben Skeggs 已提交
1336
gf100_gr_init_ctxctl(struct gf100_gr *gr)
1337
{
1338
	const struct gf100_grctx_func *grctx = gr->func->grctx;
1339 1340
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
1341
	int i;
1342

B
Ben Skeggs 已提交
1343
	if (gr->firmware) {
1344
		/* load fuc microcode */
1345
		nvkm_mc_unk260(device->mc, 0);
1346 1347
		gf100_gr_init_fw(gr, 0x409000, &gr->fuc409c, &gr->fuc409d);
		gf100_gr_init_fw(gr, 0x41a000, &gr->fuc41ac, &gr->fuc41ad);
1348
		nvkm_mc_unk260(device->mc, 1);
1349

1350
		/* start both of them running */
1351 1352 1353 1354 1355
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x41a10c, 0x00000000);
		nvkm_wr32(device, 0x40910c, 0x00000000);
		nvkm_wr32(device, 0x41a100, 0x00000002);
		nvkm_wr32(device, 0x409100, 0x00000002);
1356 1357 1358 1359 1360
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800) & 0x00000001)
				break;
		) < 0)
			return -EBUSY;
B
Ben Skeggs 已提交
1361

1362 1363 1364
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x7fffffff);
		nvkm_wr32(device, 0x409504, 0x00000021);
B
Ben Skeggs 已提交
1365

1366 1367 1368
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000010);
1369 1370 1371 1372
		if (nvkm_msec(device, 2000,
			if ((gr->size = nvkm_rd32(device, 0x409800)))
				break;
		) < 0)
1373
			return -EBUSY;
1374

1375 1376 1377
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000016);
1378 1379 1380 1381
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1382 1383
			return -EBUSY;

1384 1385 1386
		nvkm_wr32(device, 0x409840, 0xffffffff);
		nvkm_wr32(device, 0x409500, 0x00000000);
		nvkm_wr32(device, 0x409504, 0x00000025);
1387 1388 1389 1390
		if (nvkm_msec(device, 2000,
			if (nvkm_rd32(device, 0x409800))
				break;
		) < 0)
1391 1392
			return -EBUSY;

1393
		if (device->chipset >= 0xe0) {
1394 1395 1396
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000030);
1397 1398 1399 1400
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1401 1402
				return -EBUSY;

1403 1404 1405 1406
			nvkm_wr32(device, 0x409810, 0xb00095c8);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000031);
1407 1408 1409 1410
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1411 1412
				return -EBUSY;

1413 1414 1415 1416
			nvkm_wr32(device, 0x409810, 0x00080420);
			nvkm_wr32(device, 0x409800, 0x00000000);
			nvkm_wr32(device, 0x409500, 0x00000001);
			nvkm_wr32(device, 0x409504, 0x00000032);
1417 1418 1419 1420
			if (nvkm_msec(device, 2000,
				if (nvkm_rd32(device, 0x409800))
					break;
			) < 0)
1421 1422
				return -EBUSY;

1423 1424 1425
			nvkm_wr32(device, 0x409614, 0x00000070);
			nvkm_wr32(device, 0x409614, 0x00000770);
			nvkm_wr32(device, 0x40802c, 0x00000001);
1426 1427
		}

B
Ben Skeggs 已提交
1428 1429
		if (gr->data == NULL) {
			int ret = gf100_grctx_generate(gr);
1430
			if (ret) {
1431
				nvkm_error(subdev, "failed to construct context\n");
1432 1433 1434 1435 1436
				return ret;
			}
		}

		return 0;
1437
	} else
1438
	if (!gr->func->fecs.ucode) {
1439
		return -ENOSYS;
1440
	}
1441

1442
	/* load HUB microcode */
1443
	nvkm_mc_unk260(device->mc, 0);
1444
	nvkm_wr32(device, 0x4091c0, 0x01000000);
1445 1446
	for (i = 0; i < gr->func->fecs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x4091c4, gr->func->fecs.ucode->data.data[i]);
1447

1448
	nvkm_wr32(device, 0x409180, 0x01000000);
1449
	for (i = 0; i < gr->func->fecs.ucode->code.size / 4; i++) {
1450
		if ((i & 0x3f) == 0)
1451
			nvkm_wr32(device, 0x409188, i >> 6);
1452
		nvkm_wr32(device, 0x409184, gr->func->fecs.ucode->code.data[i]);
1453 1454 1455
	}

	/* load GPC microcode */
1456
	nvkm_wr32(device, 0x41a1c0, 0x01000000);
1457 1458
	for (i = 0; i < gr->func->gpccs.ucode->data.size / 4; i++)
		nvkm_wr32(device, 0x41a1c4, gr->func->gpccs.ucode->data.data[i]);
1459

1460
	nvkm_wr32(device, 0x41a180, 0x01000000);
1461
	for (i = 0; i < gr->func->gpccs.ucode->code.size / 4; i++) {
1462
		if ((i & 0x3f) == 0)
1463
			nvkm_wr32(device, 0x41a188, i >> 6);
1464
		nvkm_wr32(device, 0x41a184, gr->func->gpccs.ucode->code.data[i]);
1465
	}
1466
	nvkm_mc_unk260(device->mc, 1);
1467

1468
	/* load register lists */
1469 1470 1471 1472
	gf100_gr_init_csdata(gr, grctx->hub, 0x409000, 0x000, 0x000000);
	gf100_gr_init_csdata(gr, grctx->gpc, 0x41a000, 0x000, 0x418000);
	gf100_gr_init_csdata(gr, grctx->tpc, 0x41a000, 0x004, 0x419800);
	gf100_gr_init_csdata(gr, grctx->ppc, 0x41a000, 0x008, 0x41be00);
1473

1474
	/* start HUB ucode running, it'll init the GPCs */
1475 1476
	nvkm_wr32(device, 0x40910c, 0x00000000);
	nvkm_wr32(device, 0x409100, 0x00000002);
1477 1478 1479 1480
	if (nvkm_msec(device, 2000,
		if (nvkm_rd32(device, 0x409800) & 0x80000000)
			break;
	) < 0) {
B
Ben Skeggs 已提交
1481
		gf100_gr_ctxctl_debug(gr);
1482 1483 1484
		return -EBUSY;
	}

1485
	gr->size = nvkm_rd32(device, 0x409804);
B
Ben Skeggs 已提交
1486 1487
	if (gr->data == NULL) {
		int ret = gf100_grctx_generate(gr);
1488
		if (ret) {
1489
			nvkm_error(subdev, "failed to construct context\n");
1490 1491
			return ret;
		}
1492 1493 1494
	}

	return 0;
1495 1496
}

1497 1498 1499 1500 1501 1502 1503 1504 1505 1506 1507 1508 1509 1510 1511 1512 1513 1514 1515 1516 1517 1518 1519 1520 1521 1522 1523 1524 1525 1526 1527 1528 1529 1530 1531 1532 1533
/*
 * One-time initialisation: allocate two scratch buffers and discover the
 * unit topology.
 *
 * Two 0x1000-byte instance-memory buffers (unk4188b4 / unk4188b8) are
 * allocated and filled with the word 0x00000010.  The ROP and GPC counts
 * are decoded from register 0x409604, and for each GPC the TPC count, the
 * PPC count (from the chip's function table) and the per-PPC TPC mask are
 * recorded.  Finally a chipset-specific value is chosen for
 * magic_not_rop_nr.
 *
 * Returns 0 on success or the error from nvkm_memory_new().
 */
static int
gf100_gr_oneinit(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct nvkm_device *device = gr->base.engine.subdev.device;
	int ret, i, j;

	nvkm_pmu_pgob(device->pmu, false);

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b4);
	if (ret)
		return ret;

	ret = nvkm_memory_new(device, NVKM_MEM_TARGET_INST, 0x1000, 256, false,
			      &gr->unk4188b8);
	if (ret)
		return ret;

	nvkm_kmap(gr->unk4188b4);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b4, i, 0x00000010);
	nvkm_done(gr->unk4188b4);

	nvkm_kmap(gr->unk4188b8);
	for (i = 0; i < 0x1000; i += 4)
		nvkm_wo32(gr->unk4188b8, i, 0x00000010);
	nvkm_done(gr->unk4188b8);

	/* 0x409604: ROP count in bits 16-20, GPC count in bits 0-4 */
	gr->rop_nr = (nvkm_rd32(device, 0x409604) & 0x001f0000) >> 16;
	gr->gpc_nr =  nvkm_rd32(device, 0x409604) & 0x0000001f;
	for (i = 0; i < gr->gpc_nr; i++) {
		gr->tpc_nr[i]  = nvkm_rd32(device, GPC_UNIT(i, 0x2608));
		gr->tpc_total += gr->tpc_nr[i];
		gr->ppc_nr[i]  = gr->func->ppc_nr;
		for (j = 0; j < gr->ppc_nr[i]; j++) {
			/* only the low 8 bits of the register are kept */
			u8 mask = nvkm_rd32(device, GPC_UNIT(i, 0x0c30 + (j * 4)));
			if (mask)
				gr->ppc_mask[i] |= (1 << j);
			gr->ppc_tpc_nr[i][j] = hweight8(mask);
		}
	}

	/*XXX: these need figuring out... though it might not even matter */
	switch (device->chipset) {
	case 0xc0:
		if (gr->tpc_total == 11) { /* 465, 3/4/4/0, 4 */
			gr->magic_not_rop_nr = 0x07;
		} else
		if (gr->tpc_total == 14) { /* 470, 3/3/4/4, 5 */
			gr->magic_not_rop_nr = 0x05;
		} else
		if (gr->tpc_total == 15) { /* 480, 3/4/4/4, 6 */
			gr->magic_not_rop_nr = 0x06;
		}
		break;
	case 0xc3: /* 450, 4/0/0/0, 2 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xc4: /* 460, 3/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc1: /* 2/0/0/0, 1 */
		gr->magic_not_rop_nr = 0x01;
		break;
	case 0xc8: /* 4/4/3/4, 5 */
		gr->magic_not_rop_nr = 0x06;
		break;
	case 0xce: /* 4/4/0/0, 4 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xcf: /* 4/0/0/0, 3 */
		gr->magic_not_rop_nr = 0x03;
		break;
	case 0xd7:
	case 0xd9: /* 1/0/0/0, 1 */
	case 0xea: /* gk20a */
	case 0x12b: /* gm20b */
		gr->magic_not_rop_nr = 0x01;
		break;
	}

	return 0;
}

int
gf100_gr_init_(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	nvkm_pmu_pgob(gr->base.engine.subdev.device->pmu, false);
	return gr->func->init(gr);
}

/*
 * Release the buffer held by a firmware image descriptor and clear the
 * pointer, so calling this again on the same descriptor is harmless.
 */
void
gf100_gr_dtor_fw(struct gf100_gr_fuc *fuc)
{
	void *image = fuc->data;

	fuc->data = NULL;
	kfree(image);
}

/*
 * Tear down a gf100_gr instance: run the optional chip-specific
 * destructor, free the context data, the four firmware images and the
 * two scratch buffers.  Returns the gf100_gr pointer; the structure
 * itself is not freed here.
 */
void *
gf100_gr_dtor(struct nvkm_gr *base)
{
	struct gf100_gr *gr = gf100_gr(base);
	struct gf100_gr_fuc *images[] = {
		&gr->fuc409c,
		&gr->fuc409d,
		&gr->fuc41ac,
		&gr->fuc41ad,
	};
	unsigned int n;

	if (gr->func->dtor)
		gr->func->dtor(gr);
	kfree(gr->data);

	for (n = 0; n < sizeof(images) / sizeof(images[0]); n++)
		gf100_gr_dtor_fw(images[n]);

	nvkm_memory_del(&gr->unk4188b8);
	nvkm_memory_del(&gr->unk4188b4);

	return gr;
}

/*
 * nvkm_gr function table shared by every gf100_gr instance; passed to
 * nvkm_gr_ctor() by gf100_gr_ctor().
 */
static const struct nvkm_gr_func
gf100_gr_ = {
	.dtor = gf100_gr_dtor,
	.oneinit = gf100_gr_oneinit,
	.init = gf100_gr_init_,
	.intr = gf100_gr_intr,
	.units = gf100_gr_units,
	.chan_new = gf100_gr_chan_new,
	.object_get = gf100_gr_object_get,
};

/*
 * Load one external firmware image into @fuc.
 *
 * The file requested is "nvidia/<chip>/<fwname>.bin", where <chip> is the
 * device's chip name converted to lowercase (truncated to 15 characters).
 * The image is duplicated so the firmware handle can be released at once.
 *
 * @gr:     GR engine instance
 * @fwname: base name of the firmware file, without directory or ".bin"
 * @fuc:    descriptor that receives the copy; release it with
 *          gf100_gr_dtor_fw()
 *
 * Returns 0 on success, the request_firmware() error if the file cannot
 * be loaded, or -ENOMEM if the copy cannot be allocated.
 */
int
gf100_gr_ctor_fw(struct gf100_gr *gr, const char *fwname,
		 struct gf100_gr_fuc *fuc)
{
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
	const char *chip = device->chip->name;
	const struct firmware *fw;
	char f[64];
	char cname[16];
	int ret;
	int i;

	/* Convert device name to lowercase, always leaving room for the NUL */
	for (i = 0; i < (int)sizeof(cname) - 1 && chip[i]; i++)
		cname[i] = tolower(chip[i]);
	cname[i] = '\0';

	snprintf(f, sizeof(f), "nvidia/%s/%s.bin", cname, fwname);
	ret = request_firmware(&fw, f, device->dev);
	if (ret) {
		nvkm_error(subdev, "failed to load %s\n", fwname);
		return ret;
	}

	/* only publish a size when there is data to go with it */
	fuc->data = kmemdup(fw->data, fw->size, GFP_KERNEL);
	fuc->size = fuc->data ? fw->size : 0;
	release_firmware(fw);
	return (fuc->data != NULL) ? 0 : -ENOMEM;
}

/*
 * Common constructor for gf100-style GR engines.
 *
 * Records the chip function table, decides whether external firmware is
 * used (config option "NvGrUseFW", defaulting to true only when the chip
 * has no built-in FECS microcode), constructs the base nvkm_gr object and,
 * when external firmware is selected, loads the four firmware images.
 *
 * Returns 0 on success, the nvkm_gr_ctor() error, or -ENODEV if any
 * firmware image fails to load.
 */
int
gf100_gr_ctor(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct gf100_gr *gr)
{
	const int has_builtin = func->fecs.ucode != NULL;
	int ret;

	gr->func = func;
	gr->firmware = nvkm_boolopt(device->cfgopt, "NvGrUseFW", !has_builtin);

	/* the engine is only enabled if some microcode source exists */
	ret = nvkm_gr_ctor(&gf100_gr_, device, index, 0x08001000,
			   gr->firmware || has_builtin, &gr->base);
	if (ret)
		return ret;

	if (!gr->firmware)
		return 0;

	nvkm_info(&gr->base.engine.subdev, "using external firmware\n");

	/* stop at the first image that fails to load */
	if (gf100_gr_ctor_fw(gr, "fecs_inst", &gr->fuc409c))
		return -ENODEV;
	if (gf100_gr_ctor_fw(gr, "fecs_data", &gr->fuc409d))
		return -ENODEV;
	if (gf100_gr_ctor_fw(gr, "gpccs_inst", &gr->fuc41ac))
		return -ENODEV;
	if (gf100_gr_ctor_fw(gr, "gpccs_data", &gr->fuc41ad))
		return -ENODEV;

	return 0;
}

1689
int
1690 1691 1692 1693 1694 1695 1696 1697 1698 1699 1700 1701
gf100_gr_new_(const struct gf100_gr_func *func, struct nvkm_device *device,
	      int index, struct nvkm_gr **pgr)
{
	struct gf100_gr *gr;
	if (!(gr = kzalloc(sizeof(*gr), GFP_KERNEL)))
		return -ENOMEM;
	*pgr = &gr->base;
	return gf100_gr_ctor(func, device, index, gr);
}

int
gf100_gr_init(struct gf100_gr *gr)
1702
{
1703
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
1704
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
1705 1706 1707
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc, rop;
1708
	int i;
1709

1710 1711 1712 1713 1714 1715
	nvkm_wr32(device, GPC_BCAST(0x0880), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x08a4), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0888), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x088c), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0890), 0x00000000);
	nvkm_wr32(device, GPC_BCAST(0x0894), 0x00000000);
1716 1717
	nvkm_wr32(device, GPC_BCAST(0x08b4), nvkm_memory_addr(gr->unk4188b4) >> 8);
	nvkm_wr32(device, GPC_BCAST(0x08b8), nvkm_memory_addr(gr->unk4188b8) >> 8);
1718

1719
	gf100_gr_mmio(gr, gr->func->mmio);
1720

B
Ben Skeggs 已提交
1721 1722
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
1723
		do {
B
Ben Skeggs 已提交
1724
			gpc = (gpc + 1) % gr->gpc_nr;
1725
		} while (!tpcnr[gpc]);
B
Ben Skeggs 已提交
1726
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
1727 1728 1729 1730

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

1731 1732 1733 1734
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
1735

B
Ben Skeggs 已提交
1736
	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1737
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
B
Ben Skeggs 已提交
1738
			gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
1739
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
B
Ben Skeggs 已提交
1740
			gr->tpc_total);
1741
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
1742 1743
	}

1744
	if (device->chipset != 0xd7)
1745
		nvkm_wr32(device, GPC_BCAST(0x1bd4), magicgpc918);
M
Maarten Lankhorst 已提交
1746
	else
1747
		nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
B
Ben Skeggs 已提交
1748

1749
	nvkm_wr32(device, GPC_BCAST(0x08ac), nvkm_rd32(device, 0x100800));
B
Ben Skeggs 已提交
1750

1751
	nvkm_wr32(device, 0x400500, 0x00010001);
B
Ben Skeggs 已提交
1752

1753 1754
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
B
Ben Skeggs 已提交
1755

1756 1757 1758 1759 1760 1761 1762 1763 1764 1765 1766
	nvkm_wr32(device, 0x409c24, 0x000f0000);
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
	nvkm_wr32(device, 0x408030, 0xc0000000);
	nvkm_wr32(device, 0x40601c, 0xc0000000);
	nvkm_wr32(device, 0x404490, 0xc0000000);
	nvkm_wr32(device, 0x406018, 0xc0000000);
	nvkm_wr32(device, 0x405840, 0xc0000000);
	nvkm_wr32(device, 0x405844, 0x00ffffff);
	nvkm_mask(device, 0x419cc0, 0x00000008, 0x00000008);
	nvkm_mask(device, 0x419eb4, 0x00001000, 0x00001000);
B
Ben Skeggs 已提交
1767 1768

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
1769 1770 1771 1772
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0420), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0900), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x1028), 0xc0000000);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0824), 0xc0000000);
B
Ben Skeggs 已提交
1773
		for (tpc = 0; tpc < gr->tpc_nr[gpc]; tpc++) {
1774 1775 1776 1777 1778 1779 1780
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x508), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x50c), 0xffffffff);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x224), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x48c), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x084), 0xc0000000);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x644), 0x001ffffe);
			nvkm_wr32(device, TPC_UNIT(gpc, tpc, 0x64c), 0x0000000f);
1781
		}
1782 1783
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c90), 0xffffffff);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x2c94), 0xffffffff);
1784 1785
	}

B
Ben Skeggs 已提交
1786
	for (rop = 0; rop < gr->rop_nr; rop++) {
1787 1788 1789 1790
		nvkm_wr32(device, ROP_UNIT(rop, 0x144), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x070), 0xc0000000);
		nvkm_wr32(device, ROP_UNIT(rop, 0x204), 0xffffffff);
		nvkm_wr32(device, ROP_UNIT(rop, 0x208), 0xffffffff);
1791
	}
1792

1793 1794 1795 1796 1797 1798
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);
1799

1800
	nvkm_wr32(device, 0x400054, 0x34ce3464);
1801

B
Ben Skeggs 已提交
1802
	gf100_gr_zbc_init(gr);
1803

B
Ben Skeggs 已提交
1804
	return gf100_gr_init_ctxctl(gr);
1805 1806
}

1807
#include "fuc/hubgf100.fuc3.h"
1808

1809 1810 1811 1812 1813 1814
/*
 * Built-in FECS microcode: code and data segments come from the
 * gf100_grhub_code / gf100_grhub_data arrays; sizes are in bytes.
 */
struct gf100_gr_ucode
gf100_gr_fecs_ucode = {
	.code.data = gf100_grhub_code,
	.code.size = sizeof(gf100_grhub_code),
	.data.data = gf100_grhub_data,
	.data.size = sizeof(gf100_grhub_data),
};

1817
#include "fuc/gpcgf100.fuc3.h"
1818

1819 1820 1821 1822 1823 1824
/*
 * Built-in GPCCS microcode: code and data segments come from the
 * gf100_grgpc_code / gf100_grgpc_data arrays; sizes are in bytes.
 */
struct gf100_gr_ucode
gf100_gr_gpccs_ucode = {
	.code.data = gf100_grgpc_code,
	.code.size = sizeof(gf100_grgpc_code),
	.data.data = gf100_grgpc_data,
	.data.size = sizeof(gf100_grgpc_data),
};

1827 1828
static const struct gf100_gr_func
gf100_gr = {
1829 1830 1831 1832
	.init = gf100_gr_init,
	.mmio = gf100_gr_pack_mmio,
	.fecs.ucode = &gf100_gr_fecs_ucode,
	.gpccs.ucode = &gf100_gr_gpccs_ucode,
1833 1834 1835 1836 1837 1838 1839 1840 1841 1842
	.grctx = &gf100_grctx,
	.sclass = {
		{ -1, -1, FERMI_TWOD_A },
		{ -1, -1, FERMI_MEMORY_TO_MEMORY_FORMAT_A },
		{ -1, -1, FERMI_A, &gf100_fermi },
		{ -1, -1, FERMI_COMPUTE_A },
		{}
	}
};

1843 1844 1845 1846 1847
/*
 * Create a GR engine instance for gf100 using the gf100 function table.
 * Returns whatever gf100_gr_new_() returns.
 */
int
gf100_gr_new(struct nvkm_device *device, int index, struct nvkm_gr **pgr)
{
	const struct gf100_gr_func *func = &gf100_gr;

	return gf100_gr_new_(func, device, index, pgr);
}