/*
 * Copyright (c) 2014-2015, NVIDIA CORPORATION. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */
22
#include "gk20a.h"
23
#include "ctxgf100.h"
24

25
#include <nvif/class.h>
26
#include <subdev/timer.h>
27

28
static struct nvkm_oclass
29
gk20a_gr_sclass[] = {
30 31
	{ FERMI_TWOD_A, &nvkm_object_ofuncs },
	{ KEPLER_INLINE_TO_MEMORY_A, &nvkm_object_ofuncs },
32 33
	{ KEPLER_C, &gf100_fermi_ofuncs, gf100_gr_9097_omthds },
	{ KEPLER_COMPUTE_A, &nvkm_object_ofuncs, gf100_gr_90c0_omthds },
34 35 36
	{}
};

37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
/*
 * Release a pack previously built by one of the gk20a_gr_*_to_*() helpers.
 *
 * The constructor in this file stores the helpers' return value directly in
 * the gr->fuc_* fields, so after a failed conversion a field may hold an
 * ERR_PTR() value rather than a valid allocation.  Such values (and NULL)
 * must never reach vfree(), so they are filtered out here.
 */
static void
gk20a_gr_init_dtor(struct gf100_gr_pack *pack)
{
	if (IS_ERR_OR_NULL(pack))
		return;

	vfree(pack);
}

/*
 * One address/value record as laid out in the firmware images that
 * gk20a_gr_av_to_init() and gk20a_gr_av_to_method() parse.
 */
struct gk20a_fw_av {
	u32 addr;	/* target address (class/method packed for method lists) */
	u32 data;	/* value associated with addr */
};

static struct gf100_gr_pack *
gk20a_gr_av_to_init(struct gf100_gr_fuc *fuc)
{
	struct gf100_gr_init *init;
	struct gf100_gr_pack *pack;
	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
	int i;

	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
	if (!pack)
		return ERR_PTR(-ENOMEM);

	init = (void *)(pack + 2);

	pack[0].init = init;

	for (i = 0; i < nent; i++) {
		struct gf100_gr_init *ent = &init[i];
		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];

		ent->addr = av->addr;
		ent->data = av->data;
		ent->count = 1;
		ent->pitch = 1;
	}

	return pack;
}

/*
 * One address/index/value record as laid out in the firmware image that
 * gk20a_gr_aiv_to_init() parses.
 */
struct gk20a_fw_aiv {
	u32 addr;	/* target address */
	u32 index;	/* present in the record; not consumed by the converter */
	u32 data;	/* value associated with addr */
};

static struct gf100_gr_pack *
gk20a_gr_aiv_to_init(struct gf100_gr_fuc *fuc)
{
	struct gf100_gr_init *init;
	struct gf100_gr_pack *pack;
	const int nent = (fuc->size / sizeof(struct gk20a_fw_aiv));
	int i;

	pack = vzalloc((sizeof(*pack) * 2) + (sizeof(*init) * (nent + 1)));
	if (!pack)
		return ERR_PTR(-ENOMEM);

	init = (void *)(pack + 2);

	pack[0].init = init;

	for (i = 0; i < nent; i++) {
		struct gf100_gr_init *ent = &init[i];
		struct gk20a_fw_aiv *av = &((struct gk20a_fw_aiv *)fuc->data)[i];

		ent->addr = av->addr;
		ent->data = av->data;
		ent->count = 1;
		ent->pitch = 1;
	}

	return pack;
}

static struct gf100_gr_pack *
gk20a_gr_av_to_method(struct gf100_gr_fuc *fuc)
{
	struct gf100_gr_init *init;
	struct gf100_gr_pack *pack;
	/* We don't suppose we will initialize more than 16 classes here... */
	static const unsigned int max_classes = 16;
	const int nent = (fuc->size / sizeof(struct gk20a_fw_av));
	int i, classidx = 0;
	u32 prevclass = 0;

	pack = vzalloc((sizeof(*pack) * max_classes) +
		       (sizeof(*init) * (nent + 1)));
	if (!pack)
		return ERR_PTR(-ENOMEM);

	init = (void *)(pack + max_classes);

	for (i = 0; i < nent; i++) {
		struct gf100_gr_init *ent = &init[i];
		struct gk20a_fw_av *av = &((struct gk20a_fw_av *)fuc->data)[i];
		u32 class = av->addr & 0xffff;
		u32 addr = (av->addr & 0xffff0000) >> 14;

		if (prevclass != class) {
			pack[classidx].init = ent;
			pack[classidx].type = class;
			prevclass = class;
			if (++classidx >= max_classes) {
				vfree(pack);
				return ERR_PTR(-ENOSPC);
			}
		}

		ent->addr = addr;
		ent->data = av->data;
		ent->count = 1;
		ent->pitch = 1;
	}

	return pack;
}

157
int
158 159 160 161 162
gk20a_gr_ctor(struct nvkm_object *parent, struct nvkm_object *engine,
	      struct nvkm_oclass *oclass, void *data, u32 size,
	      struct nvkm_object **pobject)
{
	int err;
B
Ben Skeggs 已提交
163
	struct gf100_gr *gr;
164 165 166 167 168 169
	struct gf100_gr_fuc fuc;

	err = gf100_gr_ctor(parent, engine, oclass, data, size, pobject);
	if (err)
		return err;

B
Ben Skeggs 已提交
170
	gr = (void *)*pobject;
171

B
Ben Skeggs 已提交
172
	err = gf100_gr_ctor_fw(gr, "sw_nonctx", &fuc);
173 174
	if (err)
		return err;
B
Ben Skeggs 已提交
175
	gr->fuc_sw_nonctx = gk20a_gr_av_to_init(&fuc);
176
	gf100_gr_dtor_fw(&fuc);
B
Ben Skeggs 已提交
177 178
	if (IS_ERR(gr->fuc_sw_nonctx))
		return PTR_ERR(gr->fuc_sw_nonctx);
179

B
Ben Skeggs 已提交
180
	err = gf100_gr_ctor_fw(gr, "sw_ctx", &fuc);
181 182
	if (err)
		return err;
B
Ben Skeggs 已提交
183
	gr->fuc_sw_ctx = gk20a_gr_aiv_to_init(&fuc);
184
	gf100_gr_dtor_fw(&fuc);
B
Ben Skeggs 已提交
185 186
	if (IS_ERR(gr->fuc_sw_ctx))
		return PTR_ERR(gr->fuc_sw_ctx);
187

B
Ben Skeggs 已提交
188
	err = gf100_gr_ctor_fw(gr, "sw_bundle_init", &fuc);
189 190
	if (err)
		return err;
B
Ben Skeggs 已提交
191
	gr->fuc_bundle = gk20a_gr_av_to_init(&fuc);
192
	gf100_gr_dtor_fw(&fuc);
B
Ben Skeggs 已提交
193 194
	if (IS_ERR(gr->fuc_bundle))
		return PTR_ERR(gr->fuc_bundle);
195

B
Ben Skeggs 已提交
196
	err = gf100_gr_ctor_fw(gr, "sw_method_init", &fuc);
197 198
	if (err)
		return err;
B
Ben Skeggs 已提交
199
	gr->fuc_method = gk20a_gr_av_to_method(&fuc);
200
	gf100_gr_dtor_fw(&fuc);
B
Ben Skeggs 已提交
201 202
	if (IS_ERR(gr->fuc_method))
		return PTR_ERR(gr->fuc_method);
203 204 205 206

	return 0;
}

207
void
208 209
gk20a_gr_dtor(struct nvkm_object *object)
{
B
Ben Skeggs 已提交
210
	struct gf100_gr *gr = (void *)object;
211

B
Ben Skeggs 已提交
212 213 214 215
	gk20a_gr_init_dtor(gr->fuc_method);
	gk20a_gr_init_dtor(gr->fuc_bundle);
	gk20a_gr_init_dtor(gr->fuc_sw_ctx);
	gk20a_gr_init_dtor(gr->fuc_sw_nonctx);
216 217 218 219 220

	gf100_gr_dtor(object);
}

static int
B
Ben Skeggs 已提交
221
gk20a_gr_wait_mem_scrubbing(struct gf100_gr *gr)
222
{
223 224
	struct nvkm_subdev *subdev = &gr->base.engine.subdev;
	struct nvkm_device *device = subdev->device;
225 226 227 228 229

	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x40910c) & 0x00000006))
			break;
	) < 0) {
230
		nvkm_error(subdev, "FECS mem scrubbing timeout\n");
231 232 233
		return -ETIMEDOUT;
	}

234 235 236 237
	if (nvkm_msec(device, 2000,
		if (!(nvkm_rd32(device, 0x41a10c) & 0x00000006))
			break;
	) < 0) {
238
		nvkm_error(subdev, "GPCCS mem scrubbing timeout\n");
239 240 241 242 243 244 245
		return -ETIMEDOUT;
	}

	return 0;
}

static void
B
Ben Skeggs 已提交
246
gk20a_gr_set_hww_esr_report_mask(struct gf100_gr *gr)
247
{
248 249 250
	struct nvkm_device *device = gr->base.engine.subdev.device;
	nvkm_wr32(device, 0x419e44, 0x1ffffe);
	nvkm_wr32(device, 0x419e4c, 0x7f);
251 252
}

253
int
254 255 256
gk20a_gr_init(struct nvkm_object *object)
{
	struct gk20a_gr_oclass *oclass = (void *)object->oclass;
B
Ben Skeggs 已提交
257
	struct gf100_gr *gr = (void *)object;
258
	struct nvkm_device *device = gr->base.engine.subdev.device;
B
Ben Skeggs 已提交
259
	const u32 magicgpc918 = DIV_ROUND_UP(0x00800000, gr->tpc_total);
260 261 262 263 264
	u32 data[TPC_MAX / 8] = {};
	u8  tpcnr[GPC_MAX];
	int gpc, tpc;
	int ret, i;

B
Ben Skeggs 已提交
265
	ret = nvkm_gr_init(&gr->base);
266 267 268 269
	if (ret)
		return ret;

	/* Clear SCC RAM */
270
	nvkm_wr32(device, 0x40802c, 0x1);
271

B
Ben Skeggs 已提交
272
	gf100_gr_mmio(gr, gr->fuc_sw_nonctx);
273

B
Ben Skeggs 已提交
274
	ret = gk20a_gr_wait_mem_scrubbing(gr);
275 276 277
	if (ret)
		return ret;

B
Ben Skeggs 已提交
278
	ret = gf100_gr_wait_idle(gr);
279 280 281 282
	if (ret)
		return ret;

	/* MMU debug buffer */
283 284
	nvkm_wr32(device, 0x100cc8, gr->unk4188b4->addr >> 8);
	nvkm_wr32(device, 0x100ccc, gr->unk4188b8->addr >> 8);
285 286

	if (oclass->init_gpc_mmu)
B
Ben Skeggs 已提交
287
		oclass->init_gpc_mmu(gr);
288 289

	/* Set the PE as stream master */
290
	nvkm_mask(device, 0x503018, 0x1, 0x1);
291 292 293

	/* Zcull init */
	memset(data, 0x00, sizeof(data));
B
Ben Skeggs 已提交
294 295
	memcpy(tpcnr, gr->tpc_nr, sizeof(gr->tpc_nr));
	for (i = 0, gpc = -1; i < gr->tpc_total; i++) {
296
		do {
B
Ben Skeggs 已提交
297
			gpc = (gpc + 1) % gr->gpc_nr;
298
		} while (!tpcnr[gpc]);
B
Ben Skeggs 已提交
299
		tpc = gr->tpc_nr[gpc] - tpcnr[gpc]--;
300 301 302 303

		data[i / 8] |= tpc << ((i % 8) * 4);
	}

304 305 306 307
	nvkm_wr32(device, GPC_BCAST(0x0980), data[0]);
	nvkm_wr32(device, GPC_BCAST(0x0984), data[1]);
	nvkm_wr32(device, GPC_BCAST(0x0988), data[2]);
	nvkm_wr32(device, GPC_BCAST(0x098c), data[3]);
B
Ben Skeggs 已提交
308 309

	for (gpc = 0; gpc < gr->gpc_nr; gpc++) {
310 311 312 313 314
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0914),
			  gr->magic_not_rop_nr << 8 | gr->tpc_nr[gpc]);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0910), 0x00040000 |
			  gr->tpc_total);
		nvkm_wr32(device, GPC_UNIT(gpc, 0x0918), magicgpc918);
315 316
	}

317
	nvkm_wr32(device, GPC_BCAST(0x3fd4), magicgpc918);
318 319

	/* Enable FIFO access */
320
	nvkm_wr32(device, 0x400500, 0x00010001);
321 322

	/* Enable interrupts */
323 324
	nvkm_wr32(device, 0x400100, 0xffffffff);
	nvkm_wr32(device, 0x40013c, 0xffffffff);
325 326

	/* Enable FECS error interrupts */
327
	nvkm_wr32(device, 0x409c24, 0x000f0000);
328 329

	/* Enable hardware warning exceptions */
330 331
	nvkm_wr32(device, 0x404000, 0xc0000000);
	nvkm_wr32(device, 0x404600, 0xc0000000);
332 333

	if (oclass->set_hww_esr_report_mask)
B
Ben Skeggs 已提交
334
		oclass->set_hww_esr_report_mask(gr);
335 336

	/* Enable TPC exceptions per GPC */
337 338
	nvkm_wr32(device, 0x419d0c, 0x2);
	nvkm_wr32(device, 0x41ac94, (((1 << gr->tpc_total) - 1) & 0xff) << 16);
339 340

	/* Reset and enable all exceptions */
341 342 343 344 345 346
	nvkm_wr32(device, 0x400108, 0xffffffff);
	nvkm_wr32(device, 0x400138, 0xffffffff);
	nvkm_wr32(device, 0x400118, 0xffffffff);
	nvkm_wr32(device, 0x400130, 0xffffffff);
	nvkm_wr32(device, 0x40011c, 0xffffffff);
	nvkm_wr32(device, 0x400134, 0xffffffff);
347

B
Ben Skeggs 已提交
348
	gf100_gr_zbc_init(gr);
349

B
Ben Skeggs 已提交
350
	return gf100_gr_init_ctxctl(gr);
351 352
}

353
struct nvkm_oclass *
354 355 356 357 358 359 360 361 362 363 364 365
gk20a_gr_oclass = &(struct gk20a_gr_oclass) {
	.gf100 = {
		.base.handle = NV_ENGINE(GR, 0xea),
		.base.ofuncs = &(struct nvkm_ofuncs) {
			.ctor = gk20a_gr_ctor,
			.dtor = gk20a_gr_dtor,
			.init = gk20a_gr_init,
			.fini = _nvkm_gr_fini,
		},
		.cclass = &gk20a_grctx_oclass,
		.sclass = gk20a_gr_sclass,
		.ppc_nr = 1,
366
	},
367 368
	.set_hww_esr_report_mask = gk20a_gr_set_hww_esr_report_mask,
}.gf100.base;