/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drmP.h>
#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"

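/*
 * Check whether the caller may create a context with the requested
 * scheduler priority: anything above NORMAL requires CAP_SYS_NICE or
 * DRM master status on the file.
 */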
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      enum drm_sched_priority priority)
{
	/* NORMAL and below are accessible by everyone */
	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

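/*
 * Initialize a context: validate the priority, allocate the per-ring
 * fence slots (amdgpu_sched_jobs entries per ring) and create one
 * scheduler entity per ring in the requested run queue, skipping the
 * KIQ ring, which is not exposed to user space.
 */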
static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   enum drm_sched_priority priority,
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
{
	unsigned i, j;
	int r;

	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
		return -EINVAL;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));
	ctx->adev = adev;
	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
			      sizeof(struct dma_fence*), GFP_KERNEL);
	if (!ctx->fences)
		return -ENOMEM;

	mutex_init(&ctx->lock);

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		ctx->rings[i].sequence = 1;
		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
	}

	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

	/* create context entity for each ring */
	for (i = 0; i < adev->num_rings; i++) {
		struct amdgpu_ring *ring = adev->rings[i];
		struct drm_sched_rq *rq;

		rq = &ring->sched.sched_rq[priority];

		if (ring == &adev->gfx.kiq.ring)
			continue;

		r = drm_sched_entity_init(&ctx->rings[i].entity,
					  &rq, 1, &ctx->guilty);
		if (r)
			goto failed;
	}

	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
	if (r)
		goto failed;

	return 0;

failed:
	for (j = 0; j < i; j++)
		drm_sched_entity_destroy(&ctx->rings[j].entity);
	kfree(ctx->fences);
	ctx->fences = NULL;
	return r;
}

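/*
 * Final kref release callback: drop every fence still referenced by the
 * context, release the queue manager and free the context itself.
 */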
static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
		for (j = 0; j < amdgpu_sched_jobs; ++j)
			dma_fence_put(ctx->rings[i].fences[j]);
	kfree(ctx->fences);
	ctx->fences = NULL;

	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);

	mutex_destroy(&ctx->lock);

	kfree(ctx);
}

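/*
 * Allocate a context and a handle for it: the handle comes from the
 * per-file IDR (taken under mgr->lock) and is returned through *id.
 */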
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    enum drm_sched_priority priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

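/*
 * Normal release path: destroy the per-ring scheduler entities (the KIQ
 * ring never got one) and then tear the context down.
 */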
static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);

	for (i = 0; i < ctx->adev->num_rings; i++) {

		if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
			continue;

		drm_sched_entity_destroy(&ctx->rings[i].entity);
	}

	amdgpu_ctx_fini(ref);
}

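/*
 * Remove the handle from the IDR and drop the reference it held.
 */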
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

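/*
 * Legacy query: report whether a GPU reset has occurred since the last
 * call by comparing against the device's global reset counter.
 */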
static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

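/*
 * Extended query: report reset, VRAM-lost and per-context guilty status
 * as individual flags.
 */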
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
	struct amdgpu_fpriv *fpriv, uint32_t id,
	union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	mutex_unlock(&mgr->lock);
	return 0;
}

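/*
 * DRM_AMDGPU_CTX ioctl entry point: dispatches context allocation, free
 * and the two query operations.
 */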
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id;
	enum drm_sched_priority priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	r = 0;
	id = args->in.ctx_id;
	priority = amdgpu_to_sched_priority(args->in.priority);

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (priority == DRM_SCHED_PRIORITY_INVALID)
		priority = DRM_SCHED_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

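/*
 * Look up a context by handle and take a reference; paired with
 * amdgpu_ctx_put().
 */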
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

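/*
 * Remember the fence of the newest submission in the ring's fence slot
 * (the slot it replaces must already have signaled) and hand back the
 * sequence number user space will later wait on.
 */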
int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
			 struct dma_fence *fence, uint64_t *handler)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	uint64_t seq = cring->sequence;
	unsigned idx = 0;
	struct dma_fence *other = NULL;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = cring->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	cring->fences[idx] = fence;
	cring->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);
	if (handler)
		*handler = seq;

	return 0;
}

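/*
 * Look up the fence for a given sequence number; ~0ull means "the last
 * submission". Returns NULL when the fence is old enough to have been
 * recycled and is therefore known to have signaled already.
 */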
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct amdgpu_ring *ring, uint64_t seq)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = ctx->rings[ring->idx].sequence - 1;

	if (seq >= cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < cring->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(cring->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

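/*
 * Move every entity of the context to the run queue matching the new
 * effective priority (the override if set, otherwise the creation
 * priority).
 */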
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  enum drm_sched_priority priority)
{
	int i;
	struct amdgpu_device *adev = ctx->adev;
	struct drm_sched_rq *rq;
	struct drm_sched_entity *entity;
	struct amdgpu_ring *ring;
	enum drm_sched_priority ctx_prio;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		entity = &ctx->rings[i].entity;
		rq = &ring->sched.sched_rq[ctx_prio];

		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
			continue;

		drm_sched_entity_set_rq(entity, rq);
	}
}

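/*
 * Wait for the submission that previously used the fence slot the next
 * submission is about to reuse, so the slot is free to be overwritten.
 */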
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
	struct dma_fence *other = cring->fences[idx];

	if (other) {
		signed long r;
		r = dma_fence_wait(other, true);
		if (r < 0) {
			if (r != -ERESTARTSYS)
				DRM_ERROR("Error (%ld) waiting for fence!\n", r);

			return r;
		}
	}

	return 0;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
}

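/*
 * Flush the scheduler entities of every context owned by this client so
 * no further jobs are pushed to them, sharing one wait budget across all
 * of them.
 */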
void amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;
	long max_wait = MAX_WAIT_SCHED_ENTITY_Q_EMPTY;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev) {
			mutex_unlock(&mgr->lock);
			return;
		}

		for (i = 0; i < ctx->adev->num_rings; i++) {

			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
				continue;

			max_wait = drm_sched_entity_flush(&ctx->rings[i].entity,
							  max_wait);
		}
	}
	mutex_unlock(&mgr->lock);
}

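/*
 * Tear down the scheduler entities of every remaining context; a context
 * whose refcount is still above one at this point is reported as leaked.
 */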
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev)
			return;

		for (i = 0; i < ctx->adev->num_rings; i++) {

			if (ctx->adev->rings[i] == &ctx->adev->gfx.kiq.ring)
				continue;

			if (kref_read(&ctx->refcount) == 1)
				drm_sched_entity_fini(&ctx->rings[i].entity);
			else
				DRM_ERROR("ctx %p is still alive\n", ctx);
		}
	}
}

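/*
 * Final cleanup of the context manager: release every context still in
 * the IDR, then destroy the IDR and the lock themselves.
 */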
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}