amdgpu_ctx.c 11.7 KB
Newer Older
A
Alex Deucher 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25
/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drmP.h>
26
#include <drm/drm_auth.h>
A
Alex Deucher 已提交
27
#include "amdgpu.h"
28
#include "amdgpu_sched.h"
A
Alex Deucher 已提交
29

30
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
31
				      enum drm_sched_priority priority)
32 33
{
	/* NORMAL and below are accessible by everyone */
34
	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
35 36 37 38 39 40 41 42 43 44 45 46
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static int amdgpu_ctx_init(struct amdgpu_device *adev,
47
			   enum drm_sched_priority priority,
48 49
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
A
Alex Deucher 已提交
50
{
51
	unsigned i, j;
52
	int r;
A
Alex Deucher 已提交
53

54
	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_MAX)
55 56 57 58 59 60
		return -EINVAL;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

61 62 63 64
	memset(ctx, 0, sizeof(*ctx));
	ctx->adev = adev;
	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
65
	ctx->fences = kcalloc(amdgpu_sched_jobs * AMDGPU_MAX_RINGS,
66
			      sizeof(struct dma_fence*), GFP_KERNEL);
67 68
	if (!ctx->fences)
		return -ENOMEM;
A
Alex Deucher 已提交
69

70 71
	mutex_init(&ctx->lock);

72 73
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i) {
		ctx->rings[i].sequence = 1;
74
		ctx->rings[i].fences = &ctx->fences[amdgpu_sched_jobs * i];
75
	}
76 77

	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
78
	ctx->reset_counter_query = ctx->reset_counter;
79
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
80
	ctx->init_priority = priority;
81
	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;
82

83 84
	/* create context entity for each ring */
	for (i = 0; i < adev->num_rings; i++) {
85
		struct amdgpu_ring *ring = adev->rings[i];
86
		struct drm_sched_rq *rq;
87

88
		rq = &ring->sched.sched_rq[priority];
M
Monk Liu 已提交
89 90 91 92

		if (ring == &adev->gfx.kiq.ring)
			continue;

93
		r = drm_sched_entity_init(&ring->sched, &ctx->rings[i].entity,
94
					  rq, amdgpu_sched_jobs, &ctx->guilty);
95
		if (r)
96
			goto failed;
97 98
	}

99 100 101 102
	r = amdgpu_queue_mgr_init(adev, &ctx->queue_mgr);
	if (r)
		goto failed;

A
Alex Deucher 已提交
103
	return 0;
104 105 106

failed:
	for (j = 0; j < i; j++)
107
		drm_sched_entity_fini(&adev->rings[j]->sched,
108 109 110 111
				      &ctx->rings[j].entity);
	kfree(ctx->fences);
	ctx->fences = NULL;
	return r;
A
Alex Deucher 已提交
112 113
}

114
static void amdgpu_ctx_fini(struct kref *ref)
A
Alex Deucher 已提交
115
{
116
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
117 118 119
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j;

120 121 122
	if (!adev)
		return;

123
	for (i = 0; i < AMDGPU_MAX_RINGS; ++i)
124
		for (j = 0; j < amdgpu_sched_jobs; ++j)
125
			dma_fence_put(ctx->rings[i].fences[j]);
126
	kfree(ctx->fences);
127
	ctx->fences = NULL;
128

129
	amdgpu_queue_mgr_fini(adev, &ctx->queue_mgr);
130 131

	mutex_destroy(&ctx->lock);
132 133

	kfree(ctx);
134 135 136 137
}

static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
138
			    struct drm_file *filp,
139
			    enum drm_sched_priority priority,
140 141 142
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
A
Alex Deucher 已提交
143
	struct amdgpu_ctx *ctx;
144
	int r;
A
Alex Deucher 已提交
145

146 147 148 149 150 151 152
	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, 0, GFP_KERNEL);
	if (r < 0) {
153
		mutex_unlock(&mgr->lock);
154 155 156
		kfree(ctx);
		return r;
	}
157

158
	*id = (uint32_t)r;
159
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
160 161 162 163 164
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
165 166 167 168 169 170 171
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
172
	u32 i;
173 174 175

	ctx = container_of(ref, struct amdgpu_ctx, refcount);

176 177 178
	for (i = 0; i < ctx->adev->num_rings; i++)
		drm_sched_entity_fini(&ctx->adev->rings[i]->sched,
			&ctx->rings[i].entity);
179

180
	amdgpu_ctx_fini(ref);
181 182 183 184 185 186 187 188
}

/*
 * Remove handle @id from the per-file table and drop the table's reference
 * on the context.
 *
 * Returns 0 when the handle existed, -EINVAL otherwise.
 */
static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int ret = -EINVAL;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx) {
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
		ret = 0;
	}
	mutex_unlock(&mgr->lock);

	return ret;
}

196 197 198
/*
 * Report the reset state of context @id.
 *
 * @adev: device whose GPU reset counter is sampled
 * @fpriv: per-file private data holding the context manager
 * @id: context handle to look up
 * @out: result; flags and hangs are always written as 0
 *
 * Returns 0 on success, -EINVAL when @fpriv is NULL or @id is unknown.
 */
static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx_mgr *mgr;
	struct amdgpu_ctx *ctx;
	unsigned latest;
	int ret = -EINVAL;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);

	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx)
		goto unlock;

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/*
	 * Determine whether a GPU reset has occurred since the last call by
	 * comparing against the counter value remembered by the previous
	 * query.
	 * TODO: this should ideally return NO, GUILTY, or INNOCENT.
	 */
	latest = atomic_read(&adev->gpu_reset_counter);
	out->state.reset_status = (ctx->reset_counter_query == latest) ?
		AMDGPU_CTX_NO_RESET : AMDGPU_CTX_UNKNOWN_RESET;

	/* Remember the sampled value for the next query. */
	ctx->reset_counter_query = latest;
	ret = 0;

unlock:
	mutex_unlock(&mgr->lock);
	return ret;
}

M
Monk Liu 已提交
232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265
/*
 * Report reset, VRAM-lost and guilty flags for context @id.
 *
 * @adev: device whose reset and VRAM-lost counters are sampled
 * @fpriv: per-file private data holding the context manager
 * @id: context handle to look up
 * @out: result; hangs is always written as 0, flags is built from the checks
 *
 * Returns 0 on success, -EINVAL when @fpriv is NULL or @id is unknown.
 */
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
	struct amdgpu_fpriv *fpriv, uint32_t id,
	union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx_mgr *mgr;
	struct amdgpu_ctx *ctx;
	int ret = -EINVAL;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);

	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx)
		goto unlock;

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* The GPU reset counter moved since the context was created. */
	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	/* The VRAM-lost counter moved since the context was created. */
	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	/* The context's guilty flag is non-zero. */
	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	ret = 0;

unlock:
	mutex_unlock(&mgr->lock);
	return ret;
}

A
Alex Deucher 已提交
266
int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
267
		     struct drm_file *filp)
A
Alex Deucher 已提交
268 269 270
{
	int r;
	uint32_t id;
271
	enum drm_sched_priority priority;
A
Alex Deucher 已提交
272 273 274 275 276 277 278

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = dev->dev_private;
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	r = 0;
	id = args->in.ctx_id;
279 280
	priority = amdgpu_to_sched_priority(args->in.priority);

281 282
	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
283 284
	if (priority == DRM_SCHED_PRIORITY_INVALID)
		priority = DRM_SCHED_PRIORITY_NORMAL;
A
Alex Deucher 已提交
285 286

	switch (args->in.op) {
287
	case AMDGPU_CTX_OP_ALLOC_CTX:
288
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
289 290 291 292 293 294 295 296
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
M
Monk Liu 已提交
297 298 299
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
300 301
	default:
		return -EINVAL;
A
Alex Deucher 已提交
302 303 304 305
	}

	return r;
}
306 307 308 309

/*
 * Look up context @id in the per-file table and take a reference on it.
 *
 * Returns the context with its refcount raised, or NULL when @fpriv is NULL
 * or the handle is unknown.  Release the reference with amdgpu_ctx_put().
 */
struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr;
	struct amdgpu_ctx *found;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	/* Lookup and kref_get happen under the manager lock. */
	mutex_lock(&mgr->lock);
	found = idr_find(&mgr->ctx_handles, id);
	if (found)
		kref_get(&found->refcount);
	mutex_unlock(&mgr->lock);

	return found;
}

/*
 * Drop one reference on @ctx; the context is released through
 * amdgpu_ctx_do_release() when the last reference goes away.
 *
 * Returns 0, or -EINVAL when @ctx is NULL.
 */
int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (!ctx)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);

	return 0;
}
333

334 335
/*
 * Store @fence in the slot for the ring's current sequence number and
 * advance the sequence.
 *
 * @ctx: context owning the per-ring fence slots
 * @ring: ring the fence belongs to (indexed by ring->idx)
 * @fence: fence to store; an extra reference is taken for the slot
 * @handler: optional; receives the sequence number assigned to @fence
 *
 * The fence previously held in the slot must already be signaled (BUG
 * otherwise); its reference is dropped.  Always returns 0.
 */
int amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx, struct amdgpu_ring *ring,
			      struct dma_fence *fence, uint64_t* handler)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	uint64_t seq = cring->sequence;
	/* Slot index: sequence masked by (amdgpu_sched_jobs - 1). */
	unsigned slot = seq & (amdgpu_sched_jobs - 1);
	struct dma_fence *prev = cring->fences[slot];

	/* The slot being recycled must no longer be in flight. */
	BUG_ON(prev && !dma_fence_is_signaled(prev));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	cring->fences[slot] = fence;
	cring->sequence++;
	spin_unlock(&ctx->ring_lock);

	/* Drop the slot's reference on the fence that was replaced. */
	dma_fence_put(prev);

	if (handler)
		*handler = seq;

	return 0;
}

361 362
/*
 * Fetch the fence stored for sequence number @seq on @ring.
 *
 * @ctx: context owning the per-ring fence slots
 * @ring: ring to look at (indexed by ring->idx)
 * @seq: sequence number; ~0ull selects the most recently added one
 *
 * Returns ERR_PTR(-EINVAL) when @seq has not been handed out yet, NULL when
 * @seq is older than the amdgpu_sched_jobs-sized window, otherwise the result
 * of dma_fence_get() on the stored slot.
 */
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct amdgpu_ring *ring, uint64_t seq)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring->idx];
	struct dma_fence *result;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = ctx->rings[ring->idx].sequence - 1;

	if (seq >= cring->sequence) {
		/* Not submitted yet. */
		result = ERR_PTR(-EINVAL);
	} else if (seq + amdgpu_sched_jobs < cring->sequence) {
		/* Fell out of the window of remembered fences. */
		result = NULL;
	} else {
		result = dma_fence_get(
			cring->fences[seq & (amdgpu_sched_jobs - 1)]);
	}

	spin_unlock(&ctx->ring_lock);

	return result;
}
388

389
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
390
				  enum drm_sched_priority priority)
391 392 393
{
	int i;
	struct amdgpu_device *adev = ctx->adev;
394 395
	struct drm_sched_rq *rq;
	struct drm_sched_entity *entity;
396
	struct amdgpu_ring *ring;
397
	enum drm_sched_priority ctx_prio;
398 399 400

	ctx->override_priority = priority;

401
	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
402 403 404 405 406 407 408 409 410 411
			ctx->init_priority : ctx->override_priority;

	for (i = 0; i < adev->num_rings; i++) {
		ring = adev->rings[i];
		entity = &ctx->rings[i].entity;
		rq = &ring->sched.sched_rq[ctx_prio];

		if (ring->funcs->type == AMDGPU_RING_TYPE_KIQ)
			continue;

412
		drm_sched_entity_set_rq(entity, rq);
413 414 415
	}
}

416 417 418 419 420 421 422 423
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx, unsigned ring_id)
{
	struct amdgpu_ctx_ring *cring = &ctx->rings[ring_id];
	unsigned idx = cring->sequence & (amdgpu_sched_jobs - 1);
	struct dma_fence *other = cring->fences[idx];

	if (other) {
		signed long r;
424
		r = dma_fence_wait(other, true);
425
		if (r < 0) {
426 427 428
			if (r != -ERESTARTSYS)
				DRM_ERROR("Error (%ld) waiting for fence!\n", r);

429 430 431 432 433 434 435
			return r;
		}
	}

	return 0;
}

436 437 438 439 440 441
/*
 * Prepare a context manager for use: an empty handle table plus the mutex
 * that the lookup, alloc and free paths take around it.
 */
void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	idr_init(&mgr->ctx_handles);
	mutex_init(&mgr->lock);
}

442 443 444 445 446 447 448 449 450 451 452 453 454 455 456 457 458 459 460 461 462 463 464 465 466 467 468 469 470 471 472 473 474 475 476 477 478 479 480 481 482 483 484 485
/*
 * Call drm_sched_entity_do_release() on the per-ring entities of every
 * context still registered with @mgr.
 *
 * Only contexts whose sole remaining reference is the manager's own
 * (refcount == 1) are processed.  A context that is still referenced
 * elsewhere is reported once and skipped.  The walk stops entirely at the
 * first context that has no device attached.
 */
void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev)
			return;

		/*
		 * The refcount does not depend on the ring, so test it once
		 * per context instead of once per ring; this also avoids
		 * logging the same error num_rings times.
		 */
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < ctx->adev->num_rings; i++)
			drm_sched_entity_do_release(&ctx->adev->rings[i]->sched,
						    &ctx->rings[i].entity);
	}
}

/*
 * Call drm_sched_entity_cleanup() on the per-ring entities of every context
 * still registered with @mgr.
 *
 * Only contexts whose sole remaining reference is the manager's own
 * (refcount == 1) are processed.  A context that is still referenced
 * elsewhere is reported once and skipped.  The walk stops entirely at the
 * first context that has no device attached.
 */
void amdgpu_ctx_mgr_entity_cleanup(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {

		if (!ctx->adev)
			return;

		/*
		 * The refcount does not depend on the ring, so test it once
		 * per context instead of once per ring; this also avoids
		 * logging the same error num_rings times.
		 */
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < ctx->adev->num_rings; i++)
			drm_sched_entity_cleanup(&ctx->adev->rings[i]->sched,
						 &ctx->rings[i].entity);
	}
}

486 487 488 489 490 491
/*
 * Tear down a context manager: clean up the entities of the remaining
 * contexts, drop the manager's reference on each of them, then destroy the
 * handle table and the manager mutex.
 *
 * A context that is not released by dropping the manager's reference is
 * reported as still alive.
 */
void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct idr *handles = &mgr->ctx_handles;
	struct amdgpu_ctx *ctx;
	uint32_t id;

	amdgpu_ctx_mgr_entity_cleanup(mgr);

	idr_for_each_entry(handles, ctx, id) {
		/* kref_put() returns 1 only when the release callback ran. */
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}