/*
 * Copyright 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in
 * all copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR
 * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
 * OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors: monk liu <monk.liu@amd.com>
 */

#include <drm/drm_auth.h>
#include "amdgpu.h"
#include "amdgpu_sched.h"
#include "amdgpu_ras.h"
#include <linux/nospec.h>

#define to_amdgpu_ctx_entity(e)	\
	container_of((e), struct amdgpu_ctx_entity, entity)

const unsigned int amdgpu_ctx_num_entities[AMDGPU_HW_IP_NUM] = {
	[AMDGPU_HW_IP_GFX]	=	1,
	[AMDGPU_HW_IP_COMPUTE]	=	4,
	[AMDGPU_HW_IP_DMA]	=	2,
	[AMDGPU_HW_IP_UVD]	=	1,
	[AMDGPU_HW_IP_VCE]	=	1,
	[AMDGPU_HW_IP_UVD_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_DEC]	=	1,
	[AMDGPU_HW_IP_VCN_ENC]	=	1,
	[AMDGPU_HW_IP_VCN_JPEG]	=	1,
};

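/*
 * Check whether the caller may create a context at the requested scheduler
 * priority.  Priorities above NORMAL are restricted to tasks with
 * CAP_SYS_NICE or to the current DRM master.
 */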
static int amdgpu_ctx_priority_permit(struct drm_file *filp,
				      enum drm_sched_priority priority)
{
	if (priority < 0 || priority >= DRM_SCHED_PRIORITY_COUNT)
		return -EINVAL;

	/* NORMAL and below are accessible by everyone */
	if (priority <= DRM_SCHED_PRIORITY_NORMAL)
		return 0;

	if (capable(CAP_SYS_NICE))
		return 0;

	if (drm_is_current_master(filp))
		return 0;

	return -EACCES;
}

static enum gfx_pipe_priority amdgpu_ctx_sched_prio_to_compute_prio(enum drm_sched_priority prio)
{
	switch (prio) {
	case DRM_SCHED_PRIORITY_HIGH:
	case DRM_SCHED_PRIORITY_KERNEL:
		return AMDGPU_GFX_PIPE_PRIO_HIGH;
	default:
		return AMDGPU_GFX_PIPE_PRIO_NORMAL;
	}
}

static unsigned int amdgpu_ctx_prio_sched_to_hw(struct amdgpu_device *adev,
						 enum drm_sched_priority prio,
						 u32 hw_ip)
{
	unsigned int hw_prio;

	hw_prio = (hw_ip == AMDGPU_HW_IP_COMPUTE) ?
			amdgpu_ctx_sched_prio_to_compute_prio(prio) :
			AMDGPU_RING_PRIO_DEFAULT;
	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	if (adev->gpu_sched[hw_ip][hw_prio].num_scheds == 0)
		hw_prio = AMDGPU_RING_PRIO_DEFAULT;

	return hw_prio;
}

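/*
 * Lazily allocate the scheduler entity for one ring of a hardware IP block
 * and bind it to the schedulers matching the context's effective priority.
 * Engines that retain state across dependent jobs are pinned to a single
 * scheduler instead of being load balanced.
 */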
static int amdgpu_ctx_init_entity(struct amdgpu_ctx *ctx, u32 hw_ip,
				   const u32 ring)
{
	struct amdgpu_device *adev = ctx->adev;
	struct amdgpu_ctx_entity *entity;
	struct drm_gpu_scheduler **scheds = NULL, *sched = NULL;
	unsigned num_scheds = 0;
	unsigned int hw_prio;
	enum drm_sched_priority priority;
	int r;

	entity = kzalloc(struct_size(entity, fences, amdgpu_sched_jobs),
			 GFP_KERNEL);
	if (!entity)
		return  -ENOMEM;

	entity->sequence = 1;
	priority = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
				ctx->init_priority : ctx->override_priority;
	hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority, hw_ip);

	hw_ip = array_index_nospec(hw_ip, AMDGPU_HW_IP_NUM);
	scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
	num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;

	/* disable load balance if the hw engine retains context among dependent jobs */
	if (hw_ip == AMDGPU_HW_IP_VCN_ENC ||
	    hw_ip == AMDGPU_HW_IP_VCN_DEC ||
	    hw_ip == AMDGPU_HW_IP_UVD_ENC ||
	    hw_ip == AMDGPU_HW_IP_UVD) {
		sched = drm_sched_pick_best(scheds, num_scheds);
		scheds = &sched;
		num_scheds = 1;
	}

	r = drm_sched_entity_init(&entity->entity, priority, scheds, num_scheds,
				  &ctx->guilty);
	if (r)
		goto error_free_entity;

	ctx->entities[hw_ip][ring] = entity;
	return 0;

error_free_entity:
	kfree(entity);

	return r;
}

static int amdgpu_ctx_init(struct amdgpu_device *adev,
			   enum drm_sched_priority priority,
			   struct drm_file *filp,
			   struct amdgpu_ctx *ctx)
{
	int r;

	r = amdgpu_ctx_priority_permit(filp, priority);
	if (r)
		return r;

	memset(ctx, 0, sizeof(*ctx));

	ctx->adev = adev;

	kref_init(&ctx->refcount);
	spin_lock_init(&ctx->ring_lock);
	mutex_init(&ctx->lock);

	ctx->reset_counter = atomic_read(&adev->gpu_reset_counter);
	ctx->reset_counter_query = ctx->reset_counter;
	ctx->vram_lost_counter = atomic_read(&adev->vram_lost_counter);
	ctx->init_priority = priority;
	ctx->override_priority = DRM_SCHED_PRIORITY_UNSET;

	return 0;
}

static void amdgpu_ctx_fini_entity(struct amdgpu_ctx_entity *entity)
{

	int i;

	if (!entity)
		return;

	for (i = 0; i < amdgpu_sched_jobs; ++i)
		dma_fence_put(entity->fences[i]);

	kfree(entity);
}

static void amdgpu_ctx_fini(struct kref *ref)
{
	struct amdgpu_ctx *ctx = container_of(ref, struct amdgpu_ctx, refcount);
	struct amdgpu_device *adev = ctx->adev;
	unsigned i, j;

	if (!adev)
		return;

	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < AMDGPU_MAX_ENTITY_NUM; ++j) {
			amdgpu_ctx_fini_entity(ctx->entities[i][j]);
			ctx->entities[i][j] = NULL;
		}
	}

	mutex_destroy(&ctx->lock);
	kfree(ctx);
}

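/*
 * Look up the scheduler entity for a hw_ip/instance/ring triple, creating
 * it on first use.  Returns -EINVAL for out-of-range arguments.
 */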
int amdgpu_ctx_get_entity(struct amdgpu_ctx *ctx, u32 hw_ip, u32 instance,
			  u32 ring, struct drm_sched_entity **entity)
{
	int r;

	if (hw_ip >= AMDGPU_HW_IP_NUM) {
		DRM_ERROR("unknown HW IP type: %d\n", hw_ip);
		return -EINVAL;
	}

	/* Right now all IPs have only one instance - multiple rings. */
	if (instance != 0) {
		DRM_DEBUG("invalid ip instance: %d\n", instance);
		return -EINVAL;
	}

	if (ring >= amdgpu_ctx_num_entities[hw_ip]) {
		DRM_DEBUG("invalid ring: %d %d\n", hw_ip, ring);
		return -EINVAL;
	}

	if (ctx->entities[hw_ip][ring] == NULL) {
		r = amdgpu_ctx_init_entity(ctx, hw_ip, ring);
		if (r)
			return r;
	}

	*entity = &ctx->entities[hw_ip][ring]->entity;
	return 0;
}

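/*
 * Allocate a new context and publish it in the file private's handle IDR;
 * the IDR slot becomes the context id reported back to userspace.
 */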
static int amdgpu_ctx_alloc(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv,
			    struct drm_file *filp,
			    enum drm_sched_priority priority,
			    uint32_t *id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;
	int r;

	ctx = kmalloc(sizeof(*ctx), GFP_KERNEL);
	if (!ctx)
		return -ENOMEM;

	mutex_lock(&mgr->lock);
	r = idr_alloc(&mgr->ctx_handles, ctx, 1, AMDGPU_VM_MAX_NUM_CTX, GFP_KERNEL);
	if (r < 0) {
		mutex_unlock(&mgr->lock);
		kfree(ctx);
		return r;
	}

	*id = (uint32_t)r;
	r = amdgpu_ctx_init(adev, priority, filp, ctx);
	if (r) {
		idr_remove(&mgr->ctx_handles, *id);
		*id = 0;
		kfree(ctx);
	}
	mutex_unlock(&mgr->lock);
	return r;
}

static void amdgpu_ctx_do_release(struct kref *ref)
{
	struct amdgpu_ctx *ctx;
	u32 i, j;

	ctx = container_of(ref, struct amdgpu_ctx, refcount);
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			drm_sched_entity_destroy(&ctx->entities[i][j]->entity);
		}
	}

	amdgpu_ctx_fini(ref);
}

static int amdgpu_ctx_free(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx_mgr *mgr = &fpriv->ctx_mgr;
	struct amdgpu_ctx *ctx;

	mutex_lock(&mgr->lock);
	ctx = idr_remove(&mgr->ctx_handles, id);
	if (ctx)
		kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	mutex_unlock(&mgr->lock);
	return ctx ? 0 : -EINVAL;
}

static int amdgpu_ctx_query(struct amdgpu_device *adev,
			    struct amdgpu_fpriv *fpriv, uint32_t id,
			    union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned reset_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	/* TODO: these two are always zero */
	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	/* determine if a GPU reset has occurred since the last call */
	reset_counter = atomic_read(&adev->gpu_reset_counter);
	/* TODO: this should ideally return NO, GUILTY, or INNOCENT. */
	if (ctx->reset_counter_query == reset_counter)
		out->state.reset_status = AMDGPU_CTX_NO_RESET;
	else
		out->state.reset_status = AMDGPU_CTX_UNKNOWN_RESET;
	ctx->reset_counter_query = reset_counter;

	mutex_unlock(&mgr->lock);
	return 0;
}

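/*
 * Extended state query: besides the reset flag this reports VRAM loss,
 * whether the context was flagged guilty, and whether the RAS correctable
 * and uncorrectable error counters advanced since the last query.
 */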
static int amdgpu_ctx_query2(struct amdgpu_device *adev,
	struct amdgpu_fpriv *fpriv, uint32_t id,
	union drm_amdgpu_ctx_out *out)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;
	unsigned long ras_counter;

	if (!fpriv)
		return -EINVAL;

	mgr = &fpriv->ctx_mgr;
	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (!ctx) {
		mutex_unlock(&mgr->lock);
		return -EINVAL;
	}

	out->state.flags = 0x0;
	out->state.hangs = 0x0;

	if (ctx->reset_counter != atomic_read(&adev->gpu_reset_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RESET;

	if (ctx->vram_lost_counter != atomic_read(&adev->vram_lost_counter))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_VRAMLOST;

	if (atomic_read(&ctx->guilty))
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_GUILTY;

	/*query ue count*/
	ras_counter = amdgpu_ras_query_error_count(adev, false);
	/*ras counter is monotonic increasing*/
	if (ras_counter != ctx->ras_counter_ue) {
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_UE;
		ctx->ras_counter_ue = ras_counter;
	}

	/*query ce count*/
	ras_counter = amdgpu_ras_query_error_count(adev, true);
	if (ras_counter != ctx->ras_counter_ce) {
		out->state.flags |= AMDGPU_CTX_QUERY2_FLAGS_RAS_CE;
		ctx->ras_counter_ce = ras_counter;
	}

	mutex_unlock(&mgr->lock);
	return 0;
}

int amdgpu_ctx_ioctl(struct drm_device *dev, void *data,
		     struct drm_file *filp)
{
	int r;
	uint32_t id;
	enum drm_sched_priority priority;

	union drm_amdgpu_ctx *args = data;
	struct amdgpu_device *adev = drm_to_adev(dev);
	struct amdgpu_fpriv *fpriv = filp->driver_priv;

	id = args->in.ctx_id;
	r = amdgpu_to_sched_priority(args->in.priority, &priority);

	/* For backwards compatibility reasons, we need to accept
	 * ioctls with garbage in the priority field */
	if (r == -EINVAL)
		priority = DRM_SCHED_PRIORITY_NORMAL;

	switch (args->in.op) {
	case AMDGPU_CTX_OP_ALLOC_CTX:
		r = amdgpu_ctx_alloc(adev, fpriv, filp, priority, &id);
		args->out.alloc.ctx_id = id;
		break;
	case AMDGPU_CTX_OP_FREE_CTX:
		r = amdgpu_ctx_free(fpriv, id);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE:
		r = amdgpu_ctx_query(adev, fpriv, id, &args->out);
		break;
	case AMDGPU_CTX_OP_QUERY_STATE2:
		r = amdgpu_ctx_query2(adev, fpriv, id, &args->out);
		break;
	default:
		return -EINVAL;
	}

	return r;
}

struct amdgpu_ctx *amdgpu_ctx_get(struct amdgpu_fpriv *fpriv, uint32_t id)
{
	struct amdgpu_ctx *ctx;
	struct amdgpu_ctx_mgr *mgr;

	if (!fpriv)
		return NULL;

	mgr = &fpriv->ctx_mgr;

	mutex_lock(&mgr->lock);
	ctx = idr_find(&mgr->ctx_handles, id);
	if (ctx)
		kref_get(&ctx->refcount);
	mutex_unlock(&mgr->lock);
	return ctx;
}

int amdgpu_ctx_put(struct amdgpu_ctx *ctx)
{
	if (ctx == NULL)
		return -EINVAL;

	kref_put(&ctx->refcount, amdgpu_ctx_do_release);
	return 0;
}

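/*
 * Store a fence in the entity's ring buffer of amdgpu_sched_jobs slots and
 * hand back its sequence number through *handle so the fence can later be
 * looked up with amdgpu_ctx_get_fence().  The slot being recycled must
 * already be signaled.
 */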
void amdgpu_ctx_add_fence(struct amdgpu_ctx *ctx,
			  struct drm_sched_entity *entity,
			  struct dma_fence *fence, uint64_t *handle)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	uint64_t seq = centity->sequence;
	struct dma_fence *other = NULL;
	unsigned idx = 0;

	idx = seq & (amdgpu_sched_jobs - 1);
	other = centity->fences[idx];
	if (other)
		BUG_ON(!dma_fence_is_signaled(other));

	dma_fence_get(fence);

	spin_lock(&ctx->ring_lock);
	centity->fences[idx] = fence;
	centity->sequence++;
	spin_unlock(&ctx->ring_lock);

	dma_fence_put(other);
	if (handle)
		*handle = seq;
}

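/*
 * Look up a fence by sequence number.  A seq of ~0ull means the most recent
 * submission, a sequence that was never emitted returns -EINVAL, and one
 * that already dropped out of the ring buffer returns NULL.
 */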
struct dma_fence *amdgpu_ctx_get_fence(struct amdgpu_ctx *ctx,
				       struct drm_sched_entity *entity,
				       uint64_t seq)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *fence;

	spin_lock(&ctx->ring_lock);

	if (seq == ~0ull)
		seq = centity->sequence - 1;

	if (seq >= centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return ERR_PTR(-EINVAL);
	}

	if (seq + amdgpu_sched_jobs < centity->sequence) {
		spin_unlock(&ctx->ring_lock);
		return NULL;
	}

	fence = dma_fence_get(centity->fences[seq & (amdgpu_sched_jobs - 1)]);
	spin_unlock(&ctx->ring_lock);

	return fence;
}

static void amdgpu_ctx_set_entity_priority(struct amdgpu_ctx *ctx,
					    struct amdgpu_ctx_entity *aentity,
					    int hw_ip,
					    enum drm_sched_priority priority)
{
	struct amdgpu_device *adev = ctx->adev;
	unsigned int hw_prio;
	struct drm_gpu_scheduler **scheds = NULL;
	unsigned num_scheds;

	/* set sw priority */
	drm_sched_entity_set_priority(&aentity->entity, priority);

	/* set hw priority */
	if (hw_ip == AMDGPU_HW_IP_COMPUTE) {
		hw_prio = amdgpu_ctx_prio_sched_to_hw(adev, priority,
						      AMDGPU_HW_IP_COMPUTE);
		hw_prio = array_index_nospec(hw_prio, AMDGPU_RING_PRIO_MAX);
		scheds = adev->gpu_sched[hw_ip][hw_prio].sched;
		num_scheds = adev->gpu_sched[hw_ip][hw_prio].num_scheds;
		drm_sched_entity_modify_sched(&aentity->entity, scheds,
					      num_scheds);
	}
}

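/*
 * Apply a priority override to the context and propagate the resulting
 * effective priority to every entity that has already been created.
 */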
void amdgpu_ctx_priority_override(struct amdgpu_ctx *ctx,
				  enum drm_sched_priority priority)
{
	enum drm_sched_priority ctx_prio;
	unsigned i, j;

	ctx->override_priority = priority;

	ctx_prio = (ctx->override_priority == DRM_SCHED_PRIORITY_UNSET) ?
			ctx->init_priority : ctx->override_priority;
	for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
		for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
			if (!ctx->entities[i][j])
				continue;

			amdgpu_ctx_set_entity_priority(ctx, ctx->entities[i][j],
						       i, ctx_prio);
		}
	}
}

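/*
 * Wait for the fence occupying the ring buffer slot that the next
 * submission on this entity is going to reuse.
 */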
int amdgpu_ctx_wait_prev_fence(struct amdgpu_ctx *ctx,
			       struct drm_sched_entity *entity)
{
	struct amdgpu_ctx_entity *centity = to_amdgpu_ctx_entity(entity);
	struct dma_fence *other;
	unsigned idx;
	long r;

	spin_lock(&ctx->ring_lock);
	idx = centity->sequence & (amdgpu_sched_jobs - 1);
	other = dma_fence_get(centity->fences[idx]);
	spin_unlock(&ctx->ring_lock);

	if (!other)
		return 0;

	r = dma_fence_wait(other, true);
	if (r < 0 && r != -ERESTARTSYS)
		DRM_ERROR("Error (%ld) waiting for fence!\n", r);

	dma_fence_put(other);
	return r;
}

void amdgpu_ctx_mgr_init(struct amdgpu_ctx_mgr *mgr)
{
	mutex_init(&mgr->lock);
	idr_init(&mgr->ctx_handles);
}

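/*
 * Flush every entity of every context in the manager, passing the remaining
 * timeout from one drm_sched_entity_flush() call to the next.
 */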
long amdgpu_ctx_mgr_entity_flush(struct amdgpu_ctx_mgr *mgr, long timeout)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				timeout = drm_sched_entity_flush(entity, timeout);
			}
		}
	}
	mutex_unlock(&mgr->lock);
	return timeout;
}

void amdgpu_ctx_mgr_entity_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id, i, j;

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_read(&ctx->refcount) != 1) {
			DRM_ERROR("ctx %p is still alive\n", ctx);
			continue;
		}

		for (i = 0; i < AMDGPU_HW_IP_NUM; ++i) {
			for (j = 0; j < amdgpu_ctx_num_entities[i]; ++j) {
				struct drm_sched_entity *entity;

				if (!ctx->entities[i][j])
					continue;

				entity = &ctx->entities[i][j]->entity;
				drm_sched_entity_fini(entity);
			}
		}
	}
}

void amdgpu_ctx_mgr_fini(struct amdgpu_ctx_mgr *mgr)
{
	struct amdgpu_ctx *ctx;
	struct idr *idp;
	uint32_t id;

	amdgpu_ctx_mgr_entity_fini(mgr);

	idp = &mgr->ctx_handles;

	idr_for_each_entry(idp, ctx, id) {
		if (kref_put(&ctx->refcount, amdgpu_ctx_fini) != 1)
			DRM_ERROR("ctx %p is still alive\n", ctx);
	}

	idr_destroy(&mgr->ctx_handles);
	mutex_destroy(&mgr->lock);
}

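/*
 * Add up, for every fence slot of an entity, the time from the fence being
 * scheduled until it finished (or until now if it has not finished yet),
 * and record the longest time since a fence was scheduled in *max.
 */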
void amdgpu_ctx_fence_time(struct amdgpu_ctx *ctx, struct amdgpu_ctx_entity *centity,
		ktime_t *total, ktime_t *max)
{
	ktime_t now, t1;
	uint32_t i;

	now = ktime_get();
	for (i = 0; i < amdgpu_sched_jobs; i++) {
		struct dma_fence *fence;
		struct drm_sched_fence *s_fence;

		spin_lock(&ctx->ring_lock);
		fence = dma_fence_get(centity->fences[i]);
		spin_unlock(&ctx->ring_lock);
		if (!fence)
			continue;
		s_fence = to_drm_sched_fence(fence);
		if (!dma_fence_is_signaled(&s_fence->scheduled))
			continue;
		t1 = s_fence->scheduled.timestamp;
		if (t1 >= now)
			continue;
		if (dma_fence_is_signaled(&s_fence->finished) &&
			s_fence->finished.timestamp < now)
			*total += ktime_sub(s_fence->finished.timestamp, t1);
		else
			*total += ktime_sub(now, t1);
		t1 = ktime_sub(now, t1);
		dma_fence_put(fence);
		*max = max(t1, *max);
	}
}

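/*
 * Sum the fence runtime of one hardware IP / ring index across all contexts
 * of the manager.  The longest single interval is optionally returned
 * through *elapsed.
 */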
ktime_t amdgpu_ctx_mgr_fence_usage(struct amdgpu_ctx_mgr *mgr, uint32_t hwip,
		uint32_t idx, uint64_t *elapsed)
{
	struct idr *idp;
	struct amdgpu_ctx *ctx;
	uint32_t id;
	struct amdgpu_ctx_entity *centity;
	ktime_t total = 0, max = 0;

	if (idx >= AMDGPU_MAX_ENTITY_NUM)
		return 0;
	idp = &mgr->ctx_handles;
	mutex_lock(&mgr->lock);
	idr_for_each_entry(idp, ctx, id) {
		if (!ctx->entities[hwip][idx])
			continue;

		centity = ctx->entities[hwip][idx];
		amdgpu_ctx_fence_time(ctx, centity, &total, &max);
	}

	mutex_unlock(&mgr->lock);
	if (elapsed)
		*elapsed = max;

	return total;
}