/*
 * Copyright(c) 2011-2016 Intel Corporation. All rights reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Zhi Wang <zhi.a.wang@intel.com>
 *
 * Contributors:
 *    Ping Gao <ping.a.gao@intel.com>
 *    Tina Zhang <tina.zhang@intel.com>
 *    Chanbin Du <changbin.du@intel.com>
 *    Min He <min.he@intel.com>
 *    Bing Niu <bing.niu@intel.com>
 *    Zhenyu Wang <zhenyuw@linux.intel.com>
 *
 */

#include <linux/kthread.h>

#include "i915_drv.h"
#include "gvt.h"

#define RING_CTX_OFF(x) \
	offsetof(struct execlist_ring_context, x)

static void set_context_pdp_root_pointer(
		struct execlist_ring_context *ring_context,
		u32 pdp[8])
{
	struct execlist_mmio_pair *pdp_pair = &ring_context->pdp3_UDW;
	int i;

	for (i = 0; i < 8; i++)
		pdp_pair[i].val = pdp[7 - i];
}

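/* Copy the guest's logical ring context into the shadow context: the
 * context pages are read from guest memory through the hypervisor GPA
 * interface, a few ring-context registers are copied individually, and
 * the PDP root pointers are replaced with the shadow page table.
 */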
static int populate_shadow_context(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	int ring_id = workload->ring_id;
	struct i915_gem_context *shadow_ctx = vgpu->submission.shadow_ctx;
	struct drm_i915_gem_object *ctx_obj =
		shadow_ctx->engine[ring_id].state->obj;
	struct execlist_ring_context *shadow_ring_context;
	struct page *page;
	void *dst;
	unsigned long context_gpa, context_page_num;
	int i;

	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
			workload->ctx_desc.lrca);

	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;

	context_page_num = context_page_num >> PAGE_SHIFT;

	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
		context_page_num = 19;

	i = 2;

	while (i < context_page_num) {
		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
				(u32)((workload->ctx_desc.lrca + i) <<
				GTT_PAGE_SHIFT));
		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
			gvt_vgpu_err("Invalid guest context descriptor\n");
			return -EFAULT;
		}

		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
		dst = kmap(page);
		intel_gvt_hypervisor_read_gpa(vgpu, context_gpa, dst,
				GTT_PAGE_SIZE);
		kunmap(page);
		i++;
	}

	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
	shadow_ring_context = kmap(page);

#define COPY_REG(name) \
	intel_gvt_hypervisor_read_gpa(vgpu, workload->ring_context_gpa \
		+ RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)

	COPY_REG(ctx_ctrl);
	COPY_REG(ctx_timestamp);

	if (ring_id == RCS) {
		COPY_REG(bb_per_ctx_ptr);
		COPY_REG(rcs_indirect_ctx);
		COPY_REG(rcs_indirect_ctx_offset);
	}
#undef COPY_REG

	set_context_pdp_root_pointer(shadow_ring_context,
				     workload->shadow_mm->shadow_page_table);

	intel_gvt_hypervisor_read_gpa(vgpu,
			workload->ring_context_gpa +
			sizeof(*shadow_ring_context),
			(void *)shadow_ring_context +
			sizeof(*shadow_ring_context),
			GTT_PAGE_SIZE - sizeof(*shadow_ring_context));

	kunmap(page);
	return 0;
}

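/* Requests submitted by GVT itself use a context marked for forced
 * single submission; requests coming from the host i915 do not.
 */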
static inline bool is_gvt_request(struct drm_i915_gem_request *req)
{
	return i915_gem_context_force_single_submission(req->ctx);
}

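/* Context status notifier: tracks schedule-in/out of the shadow context
 * and switches the engine MMIO state between host and vGPU (or between
 * vGPUs) when the owner of a ring changes.
 */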
static int shadow_context_status_change(struct notifier_block *nb,
		unsigned long action, void *data)
{
	struct drm_i915_gem_request *req = (struct drm_i915_gem_request *)data;
	struct intel_gvt *gvt = container_of(nb, struct intel_gvt,
				shadow_ctx_notifier_block[req->engine->id]);
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	enum intel_engine_id ring_id = req->engine->id;
	struct intel_vgpu_workload *workload;

	if (!is_gvt_request(req)) {
		spin_lock_bh(&scheduler->mmio_context_lock);
		if (action == INTEL_CONTEXT_SCHEDULE_IN &&
		    scheduler->engine_owner[ring_id]) {
			/* Switch ring from vGPU to host. */
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      NULL, ring_id);
			scheduler->engine_owner[ring_id] = NULL;
		}
		spin_unlock_bh(&scheduler->mmio_context_lock);

		return NOTIFY_OK;
	}

	workload = scheduler->current_workload[ring_id];
	if (unlikely(!workload))
		return NOTIFY_OK;

	switch (action) {
	case INTEL_CONTEXT_SCHEDULE_IN:
		spin_lock_bh(&scheduler->mmio_context_lock);
		if (workload->vgpu != scheduler->engine_owner[ring_id]) {
			/* Switch ring from host to vGPU or vGPU to vGPU. */
			intel_gvt_switch_mmio(scheduler->engine_owner[ring_id],
					      workload->vgpu, ring_id);
			scheduler->engine_owner[ring_id] = workload->vgpu;
		} else
			gvt_dbg_sched("skip ring %d mmio switch for vgpu%d\n",
				      ring_id, workload->vgpu->id);
		spin_unlock_bh(&scheduler->mmio_context_lock);
		atomic_set(&workload->shadow_ctx_active, 1);
		break;
	case INTEL_CONTEXT_SCHEDULE_OUT:
	case INTEL_CONTEXT_SCHEDULE_PREEMPTED:
		atomic_set(&workload->shadow_ctx_active, 0);
		break;
	default:
		WARN_ON(1);
		return NOTIFY_OK;
	}
	wake_up(&workload->shadow_ctx_status_wq);
	return NOTIFY_OK;
}

static void shadow_context_descriptor_update(struct i915_gem_context *ctx,
		struct intel_engine_cs *engine)
{
	struct intel_context *ce = &ctx->engine[engine->id];
	u64 desc = 0;

	desc = ce->lrc_desc;

	/* Update bits 0-11 of the context descriptor which includes flags
	 * like GEN8_CTX_* cached in desc_template
	 */
	desc &= U64_MAX << 12;
	desc |= ctx->desc_template & ((1ULL << 12) - 1);

	ce->lrc_desc = desc;
}

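/* Reserve space in the request's ring buffer and copy the scanned guest
 * ring buffer contents into it.
 */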
static int copy_workload_to_ring_buffer(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	void *shadow_ring_buffer_va;
	u32 *cs;

	/* allocate shadow ring buffer */
	cs = intel_ring_begin(workload->req, workload->rb_len / sizeof(u32));
	if (IS_ERR(cs)) {
		gvt_vgpu_err("fail to alloc size=%ld shadow ring buffer\n",
			workload->rb_len);
		return PTR_ERR(cs);
	}

	shadow_ring_buffer_va = workload->shadow_ring_buffer_va;

	/* get shadow ring buffer va */
	workload->shadow_ring_buffer_va = cs;

	memcpy(cs, shadow_ring_buffer_va,
			workload->rb_len);

	cs += workload->rb_len / sizeof(u32);
	intel_ring_advance(workload->req, cs);

	return 0;
}

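/* Drop the pin and reference taken on the shadowed indirect context
 * object, if one was created.
 */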
void release_shadow_wa_ctx(struct intel_shadow_wa_ctx *wa_ctx)
{
	if (!wa_ctx->indirect_ctx.obj)
		return;

	i915_gem_object_unpin_map(wa_ctx->indirect_ctx.obj);
	i915_gem_object_put(wa_ctx->indirect_ctx.obj);
}

/**
 * intel_gvt_scan_and_shadow_workload - audit the workload by scanning and
 * shadowing it as well, including its ring buffer, wa_ctx and context.
 * @workload: an abstract entity for each execlist submission.
 *
 * This function is called before the workload is submitted to i915, to make
 * sure the content of the workload is valid.
 */
int intel_gvt_scan_and_shadow_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	int ring_id = workload->ring_id;
	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
	struct drm_i915_gem_request *rq;
	struct intel_ring *ring;
	int ret;

	lockdep_assert_held(&dev_priv->drm.struct_mutex);

	if (workload->shadowed)
		return 0;

	shadow_ctx->desc_template &= ~(0x3 << GEN8_CTX_ADDRESSING_MODE_SHIFT);
	shadow_ctx->desc_template |= workload->ctx_desc.addressing_mode <<
				    GEN8_CTX_ADDRESSING_MODE_SHIFT;

	if (!test_and_set_bit(ring_id, s->shadow_ctx_desc_updated))
		shadow_context_descriptor_update(shadow_ctx,
					dev_priv->engine[ring_id]);

	ret = intel_gvt_scan_and_shadow_ringbuffer(workload);
	if (ret)
		goto err_scan;

	if ((workload->ring_id == RCS) &&
	    (workload->wa_ctx.indirect_ctx.size != 0)) {
		ret = intel_gvt_scan_and_shadow_wa_ctx(&workload->wa_ctx);
		if (ret)
			goto err_scan;
	}

	/* Pin the shadow context by GVT even though it will also be pinned
	 * when i915 allocates the request. GVT updates the guest context
	 * from the shadow context once the workload completes, and by that
	 * time i915 may have already unpinned the shadow context, making
	 * the shadow_ctx pages invalid. So GVT needs its own pin, which it
	 * can release safely after the guest context has been updated.
	 */
	ring = engine->context_pin(engine, shadow_ctx);
	if (IS_ERR(ring)) {
		ret = PTR_ERR(ring);
		gvt_vgpu_err("fail to pin shadow context\n");
		goto err_shadow;
	}

	ret = populate_shadow_context(workload);
	if (ret)
		goto err_unpin;

	rq = i915_gem_request_alloc(dev_priv->engine[ring_id], shadow_ctx);
	if (IS_ERR(rq)) {
		gvt_vgpu_err("fail to allocate gem request\n");
		ret = PTR_ERR(rq);
		goto err_unpin;
	}

	gvt_dbg_sched("ring id %d get i915 gem request %p\n", ring_id, rq);

	workload->req = i915_gem_request_get(rq);
	ret = copy_workload_to_ring_buffer(workload);
	if (ret)
		goto err_unpin;
	workload->shadowed = true;
	return 0;

err_unpin:
	engine->context_unpin(engine, shadow_ctx);
err_shadow:
	release_shadow_wa_ctx(&workload->wa_ctx);
err_scan:
	return ret;
}

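/* Shadow the workload under struct_mutex, run its prepare callback and,
 * if a request was created, submit it to i915. Any error is recorded in
 * the workload status so the workload can be completed as failed.
 */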
static int dispatch_workload(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	struct drm_i915_private *dev_priv = vgpu->gvt->dev_priv;
	int ring_id = workload->ring_id;
	struct intel_engine_cs *engine = dev_priv->engine[ring_id];
	int ret = 0;

	gvt_dbg_sched("ring id %d prepare to dispatch workload %p\n",
		ring_id, workload);

	mutex_lock(&dev_priv->drm.struct_mutex);

	ret = intel_gvt_scan_and_shadow_workload(workload);
	if (ret)
		goto out;

	if (workload->prepare) {
		ret = workload->prepare(workload);
		if (ret) {
			engine->context_unpin(engine, shadow_ctx);
			goto out;
		}
	}

out:
	if (ret)
		workload->status = ret;

	if (!IS_ERR_OR_NULL(workload->req)) {
		gvt_dbg_sched("ring id %d submit workload to i915 %p\n",
				ring_id, workload->req);
		i915_add_request(workload->req);
		workload->dispatched = true;
	}

	mutex_unlock(&dev_priv->drm.struct_mutex);
	return ret;
}

static struct intel_vgpu_workload *pick_next_workload(
		struct intel_gvt *gvt, int ring_id)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_vgpu_workload *workload = NULL;

	mutex_lock(&gvt->lock);

	/*
	 * no current vgpu / will be scheduled out / no workload
	 * bail out
	 */
	if (!scheduler->current_vgpu) {
		gvt_dbg_sched("ring id %d stop - no current vgpu\n", ring_id);
		goto out;
	}

	if (scheduler->need_reschedule) {
		gvt_dbg_sched("ring id %d stop - will reschedule\n", ring_id);
		goto out;
	}

	if (list_empty(workload_q_head(scheduler->current_vgpu, ring_id)))
		goto out;

	/*
	 * still have a current workload, maybe the workload dispatcher
	 * failed to submit it for some reason; resubmit it.
	 */
	if (scheduler->current_workload[ring_id]) {
		workload = scheduler->current_workload[ring_id];
		gvt_dbg_sched("ring id %d still have current workload %p\n",
				ring_id, workload);
		goto out;
	}

	/*
	 * pick a workload as the current workload.
	 * once the current workload is set, the scheduling policy routines
	 * will wait for it to finish before scheduling out a vgpu.
	 */
	scheduler->current_workload[ring_id] = container_of(
			workload_q_head(scheduler->current_vgpu, ring_id)->next,
			struct intel_vgpu_workload, list);

	workload = scheduler->current_workload[ring_id];

	gvt_dbg_sched("ring id %d pick new workload %p\n", ring_id, workload);

	atomic_inc(&workload->vgpu->submission.running_workload_num);
out:
	mutex_unlock(&gvt->lock);
	return workload;
}

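/* Write the shadow context back to the guest: context pages are written
 * through the hypervisor GPA interface, and the ring header plus a few
 * ring-context registers are updated individually.
 */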
static void update_guest_context(struct intel_vgpu_workload *workload)
{
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct i915_gem_context *shadow_ctx = s->shadow_ctx;
	int ring_id = workload->ring_id;
	struct drm_i915_gem_object *ctx_obj =
		shadow_ctx->engine[ring_id].state->obj;
	struct execlist_ring_context *shadow_ring_context;
	struct page *page;
	void *src;
	unsigned long context_gpa, context_page_num;
	int i;

	gvt_dbg_sched("ring id %d workload lrca %x\n", ring_id,
			workload->ctx_desc.lrca);

	context_page_num = gvt->dev_priv->engine[ring_id]->context_size;

	context_page_num = context_page_num >> PAGE_SHIFT;

	if (IS_BROADWELL(gvt->dev_priv) && ring_id == RCS)
		context_page_num = 19;

	i = 2;

	while (i < context_page_num) {
		context_gpa = intel_vgpu_gma_to_gpa(vgpu->gtt.ggtt_mm,
				(u32)((workload->ctx_desc.lrca + i) <<
					GTT_PAGE_SHIFT));
		if (context_gpa == INTEL_GVT_INVALID_ADDR) {
			gvt_vgpu_err("invalid guest context descriptor\n");
			return;
		}

		page = i915_gem_object_get_page(ctx_obj, LRC_HEADER_PAGES + i);
		src = kmap(page);
		intel_gvt_hypervisor_write_gpa(vgpu, context_gpa, src,
				GTT_PAGE_SIZE);
		kunmap(page);
		i++;
	}

	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa +
		RING_CTX_OFF(ring_header.val), &workload->rb_tail, 4);

	page = i915_gem_object_get_page(ctx_obj, LRC_STATE_PN);
	shadow_ring_context = kmap(page);

#define COPY_REG(name) \
	intel_gvt_hypervisor_write_gpa(vgpu, workload->ring_context_gpa + \
		RING_CTX_OFF(name.val), &shadow_ring_context->name.val, 4)

	COPY_REG(ctx_ctrl);
	COPY_REG(ctx_timestamp);

#undef COPY_REG

	intel_gvt_hypervisor_write_gpa(vgpu,
			workload->ring_context_gpa +
			sizeof(*shadow_ring_context),
			(void *)shadow_ring_context +
			sizeof(*shadow_ring_context),
			GTT_PAGE_SIZE - sizeof(*shadow_ring_context));

	kunmap(page);
}

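/* Finish the current workload on a ring: wait for its shadow context to
 * be scheduled out, propagate the request status, copy the shadow
 * context back to the guest when appropriate, then release the workload
 * and wake up waiters.
 */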
static void complete_current_workload(struct intel_gvt *gvt, int ring_id)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_vgpu_workload *workload =
		scheduler->current_workload[ring_id];
	struct intel_vgpu *vgpu = workload->vgpu;
	struct intel_vgpu_submission *s = &vgpu->submission;
	int event;

	mutex_lock(&gvt->lock);

	/* For a workload with a request, wait for the context switch to
	 * make sure the request has completed.
	 * For a workload without a request, complete it directly.
	 */
	if (workload->req) {
		struct drm_i915_private *dev_priv =
			workload->vgpu->gvt->dev_priv;
		struct intel_engine_cs *engine =
			dev_priv->engine[workload->ring_id];
		wait_event(workload->shadow_ctx_status_wq,
			   !atomic_read(&workload->shadow_ctx_active));

		/* If this request caused a GPU hang, req->fence.error will
		 * be set to -EIO. Propagate -EIO to the workload status so
		 * that a request which hung the GPU does not trigger a
		 * context switch interrupt to the guest.
		 */
		if (likely(workload->status == -EINPROGRESS)) {
			if (workload->req->fence.error == -EIO)
				workload->status = -EIO;
			else
				workload->status = 0;
		}

		i915_gem_request_put(fetch_and_zero(&workload->req));

		if (!workload->status && !(vgpu->resetting_eng &
					   ENGINE_MASK(ring_id))) {
			update_guest_context(workload);

			for_each_set_bit(event, workload->pending_events,
					 INTEL_GVT_EVENT_MAX)
				intel_vgpu_trigger_virtual_event(vgpu, event);
		}
		mutex_lock(&dev_priv->drm.struct_mutex);
		/* unpin shadow ctx as the shadow_ctx update is done */
		engine->context_unpin(engine, s->shadow_ctx);
		mutex_unlock(&dev_priv->drm.struct_mutex);
	}

	gvt_dbg_sched("ring id %d complete workload %p status %d\n",
			ring_id, workload, workload->status);

	scheduler->current_workload[ring_id] = NULL;

	list_del_init(&workload->list);
	workload->complete(workload);

	atomic_dec(&s->running_workload_num);
	wake_up(&scheduler->workload_complete_wq);

	if (gvt->scheduler.need_reschedule)
		intel_gvt_request_service(gvt, INTEL_GVT_REQUEST_EVENT_SCHED);

	mutex_unlock(&gvt->lock);
}

struct workload_thread_param {
	struct intel_gvt *gvt;
	int ring_id;
};

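/* Per-ring worker thread: picks the next workload of the currently
 * scheduled vGPU, dispatches it to i915 and waits for completion,
 * holding forcewake and a runtime PM reference around the submission.
 */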
static int workload_thread(void *priv)
{
	struct workload_thread_param *p = (struct workload_thread_param *)priv;
	struct intel_gvt *gvt = p->gvt;
	int ring_id = p->ring_id;
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_vgpu_workload *workload = NULL;
	struct intel_vgpu *vgpu = NULL;
	int ret;
	bool need_force_wake = IS_SKYLAKE(gvt->dev_priv)
			|| IS_KABYLAKE(gvt->dev_priv);
	DEFINE_WAIT_FUNC(wait, woken_wake_function);

	kfree(p);

	gvt_dbg_core("workload thread for ring %d started\n", ring_id);

	while (!kthread_should_stop()) {
		add_wait_queue(&scheduler->waitq[ring_id], &wait);
		do {
			workload = pick_next_workload(gvt, ring_id);
			if (workload)
				break;
			wait_woken(&wait, TASK_INTERRUPTIBLE,
				   MAX_SCHEDULE_TIMEOUT);
		} while (!kthread_should_stop());
		remove_wait_queue(&scheduler->waitq[ring_id], &wait);

		if (!workload)
			break;

		gvt_dbg_sched("ring id %d next workload %p vgpu %d\n",
				workload->ring_id, workload,
				workload->vgpu->id);

		intel_runtime_pm_get(gvt->dev_priv);

		gvt_dbg_sched("ring id %d will dispatch workload %p\n",
				workload->ring_id, workload);

		if (need_force_wake)
			intel_uncore_forcewake_get(gvt->dev_priv,
					FORCEWAKE_ALL);

		mutex_lock(&gvt->lock);
		ret = dispatch_workload(workload);
		mutex_unlock(&gvt->lock);

		if (ret) {
			vgpu = workload->vgpu;
			gvt_vgpu_err("fail to dispatch workload, skip\n");
			goto complete;
		}

		gvt_dbg_sched("ring id %d wait workload %p\n",
				workload->ring_id, workload);
		i915_wait_request(workload->req, 0, MAX_SCHEDULE_TIMEOUT);

complete:
		gvt_dbg_sched("will complete workload %p, status: %d\n",
				workload, workload->status);

		complete_current_workload(gvt, ring_id);

		if (need_force_wake)
			intel_uncore_forcewake_put(gvt->dev_priv,
					FORCEWAKE_ALL);

		intel_runtime_pm_put(gvt->dev_priv);
	}
	return 0;
}

void intel_gvt_wait_vgpu_idle(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	struct intel_gvt *gvt = vgpu->gvt;
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;

	if (atomic_read(&s->running_workload_num)) {
		gvt_dbg_sched("wait vgpu idle\n");

		wait_event(scheduler->workload_complete_wq,
				!atomic_read(&s->running_workload_num));
	}
}

void intel_gvt_clean_workload_scheduler(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct intel_engine_cs *engine;
	enum intel_engine_id i;

	gvt_dbg_core("clean workload scheduler\n");

	for_each_engine(engine, gvt->dev_priv, i) {
		atomic_notifier_chain_unregister(
					&engine->context_status_notifier,
					&gvt->shadow_ctx_notifier_block[i]);
		kthread_stop(scheduler->thread[i]);
	}
}

int intel_gvt_init_workload_scheduler(struct intel_gvt *gvt)
{
	struct intel_gvt_workload_scheduler *scheduler = &gvt->scheduler;
	struct workload_thread_param *param = NULL;
	struct intel_engine_cs *engine;
	enum intel_engine_id i;
	int ret;

	gvt_dbg_core("init workload scheduler\n");

	init_waitqueue_head(&scheduler->workload_complete_wq);

	for_each_engine(engine, gvt->dev_priv, i) {
		init_waitqueue_head(&scheduler->waitq[i]);

		param = kzalloc(sizeof(*param), GFP_KERNEL);
		if (!param) {
			ret = -ENOMEM;
			goto err;
		}

		param->gvt = gvt;
		param->ring_id = i;

		scheduler->thread[i] = kthread_run(workload_thread, param,
			"gvt workload %d", i);
		if (IS_ERR(scheduler->thread[i])) {
			gvt_err("fail to create workload thread\n");
			ret = PTR_ERR(scheduler->thread[i]);
			goto err;
		}

		gvt->shadow_ctx_notifier_block[i].notifier_call =
					shadow_context_status_change;
		atomic_notifier_chain_register(&engine->context_status_notifier,
					&gvt->shadow_ctx_notifier_block[i]);
	}
	return 0;
err:
	intel_gvt_clean_workload_scheduler(gvt);
	kfree(param);
	param = NULL;
	return ret;
}

/**
 * intel_vgpu_clean_submission - free submission-related resources for vGPU
 * @vgpu: a vGPU
 *
 * This function is called when a vGPU is being destroyed.
 *
 */
void intel_vgpu_clean_submission(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_submission *s = &vgpu->submission;

	i915_gem_context_put(s->shadow_ctx);
	kmem_cache_destroy(s->workloads);
}

/**
 * intel_vgpu_setup_submission - setup submission-related resources for vGPU
 * @vgpu: a vGPU
 *
 * This function is called when a vGPU is being created.
 *
 * Returns:
 * Zero on success, negative error code if failed.
 *
 */
int intel_vgpu_setup_submission(struct intel_vgpu *vgpu)
{
	struct intel_vgpu_submission *s = &vgpu->submission;
	enum intel_engine_id i;
	struct intel_engine_cs *engine;
	int ret;

	s->shadow_ctx = i915_gem_context_create_gvt(
			&vgpu->gvt->dev_priv->drm);
	if (IS_ERR(s->shadow_ctx))
		return PTR_ERR(s->shadow_ctx);

	bitmap_zero(s->shadow_ctx_desc_updated, I915_NUM_ENGINES);

	s->workloads = kmem_cache_create("gvt-g_vgpu_workload",
			sizeof(struct intel_vgpu_workload), 0,
			SLAB_HWCACHE_ALIGN,
			NULL);

	if (!s->workloads) {
		ret = -ENOMEM;
		goto out_shadow_ctx;
	}

	for_each_engine(engine, vgpu->gvt->dev_priv, i)
		INIT_LIST_HEAD(&s->workload_q_head[i]);

	atomic_set(&s->running_workload_num, 0);
	bitmap_zero(s->tlb_handle_pending, I915_NUM_ENGINES);

	return 0;

out_shadow_ctx:
	i915_gem_context_put(s->shadow_ctx);
	return ret;
}