// SPDX-License-Identifier: GPL-2.0-only
/*
 * Kernel-based Virtual Machine driver for Linux
 *
 * AMD SVM-SEV support
 *
 * Copyright 2010 Red Hat, Inc. and/or its affiliates.
 */

#include <linux/kvm_types.h>
#include <linux/kvm_host.h>
#include <linux/kernel.h>
#include <linux/highmem.h>
#include <linux/psp-sev.h>
#include <linux/pagemap.h>
#include <linux/swap.h>
#include <linux/misc_cgroup.h>
#include <linux/processor.h>
#include <linux/trace_events.h>

#include <asm/pkru.h>
#include <asm/trapnr.h>
#include <asm/fpu/xcr.h>

#include "x86.h"
#include "svm.h"
#include "svm_ops.h"
#include "cpuid.h"
#include "trace.h"

#ifndef CONFIG_KVM_AMD_SEV
/*
 * When this config is not defined, SEV feature is not supported and APIs in
 * this file are not used but this file still gets compiled into the KVM AMD
 * module.
 *
 * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
 * misc_res_type {} defined in linux/misc_cgroup.h.
 *
 * Below macros allow compilation to succeed.
 */
#define MISC_CG_RES_SEV MISC_CG_RES_TYPES
#define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
#endif

#ifdef CONFIG_KVM_AMD_SEV
/* enable/disable SEV support */
static bool sev_enabled = true;
module_param_named(sev, sev_enabled, bool, 0444);

/* enable/disable SEV-ES support */
static bool sev_es_enabled = true;
module_param_named(sev_es, sev_es_enabled, bool, 0444);
#else
#define sev_enabled false
#define sev_es_enabled false
#endif /* CONFIG_KVM_AMD_SEV */

static u8 sev_enc_bit;
static DECLARE_RWSEM(sev_deactivate_lock);
static DEFINE_MUTEX(sev_bitmap_lock);
unsigned int max_sev_asid;
static unsigned int min_sev_asid;
static unsigned long sev_me_mask;
static unsigned int nr_asids;
static unsigned long *sev_asid_bitmap;
static unsigned long *sev_reclaim_asid_bitmap;

struct enc_region {
	struct list_head list;
	unsigned long npages;
	struct page **pages;
	unsigned long uaddr;
	unsigned long size;
};

/* Called with the sev_bitmap_lock held, or on shutdown */
static int sev_flush_asids(int min_asid, int max_asid)
{
	int ret, asid, error = 0;

	/* Check if there are any ASIDs to reclaim before performing a flush */
	asid = find_next_bit(sev_reclaim_asid_bitmap, nr_asids, min_asid);
	if (asid > max_asid)
		return -EBUSY;

	/*
	 * DEACTIVATE will clear the WBINVD indicator causing DF_FLUSH to fail,
	 * so it must be guarded.
	 */
	down_write(&sev_deactivate_lock);

	wbinvd_on_all_cpus();
	ret = sev_guest_df_flush(&error);

	up_write(&sev_deactivate_lock);

	if (ret)
		pr_err("SEV: DF_FLUSH failed, ret=%d, error=%#x\n", ret, error);

	return ret;
}

static inline bool is_mirroring_enc_context(struct kvm *kvm)
{
	return !!to_kvm_svm(kvm)->sev_info.enc_context_owner;
}

/* Must be called with the sev_bitmap_lock held */
static bool __sev_recycle_asids(int min_asid, int max_asid)
{
	if (sev_flush_asids(min_asid, max_asid))
		return false;

	/* The flush process will flush all reclaimable SEV and SEV-ES ASIDs */
	bitmap_xor(sev_asid_bitmap, sev_asid_bitmap, sev_reclaim_asid_bitmap,
		   nr_asids);
	bitmap_zero(sev_reclaim_asid_bitmap, nr_asids);

	return true;
}

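/*
 * Charge/uncharge a single SEV or SEV-ES ASID against the misc cgroup
 * tracked in @sev->misc_cg.
 */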
static int sev_misc_cg_try_charge(struct kvm_sev_info *sev)
{
	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
	return misc_cg_try_charge(type, sev->misc_cg, 1);
}

static void sev_misc_cg_uncharge(struct kvm_sev_info *sev)
{
	enum misc_res_type type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
	misc_cg_uncharge(type, sev->misc_cg, 1);
}

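/*
 * Charge the caller's misc cgroup for an ASID and allocate a free ASID of
 * the appropriate type (SEV or SEV-ES), recycling reclaimed ASIDs if the
 * bitmap is exhausted.
 */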
static int sev_asid_new(struct kvm_sev_info *sev)
{
	int asid, min_asid, max_asid, ret;
	bool retry = true;

	WARN_ON(sev->misc_cg);
	sev->misc_cg = get_current_misc_cg();
	ret = sev_misc_cg_try_charge(sev);
	if (ret) {
		put_misc_cg(sev->misc_cg);
		sev->misc_cg = NULL;
		return ret;
	}

	mutex_lock(&sev_bitmap_lock);

	/*
	 * SEV-enabled guests must use ASIDs from min_sev_asid to max_sev_asid.
	 * SEV-ES-enabled guests can use ASIDs from 1 to min_sev_asid - 1.
	 */
	min_asid = sev->es_active ? 1 : min_sev_asid;
	max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
again:
	asid = find_next_zero_bit(sev_asid_bitmap, max_asid + 1, min_asid);
	if (asid > max_asid) {
		if (retry && __sev_recycle_asids(min_asid, max_asid)) {
			retry = false;
			goto again;
		}
		mutex_unlock(&sev_bitmap_lock);
		ret = -EBUSY;
		goto e_uncharge;
	}

	__set_bit(asid, sev_asid_bitmap);

	mutex_unlock(&sev_bitmap_lock);

	return asid;
e_uncharge:
	sev_misc_cg_uncharge(sev);
	put_misc_cg(sev->misc_cg);
	sev->misc_cg = NULL;
	return ret;
}

static int sev_get_asid(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return sev->asid;
}

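/*
 * Return the ASID to the reclaim bitmap, drop any per-CPU VMCB cache
 * entries that still reference it, and uncharge the misc cgroup.
 */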
static void sev_asid_free(struct kvm_sev_info *sev)
{
	struct svm_cpu_data *sd;
	int cpu;

	mutex_lock(&sev_bitmap_lock);

	__set_bit(sev->asid, sev_reclaim_asid_bitmap);

	for_each_possible_cpu(cpu) {
		sd = per_cpu(svm_data, cpu);
		sd->sev_vmcbs[sev->asid] = NULL;
	}

	mutex_unlock(&sev_bitmap_lock);

	sev_misc_cg_uncharge(sev);
	put_misc_cg(sev->misc_cg);
	sev->misc_cg = NULL;
}

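/* Ask the SEV firmware to release the guest context bound to @handle. */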
static void sev_decommission(unsigned int handle)
{
	struct sev_data_decommission decommission;

	if (!handle)
		return;

	decommission.handle = handle;
	sev_guest_decommission(&decommission, NULL);
}

static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
{
	struct sev_data_deactivate deactivate;

	if (!handle)
		return;

	deactivate.handle = handle;

	/* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
	down_read(&sev_deactivate_lock);
	sev_guest_deactivate(&deactivate, NULL);
	up_read(&sev_deactivate_lock);

	sev_decommission(handle);
}

static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	int asid, ret;

	if (kvm->created_vcpus)
		return -EINVAL;

	ret = -EBUSY;
	if (unlikely(sev->active))
		return ret;

	sev->active = true;
	sev->es_active = argp->id == KVM_SEV_ES_INIT;
	asid = sev_asid_new(sev);
	if (asid < 0)
		goto e_no_asid;
	sev->asid = asid;

	ret = sev_platform_init(&argp->error);
	if (ret)
		goto e_free;

	INIT_LIST_HEAD(&sev->regions_list);

	return 0;

e_free:
	sev_asid_free(sev);
	sev->asid = 0;
e_no_asid:
	sev->es_active = false;
	sev->active = false;
	return ret;
}

static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
{
	struct sev_data_activate activate;
	int asid = sev_get_asid(kvm);
	int ret;

	/* activate ASID on the given handle */
	activate.handle = handle;
	activate.asid   = asid;
	ret = sev_guest_activate(&activate, error);

	return ret;
}

static int __sev_issue_cmd(int fd, int id, void *data, int *error)
{
	struct fd f;
	int ret;

	f = fdget(fd);
	if (!f.file)
		return -EBADF;

	ret = sev_issue_cmd_external_user(f.file, id, data, error);

	fdput(f);
	return ret;
}

static int sev_issue_cmd(struct kvm *kvm, int id, void *data, int *error)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	return __sev_issue_cmd(sev->fd, id, data, error);
}

static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_start start;
	struct kvm_sev_launch_start params;
	void *dh_blob, *session_blob;
	int *error = &argp->error;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	memset(&start, 0, sizeof(start));

	dh_blob = NULL;
	if (params.dh_uaddr) {
		dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
		if (IS_ERR(dh_blob))
			return PTR_ERR(dh_blob);

		start.dh_cert_address = __sme_set(__pa(dh_blob));
		start.dh_cert_len = params.dh_len;
	}

	session_blob = NULL;
	if (params.session_uaddr) {
		session_blob = psp_copy_user_blob(params.session_uaddr, params.session_len);
		if (IS_ERR(session_blob)) {
			ret = PTR_ERR(session_blob);
			goto e_free_dh;
		}

		start.session_address = __sme_set(__pa(session_blob));
		start.session_len = params.session_len;
	}

	start.handle = params.handle;
	start.policy = params.policy;

	/* create memory encryption context */
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error);
	if (ret)
		goto e_free_session;

	/* Bind ASID to this guest */
	ret = sev_bind_asid(kvm, start.handle, error);
	if (ret) {
		sev_decommission(start.handle);
		goto e_free_session;
	}

	/* return handle to userspace */
	params.handle = start.handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
		sev_unbind_asid(kvm, start.handle);
		ret = -EFAULT;
		goto e_free_session;
	}

	sev->handle = start.handle;
	sev->fd = argp->sev_fd;

e_free_session:
	kfree(session_blob);
e_free_dh:
	kfree(dh_blob);
	return ret;
}

static struct page **sev_pin_memory(struct kvm *kvm, unsigned long uaddr,
				    unsigned long ulen, unsigned long *n,
				    int write)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	unsigned long npages, size;
	int npinned;
	unsigned long locked, lock_limit;
	struct page **pages;
	unsigned long first, last;
	int ret;

	lockdep_assert_held(&kvm->lock);

	if (ulen == 0 || uaddr + ulen < uaddr)
		return ERR_PTR(-EINVAL);

	/* Calculate number of pages. */
	first = (uaddr & PAGE_MASK) >> PAGE_SHIFT;
	last = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
	npages = (last - first + 1);

	locked = sev->pages_locked + npages;
	lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT;
	if (locked > lock_limit && !capable(CAP_IPC_LOCK)) {
		pr_err("SEV: %lu locked pages exceed the lock limit of %lu.\n", locked, lock_limit);
		return ERR_PTR(-ENOMEM);
	}

	if (WARN_ON_ONCE(npages > INT_MAX))
		return ERR_PTR(-EINVAL);

	/* Avoid using vmalloc for smaller buffers. */
	size = npages * sizeof(struct page *);
	if (size > PAGE_SIZE)
		pages = __vmalloc(size, GFP_KERNEL_ACCOUNT | __GFP_ZERO);
	else
		pages = kmalloc(size, GFP_KERNEL_ACCOUNT);

	if (!pages)
		return ERR_PTR(-ENOMEM);

	/* Pin the user virtual address. */
	npinned = pin_user_pages_fast(uaddr, npages, write ? FOLL_WRITE : 0, pages);
	if (npinned != npages) {
		pr_err("SEV: Failure locking %lu pages.\n", npages);
		ret = -ENOMEM;
		goto err;
	}

	*n = npages;
	sev->pages_locked = locked;

	return pages;

err:
	if (npinned > 0)
		unpin_user_pages(pages, npinned);

	kvfree(pages);
	return ERR_PTR(ret);
}

static void sev_unpin_memory(struct kvm *kvm, struct page **pages,
			     unsigned long npages)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;

	unpin_user_pages(pages, npages);
	kvfree(pages);
	sev->pages_locked -= npages;
}

static void sev_clflush_pages(struct page *pages[], unsigned long npages)
{
	uint8_t *page_virtual;
	unsigned long i;

	if (this_cpu_has(X86_FEATURE_SME_COHERENT) || npages == 0 ||
	    pages == NULL)
		return;

	for (i = 0; i < npages; i++) {
		page_virtual = kmap_atomic(pages[i]);
		clflush_cache_range(page_virtual, PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}
}

static unsigned long get_num_contig_pages(unsigned long idx,
				struct page **inpages, unsigned long npages)
{
	unsigned long paddr, next_paddr;
	unsigned long i = idx + 1, pages = 1;

	/* find the number of contiguous pages starting from idx */
	paddr = __sme_page_pa(inpages[idx]);
	while (i < npages) {
		next_paddr = __sme_page_pa(inpages[i++]);
		if ((paddr + PAGE_SIZE) == next_paddr) {
			pages++;
			paddr = next_paddr;
			continue;
		}
		break;
	}

	return pages;
}

static int sev_launch_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_launch_update_data params;
	struct sev_data_launch_update_data data;
	struct page **inpages;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	vaddr = params.uaddr;
	size = params.len;
	vaddr_end = vaddr + size;

	/* Lock the user memory. */
	inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
	if (IS_ERR(inpages))
		return PTR_ERR(inpages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(inpages, npages);

	data.reserved = 0;
	data.handle = sev->handle;

	for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
		int offset, len;

		/*
		 * If the user buffer is not page-aligned, calculate the offset
		 * within the page.
		 */
		offset = vaddr & (PAGE_SIZE - 1);

		/* Calculate the number of pages that can be encrypted in one go. */
		pages = get_num_contig_pages(i, inpages, npages);

		len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);

		data.len = len;
		data.address = __sme_page_pa(inpages[i]) + offset;
		ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
		if (ret)
			goto e_unpin;

		size -= len;
		next_vaddr = vaddr + len;
	}

e_unpin:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < npages; i++) {
		set_page_dirty_lock(inpages[i]);
		mark_page_accessed(inpages[i]);
	}
	/* unlock the user pages */
	sev_unpin_memory(kvm, inpages, npages);
	return ret;
}

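/*
 * Copy the vCPU's current register state into its VMSA page so that
 * LAUNCH_UPDATE_VMSA encrypts the intended initial guest state.
 */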
static int sev_es_sync_vmsa(struct vcpu_svm *svm)
{
	struct vmcb_save_area *save = &svm->vmcb->save;

	/* Check some debug related fields before encrypting the VMSA */
	if (svm->vcpu.guest_debug || (save->dr7 & ~DR7_FIXED_1))
		return -EINVAL;

	/* Sync registers */
	save->rax = svm->vcpu.arch.regs[VCPU_REGS_RAX];
	save->rbx = svm->vcpu.arch.regs[VCPU_REGS_RBX];
	save->rcx = svm->vcpu.arch.regs[VCPU_REGS_RCX];
	save->rdx = svm->vcpu.arch.regs[VCPU_REGS_RDX];
	save->rsp = svm->vcpu.arch.regs[VCPU_REGS_RSP];
	save->rbp = svm->vcpu.arch.regs[VCPU_REGS_RBP];
	save->rsi = svm->vcpu.arch.regs[VCPU_REGS_RSI];
	save->rdi = svm->vcpu.arch.regs[VCPU_REGS_RDI];
#ifdef CONFIG_X86_64
	save->r8  = svm->vcpu.arch.regs[VCPU_REGS_R8];
	save->r9  = svm->vcpu.arch.regs[VCPU_REGS_R9];
	save->r10 = svm->vcpu.arch.regs[VCPU_REGS_R10];
	save->r11 = svm->vcpu.arch.regs[VCPU_REGS_R11];
	save->r12 = svm->vcpu.arch.regs[VCPU_REGS_R12];
	save->r13 = svm->vcpu.arch.regs[VCPU_REGS_R13];
	save->r14 = svm->vcpu.arch.regs[VCPU_REGS_R14];
	save->r15 = svm->vcpu.arch.regs[VCPU_REGS_R15];
#endif
	save->rip = svm->vcpu.arch.regs[VCPU_REGS_RIP];

	/* Sync some non-GPR registers before encrypting */
	save->xcr0 = svm->vcpu.arch.xcr0;
	save->pkru = svm->vcpu.arch.pkru;
	save->xss  = svm->vcpu.arch.ia32_xss;
	save->dr6  = svm->vcpu.arch.dr6;

	/*
	 * SEV-ES will use a VMSA that is pointed to by the VMCB, not
	 * the traditional VMSA that is part of the VMCB. Copy the
	 * traditional VMSA as it has been built so far (in prep
	 * for LAUNCH_UPDATE_VMSA) to be the initial SEV-ES state.
	 */
	memcpy(svm->sev_es.vmsa, save, sizeof(*save));

	return 0;
}

static int __sev_launch_update_vmsa(struct kvm *kvm, struct kvm_vcpu *vcpu,
				    int *error)
{
	struct sev_data_launch_update_vmsa vmsa;
	struct vcpu_svm *svm = to_svm(vcpu);
	int ret;

	/* Perform some pre-encryption checks against the VMSA */
	ret = sev_es_sync_vmsa(svm);
	if (ret)
		return ret;

	/*
	 * The LAUNCH_UPDATE_VMSA command will perform in-place encryption of
	 * the VMSA memory content (i.e it will write the same memory region
	 * with the guest's key), so invalidate it first.
	 */
	clflush_cache_range(svm->sev_es.vmsa, PAGE_SIZE);

	vmsa.reserved = 0;
	vmsa.handle = to_kvm_svm(kvm)->sev_info.handle;
	vmsa.address = __sme_pa(svm->sev_es.vmsa);
	vmsa.len = PAGE_SIZE;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa, error);
	if (ret)
		return ret;

	vcpu->arch.guest_state_protected = true;
	return 0;
}

static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_vcpu *vcpu;
	int i, ret;

	if (!sev_es_guest(kvm))
		return -ENOTTY;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		ret = mutex_lock_killable(&vcpu->mutex);
		if (ret)
			return ret;

		ret = __sev_launch_update_vmsa(kvm, vcpu, &argp->error);

		mutex_unlock(&vcpu->mutex);
		if (ret)
			return ret;
	}

	return 0;
}

static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *measure = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_measure data;
	struct kvm_sev_launch_measure params;
	void __user *p = NULL;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, measure, sizeof(params)))
		return -EFAULT;

	memset(&data, 0, sizeof(data));

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE)
			return -EINVAL;

		blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
		if (!blob)
			return -ENOMEM;

		data.address = __psp_pa(blob);
		data.len = params.len;
	}

cmd:
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);

	/*
	 * If we only queried the measurement blob length, the FW responded with
	 * the expected data.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data.len;
	if (copy_to_user(measure, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
	return ret;
}

static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_finish data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
}

static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_guest_status params;
	struct sev_data_guest_status data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	memset(&data, 0, sizeof(data));

	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
	if (ret)
		return ret;

	params.policy = data.policy;
	params.state = data.state;
	params.handle = data.handle;

	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
		ret = -EFAULT;

	return ret;
}

static int __sev_issue_dbg_cmd(struct kvm *kvm, unsigned long src,
			       unsigned long dst, int size,
			       int *error, bool enc)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_dbg data;

	data.reserved = 0;
	data.handle = sev->handle;
	data.dst_addr = dst;
	data.src_addr = src;
	data.len = size;

	return sev_issue_cmd(kvm,
			     enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
			     &data, error);
}

static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
			     unsigned long dst_paddr, int sz, int *err)
{
	int offset;

	/*
	 * It's safe to read more than we are asked; the caller should ensure that
	 * the destination has enough space.
	 */
	offset = src_paddr & 15;
	src_paddr = round_down(src_paddr, 16);
	sz = round_up(sz + offset, 16);

	return __sev_issue_dbg_cmd(kvm, src_paddr, dst_paddr, sz, err, false);
}

static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
				  void __user *dst_uaddr,
				  unsigned long dst_paddr,
				  int size, int *err)
{
	struct page *tpage = NULL;
	int ret, offset;

	/* if inputs are not 16-byte then use intermediate buffer */
	if (!IS_ALIGNED(dst_paddr, 16) ||
	    !IS_ALIGNED(paddr,     16) ||
	    !IS_ALIGNED(size,      16)) {
		tpage = (void *)alloc_page(GFP_KERNEL);
		if (!tpage)
			return -ENOMEM;

		dst_paddr = __sme_page_pa(tpage);
	}

	ret = __sev_dbg_decrypt(kvm, paddr, dst_paddr, size, err);
	if (ret)
		goto e_free;

	if (tpage) {
		offset = paddr & 15;
		if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
			ret = -EFAULT;
	}

e_free:
	if (tpage)
		__free_page(tpage);

	return ret;
}

static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
				  void __user *vaddr,
				  unsigned long dst_paddr,
				  void __user *dst_vaddr,
				  int size, int *error)
{
	struct page *src_tpage = NULL;
	struct page *dst_tpage = NULL;
	int ret, len = size;

	/* If source buffer is not aligned then use an intermediate buffer */
	if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
		src_tpage = alloc_page(GFP_KERNEL);
		if (!src_tpage)
			return -ENOMEM;

		if (copy_from_user(page_address(src_tpage), vaddr, size)) {
			__free_page(src_tpage);
			return -EFAULT;
		}

		paddr = __sme_page_pa(src_tpage);
	}

	/*
	 * If the destination buffer or length is not aligned, do a read-modify-write:
	 *   - decrypt the destination into an intermediate buffer
	 *   - copy the source buffer into an intermediate buffer
	 *   - use the intermediate buffer as the source buffer
	 */
	if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
		int dst_offset;

		dst_tpage = alloc_page(GFP_KERNEL);
		if (!dst_tpage) {
			ret = -ENOMEM;
			goto e_free;
		}

		ret = __sev_dbg_decrypt(kvm, dst_paddr,
					__sme_page_pa(dst_tpage), size, error);
		if (ret)
			goto e_free;

		/*
		 * If the source is a kernel buffer then use memcpy(), otherwise
		 * copy_from_user().
		 */
		dst_offset = dst_paddr & 15;

		if (src_tpage)
			memcpy(page_address(dst_tpage) + dst_offset,
			       page_address(src_tpage), size);
		else {
			if (copy_from_user(page_address(dst_tpage) + dst_offset,
					   vaddr, size)) {
				ret = -EFAULT;
				goto e_free;
			}
		}

		paddr = __sme_page_pa(dst_tpage);
		dst_paddr = round_down(dst_paddr, 16);
		len = round_up(size, 16);
	}

	ret = __sev_issue_dbg_cmd(kvm, paddr, dst_paddr, len, error, true);

e_free:
	if (src_tpage)
		__free_page(src_tpage);
	if (dst_tpage)
		__free_page(dst_tpage);
	return ret;
}

static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
{
	unsigned long vaddr, vaddr_end, next_vaddr;
	unsigned long dst_vaddr;
	struct page **src_p, **dst_p;
	struct kvm_sev_dbg debug;
	unsigned long n;
	unsigned int size;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&debug, (void __user *)(uintptr_t)argp->data, sizeof(debug)))
		return -EFAULT;

	if (!debug.len || debug.src_uaddr + debug.len < debug.src_uaddr)
		return -EINVAL;
	if (!debug.dst_uaddr)
		return -EINVAL;

	vaddr = debug.src_uaddr;
	size = debug.len;
	vaddr_end = vaddr + size;
	dst_vaddr = debug.dst_uaddr;

	for (; vaddr < vaddr_end; vaddr = next_vaddr) {
		int len, s_off, d_off;

		/* lock userspace source and destination page */
		src_p = sev_pin_memory(kvm, vaddr & PAGE_MASK, PAGE_SIZE, &n, 0);
		if (IS_ERR(src_p))
			return PTR_ERR(src_p);

		dst_p = sev_pin_memory(kvm, dst_vaddr & PAGE_MASK, PAGE_SIZE, &n, 1);
		if (IS_ERR(dst_p)) {
			sev_unpin_memory(kvm, src_p, n);
			return PTR_ERR(dst_p);
		}

		/*
		 * Flush (on non-coherent CPUs) before DBG_{DE,EN}CRYPT read or modify
		 * the pages; flush the destination too so that future accesses do not
		 * see stale data.
		 */
		sev_clflush_pages(src_p, 1);
		sev_clflush_pages(dst_p, 1);

		/*
		 * Since user buffer may not be page aligned, calculate the
		 * offset within the page.
		 */
		s_off = vaddr & ~PAGE_MASK;
		d_off = dst_vaddr & ~PAGE_MASK;
		len = min_t(size_t, (PAGE_SIZE - s_off), size);

		if (dec)
			ret = __sev_dbg_decrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)dst_vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     len, &argp->error);
		else
			ret = __sev_dbg_encrypt_user(kvm,
						     __sme_page_pa(src_p[0]) + s_off,
						     (void __user *)vaddr,
						     __sme_page_pa(dst_p[0]) + d_off,
						     (void __user *)dst_vaddr,
						     len, &argp->error);

		sev_unpin_memory(kvm, src_p, n);
		sev_unpin_memory(kvm, dst_p, n);

		if (ret)
			goto err;

		next_vaddr = vaddr + len;
		dst_vaddr = dst_vaddr + len;
		size -= len;
	}
err:
	return ret;
}

static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_launch_secret data;
	struct kvm_sev_launch_secret params;
	struct page **pages;
	void *blob, *hdr;
	unsigned long n, i;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	pages = sev_pin_memory(kvm, params.guest_uaddr, params.guest_len, &n, 1);
	if (IS_ERR(pages))
		return PTR_ERR(pages);

	/*
	 * Flush (on non-coherent CPUs) before LAUNCH_SECRET encrypts pages in
	 * place; the cache may contain the data that was written unencrypted.
	 */
	sev_clflush_pages(pages, n);

	/*
	 * The secret must be copied into a contiguous memory region; verify that
	 * the userspace memory pages are contiguous before issuing the command.
	 */
	if (get_num_contig_pages(0, pages, n) != n) {
		ret = -EINVAL;
		goto e_unpin_memory;
	}

	memset(&data, 0, sizeof(data));

	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	data.guest_address = __sme_page_pa(pages[0]) + offset;
	data.guest_len = params.guest_len;

	blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(blob)) {
		ret = PTR_ERR(blob);
		goto e_unpin_memory;
	}

	data.trans_address = __psp_pa(blob);
	data.trans_len = params.trans_len;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr)) {
		ret = PTR_ERR(hdr);
		goto e_free_blob;
	}
	data.hdr_address = __psp_pa(hdr);
	data.hdr_len = params.hdr_len;

	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);

	kfree(hdr);

e_free_blob:
	kfree(blob);
e_unpin_memory:
	/* content of memory is updated, mark pages dirty */
	for (i = 0; i < n; i++) {
		set_page_dirty_lock(pages[i]);
		mark_page_accessed(pages[i]);
	}
	sev_unpin_memory(kvm, pages, n);
	return ret;
}

static int sev_get_attestation_report(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	void __user *report = (void __user *)(uintptr_t)argp->data;
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_attestation_report data;
	struct kvm_sev_attestation_report params;
	void __user *p;
	void *blob = NULL;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
		return -EFAULT;

	memset(&data, 0, sizeof(data));

	/* User wants to query the blob length */
	if (!params.len)
		goto cmd;

	p = (void __user *)(uintptr_t)params.uaddr;
	if (p) {
		if (params.len > SEV_FW_BLOB_MAX_SIZE)
			return -EINVAL;

		blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
		if (!blob)
			return -ENOMEM;

		data.address = __psp_pa(blob);
		data.len = params.len;
		memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
	}
cmd:
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
	/*
	 * If we only queried the report length, the FW responded with the
	 * expected data.
	 */
	if (!params.len)
		goto done;

	if (ret)
		goto e_free_blob;

	if (blob) {
		if (copy_to_user(p, blob, params.len))
			ret = -EFAULT;
	}

done:
	params.len = data.len;
	if (copy_to_user(report, &params, sizeof(params)))
		ret = -EFAULT;
e_free_blob:
	kfree(blob);
	return ret;
}

/* Userspace wants to query session length. */
static int
__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
				      struct kvm_sev_send_start *params)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_start data;
	int ret;

	memset(&data, 0, sizeof(data));
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);

	params->session_len = data.session_len;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
				sizeof(struct kvm_sev_send_start)))
		ret = -EFAULT;

	return ret;
}

static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_start data;
	struct kvm_sev_send_start params;
	void *amd_certs, *session_data;
	void *pdh_cert, *plat_certs;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
				sizeof(struct kvm_sev_send_start)))
		return -EFAULT;

	/* if session_len is zero, userspace wants to query the session length */
	if (!params.session_len)
		return __sev_send_start_query_session_length(kvm, argp,
				&params);

	/* some sanity checks */
	if (!params.pdh_cert_uaddr || !params.pdh_cert_len ||
	    !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE)
		return -EINVAL;

	/* allocate the memory to hold the session data blob */
	session_data = kmalloc(params.session_len, GFP_KERNEL_ACCOUNT);
	if (!session_data)
		return -ENOMEM;

	/* copy the certificate blobs from userspace */
	pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr,
				params.pdh_cert_len);
	if (IS_ERR(pdh_cert)) {
		ret = PTR_ERR(pdh_cert);
		goto e_free_session;
	}

	plat_certs = psp_copy_user_blob(params.plat_certs_uaddr,
				params.plat_certs_len);
	if (IS_ERR(plat_certs)) {
		ret = PTR_ERR(plat_certs);
		goto e_free_pdh;
	}

	amd_certs = psp_copy_user_blob(params.amd_certs_uaddr,
				params.amd_certs_len);
	if (IS_ERR(amd_certs)) {
		ret = PTR_ERR(amd_certs);
		goto e_free_plat_cert;
	}

	/* populate the FW SEND_START field with system physical address */
	memset(&data, 0, sizeof(data));
	data.pdh_cert_address = __psp_pa(pdh_cert);
	data.pdh_cert_len = params.pdh_cert_len;
	data.plat_certs_address = __psp_pa(plat_certs);
	data.plat_certs_len = params.plat_certs_len;
	data.amd_certs_address = __psp_pa(amd_certs);
	data.amd_certs_len = params.amd_certs_len;
	data.session_address = __psp_pa(session_data);
	data.session_len = params.session_len;
	data.handle = sev->handle;

	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);

	if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
			session_data, params.session_len)) {
		ret = -EFAULT;
		goto e_free_amd_cert;
	}

	params.policy = data.policy;
	params.session_len = data.session_len;
	if (copy_to_user((void __user *)(uintptr_t)argp->data, &params,
				sizeof(struct kvm_sev_send_start)))
		ret = -EFAULT;

e_free_amd_cert:
	kfree(amd_certs);
e_free_plat_cert:
	kfree(plat_certs);
e_free_pdh:
	kfree(pdh_cert);
e_free_session:
	kfree(session_data);
	return ret;
}

/* Userspace wants to query either header or trans length. */
static int
__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
				     struct kvm_sev_send_update_data *params)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_update_data data;
	int ret;

	memset(&data, 0, sizeof(data));
	data.handle = sev->handle;
	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);

	params->hdr_len = data.hdr_len;
	params->trans_len = data.trans_len;

	if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
			 sizeof(struct kvm_sev_send_update_data)))
		ret = -EFAULT;

	return ret;
}

static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_update_data data;
	struct kvm_sev_send_update_data params;
	void *hdr, *trans_data;
	struct page **guest_page;
	unsigned long n;
	int ret, offset;

	if (!sev_guest(kvm))
		return -ENOTTY;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			sizeof(struct kvm_sev_send_update_data)))
		return -EFAULT;

	/* userspace wants to query either header or trans length */
	if (!params.trans_len || !params.hdr_len)
		return __sev_send_update_data_query_lengths(kvm, argp, &params);

	if (!params.trans_uaddr || !params.guest_uaddr ||
	    !params.guest_len || !params.hdr_uaddr)
		return -EINVAL;

	/* Check if we are crossing the page boundary */
	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	if ((params.guest_len + offset > PAGE_SIZE))
		return -EINVAL;

	/* Pin guest memory */
	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
				    PAGE_SIZE, &n, 0);
	if (IS_ERR(guest_page))
		return PTR_ERR(guest_page);

	/* allocate memory for header and transport buffer */
	ret = -ENOMEM;
	hdr = kmalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
	if (!hdr)
		goto e_unpin;

	trans_data = kmalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
	if (!trans_data)
		goto e_free_hdr;

	memset(&data, 0, sizeof(data));
	data.hdr_address = __psp_pa(hdr);
	data.hdr_len = params.hdr_len;
	data.trans_address = __psp_pa(trans_data);
	data.trans_len = params.trans_len;

	/* The SEND_UPDATE_DATA command requires C-bit to be always set. */
	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
	data.guest_address |= sev_me_mask;
	data.guest_len = params.guest_len;
	data.handle = sev->handle;

	ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);

	if (ret)
		goto e_free_trans_data;

	/* copy transport buffer to user space */
	if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
			 trans_data, params.trans_len)) {
		ret = -EFAULT;
		goto e_free_trans_data;
	}

	/* Copy packet header to userspace. */
	if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
			 params.hdr_len))
		ret = -EFAULT;

e_free_trans_data:
	kfree(trans_data);
e_free_hdr:
	kfree(hdr);
e_unpin:
	sev_unpin_memory(kvm, guest_page, n);

	return ret;
}

static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_finish data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &data, &argp->error);
}

static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_send_cancel data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &data, &argp->error);
}

static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_receive_start start;
	struct kvm_sev_receive_start params;
	int *error = &argp->error;
	void *session_data;
	void *pdh_data;
	int ret;

	if (!sev_guest(kvm))
		return -ENOTTY;

	/* Get parameter from the userspace */
	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			sizeof(struct kvm_sev_receive_start)))
		return -EFAULT;

	/* some sanity checks */
	if (!params.pdh_uaddr || !params.pdh_len ||
	    !params.session_uaddr || !params.session_len)
		return -EINVAL;

	pdh_data = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len);
	if (IS_ERR(pdh_data))
		return PTR_ERR(pdh_data);

	session_data = psp_copy_user_blob(params.session_uaddr,
			params.session_len);
	if (IS_ERR(session_data)) {
		ret = PTR_ERR(session_data);
		goto e_free_pdh;
	}

	memset(&start, 0, sizeof(start));
	start.handle = params.handle;
	start.policy = params.policy;
	start.pdh_cert_address = __psp_pa(pdh_data);
	start.pdh_cert_len = params.pdh_len;
	start.session_address = __psp_pa(session_data);
	start.session_len = params.session_len;

	/* create memory encryption context */
	ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start,
				error);
	if (ret)
		goto e_free_session;

	/* Bind ASID to this guest */
	ret = sev_bind_asid(kvm, start.handle, error);
	if (ret) {
		sev_decommission(start.handle);
		goto e_free_session;
	}

	params.handle = start.handle;
	if (copy_to_user((void __user *)(uintptr_t)argp->data,
			 &params, sizeof(struct kvm_sev_receive_start))) {
		ret = -EFAULT;
		sev_unbind_asid(kvm, start.handle);
		goto e_free_session;
	}

	sev->handle = start.handle;
	sev->fd = argp->sev_fd;

e_free_session:
	kfree(session_data);
e_free_pdh:
	kfree(pdh_data);

	return ret;
}

static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_receive_update_data params;
	struct sev_data_receive_update_data data;
	void *hdr = NULL, *trans = NULL;
	struct page **guest_page;
	unsigned long n;
	int ret, offset;

	if (!sev_guest(kvm))
		return -EINVAL;

	if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
			sizeof(struct kvm_sev_receive_update_data)))
		return -EFAULT;

	if (!params.hdr_uaddr || !params.hdr_len ||
	    !params.guest_uaddr || !params.guest_len ||
	    !params.trans_uaddr || !params.trans_len)
		return -EINVAL;

	/* Check if we are crossing the page boundary */
	offset = params.guest_uaddr & (PAGE_SIZE - 1);
	if ((params.guest_len + offset > PAGE_SIZE))
		return -EINVAL;

	hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
	if (IS_ERR(hdr))
		return PTR_ERR(hdr);

	trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
	if (IS_ERR(trans)) {
		ret = PTR_ERR(trans);
		goto e_free_hdr;
	}

	memset(&data, 0, sizeof(data));
	data.hdr_address = __psp_pa(hdr);
	data.hdr_len = params.hdr_len;
	data.trans_address = __psp_pa(trans);
	data.trans_len = params.trans_len;

	/* Pin guest memory */
	guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
				    PAGE_SIZE, &n, 1);
	if (IS_ERR(guest_page)) {
		ret = PTR_ERR(guest_page);
		goto e_free_trans;
	}

	/*
	 * Flush (on non-coherent CPUs) before RECEIVE_UPDATE_DATA, the PSP
	 * encrypts the written data with the guest's key, and the cache may
	 * contain dirty, unencrypted data.
	 */
	sev_clflush_pages(guest_page, n);

	/* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
	data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
	data.guest_address |= sev_me_mask;
	data.guest_len = params.guest_len;
	data.handle = sev->handle;

	ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
				&argp->error);

	sev_unpin_memory(kvm, guest_page, n);

e_free_trans:
	kfree(trans);
e_free_hdr:
	kfree(hdr);

	return ret;
}

static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct sev_data_receive_finish data;

	if (!sev_guest(kvm))
		return -ENOTTY;

	data.handle = sev->handle;
	return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &data, &argp->error);
}

static bool is_cmd_allowed_from_mirror(u32 cmd_id)
{
	/*
	 * Allow mirror VMs to call KVM_SEV_LAUNCH_UPDATE_VMSA to enable SEV-ES
	 * on their vCPUs. Also allow the debugging and status commands.
	 */
	if (cmd_id == KVM_SEV_LAUNCH_UPDATE_VMSA ||
	    cmd_id == KVM_SEV_GUEST_STATUS || cmd_id == KVM_SEV_DBG_DECRYPT ||
	    cmd_id == KVM_SEV_DBG_ENCRYPT)
		return true;

	return false;
}

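/*
 * Serialize intra-host migration: flag both VMs as migrating and take both
 * kvm->lock mutexes, backing out if either VM is already migrating or the
 * caller is interrupted.
 */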
static int sev_lock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;
	int r = -EBUSY;

	if (dst_kvm == src_kvm)
		return -EINVAL;

	/*
	 * Bail if these VMs are already involved in a migration to avoid
	 * deadlock between two VMs trying to migrate to/from each other.
	 */
	if (atomic_cmpxchg_acquire(&dst_sev->migration_in_progress, 0, 1))
		return -EBUSY;

	if (atomic_cmpxchg_acquire(&src_sev->migration_in_progress, 0, 1))
		goto release_dst;

	r = -EINTR;
	if (mutex_lock_killable(&dst_kvm->lock))
		goto release_src;
	if (mutex_lock_killable(&src_kvm->lock))
		goto unlock_dst;
	return 0;

unlock_dst:
	mutex_unlock(&dst_kvm->lock);
release_src:
	atomic_set_release(&src_sev->migration_in_progress, 0);
release_dst:
	atomic_set_release(&dst_sev->migration_in_progress, 0);
	return r;
}

static void sev_unlock_two_vms(struct kvm *dst_kvm, struct kvm *src_kvm)
{
	struct kvm_sev_info *dst_sev = &to_kvm_svm(dst_kvm)->sev_info;
	struct kvm_sev_info *src_sev = &to_kvm_svm(src_kvm)->sev_info;

	mutex_unlock(&dst_kvm->lock);
	mutex_unlock(&src_kvm->lock);
	atomic_set_release(&dst_sev->migration_in_progress, 0);
	atomic_set_release(&src_sev->migration_in_progress, 0);
}


static int sev_lock_vcpus_for_migration(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i, j;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		if (mutex_lock_killable(&vcpu->mutex))
			goto out_unlock;
	}

	return 0;

out_unlock:
	kvm_for_each_vcpu(j, vcpu, kvm) {
		if (i == j)
			break;

		mutex_unlock(&vcpu->mutex);
	}
	return -EINTR;
}

static void sev_unlock_vcpus_for_migration(struct kvm *kvm)
{
	struct kvm_vcpu *vcpu;
	int i;

	kvm_for_each_vcpu(i, vcpu, kvm) {
		mutex_unlock(&vcpu->mutex);
	}
}

static void sev_migrate_from(struct kvm_sev_info *dst,
			      struct kvm_sev_info *src)
{
	dst->active = true;
	dst->asid = src->asid;
	dst->handle = src->handle;
	dst->pages_locked = src->pages_locked;
	dst->enc_context_owner = src->enc_context_owner;

	src->asid = 0;
	src->active = false;
	src->handle = 0;
	src->pages_locked = 0;
	src->enc_context_owner = NULL;

	list_cut_before(&dst->regions_list, &src->regions_list, &src->regions_list);
}

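/*
 * Move the per-vCPU SEV-ES state (VMSA, GHCB) from the source VM's vCPUs to
 * the destination's. Both VMs must have the same number of online vCPUs and
 * every source vCPU must already have protected guest state.
 */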
static int sev_es_migrate_from(struct kvm *dst, struct kvm *src)
{
	int i;
	struct kvm_vcpu *dst_vcpu, *src_vcpu;
	struct vcpu_svm *dst_svm, *src_svm;

	if (atomic_read(&src->online_vcpus) != atomic_read(&dst->online_vcpus))
		return -EINVAL;

	kvm_for_each_vcpu(i, src_vcpu, src) {
		if (!src_vcpu->arch.guest_state_protected)
			return -EINVAL;
	}

	kvm_for_each_vcpu(i, src_vcpu, src) {
		src_svm = to_svm(src_vcpu);
		dst_vcpu = kvm_get_vcpu(dst, i);
		dst_svm = to_svm(dst_vcpu);

		/*
		 * Transfer VMSA and GHCB state to the destination.  Nullify and
		 * clear source fields as appropriate, the state now belongs to
		 * the destination.
		 */
		memcpy(&dst_svm->sev_es, &src_svm->sev_es, sizeof(src_svm->sev_es));
		dst_svm->vmcb->control.ghcb_gpa = src_svm->vmcb->control.ghcb_gpa;
		dst_svm->vmcb->control.vmsa_pa = src_svm->vmcb->control.vmsa_pa;
		dst_vcpu->arch.guest_state_protected = true;

		memset(&src_svm->sev_es, 0, sizeof(src_svm->sev_es));
		src_svm->vmcb->control.ghcb_gpa = INVALID_PAGE;
		src_svm->vmcb->control.vmsa_pa = INVALID_PAGE;
		src_vcpu->arch.guest_state_protected = false;
	}
	to_kvm_svm(src)->sev_info.es_active = false;
	to_kvm_svm(dst)->sev_info.es_active = true;

	return 0;
}

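/*
 * Migrate the SEV encryption context from the VM referenced by @source_fd
 * into @kvm; on success the source VM is marked dead and its SEV state is
 * cleared.
 */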
int svm_vm_migrate_from(struct kvm *kvm, unsigned int source_fd)
{
	struct kvm_sev_info *dst_sev = &to_kvm_svm(kvm)->sev_info;
	struct kvm_sev_info *src_sev, *cg_cleanup_sev;
	struct file *source_kvm_file;
	struct kvm *source_kvm;
	bool charged = false;
	int ret;

	source_kvm_file = fget(source_fd);
	if (!file_is_kvm(source_kvm_file)) {
		ret = -EBADF;
		goto out_fput;
	}

	source_kvm = source_kvm_file->private_data;
	ret = sev_lock_two_vms(kvm, source_kvm);
	if (ret)
		goto out_fput;

	if (sev_guest(kvm) || !sev_guest(source_kvm)) {
		ret = -EINVAL;
		goto out_unlock;
	}

	src_sev = &to_kvm_svm(source_kvm)->sev_info;

	/*
	 * VMs mirroring src's encryption context rely on it to keep the
	 * ASID allocated, but below we are clearing src_sev->asid.
	 */
	if (src_sev->num_mirrored_vms) {
		ret = -EBUSY;
		goto out_unlock;
	}

	dst_sev->misc_cg = get_current_misc_cg();
	cg_cleanup_sev = dst_sev;
	if (dst_sev->misc_cg != src_sev->misc_cg) {
		ret = sev_misc_cg_try_charge(dst_sev);
		if (ret)
			goto out_dst_cgroup;
		charged = true;
	}

	ret = sev_lock_vcpus_for_migration(kvm);
	if (ret)
		goto out_dst_cgroup;
	ret = sev_lock_vcpus_for_migration(source_kvm);
	if (ret)
		goto out_dst_vcpu;

	if (sev_es_guest(source_kvm)) {
		ret = sev_es_migrate_from(kvm, source_kvm);
		if (ret)
			goto out_source_vcpu;
	}
	sev_migrate_from(dst_sev, src_sev);
	kvm_vm_dead(source_kvm);
	cg_cleanup_sev = src_sev;
	ret = 0;

out_source_vcpu:
	sev_unlock_vcpus_for_migration(source_kvm);
out_dst_vcpu:
	sev_unlock_vcpus_for_migration(kvm);
out_dst_cgroup:
	/* Operates on the source on success, on the destination on failure.  */
	if (charged)
		sev_misc_cg_uncharge(cg_cleanup_sev);
	put_misc_cg(cg_cleanup_sev->misc_cg);
	cg_cleanup_sev->misc_cg = NULL;
out_unlock:
	sev_unlock_two_vms(kvm, source_kvm);
out_fput:
	if (source_kvm_file)
		fput(source_kvm_file);
	return ret;
}

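/*
 * Top-level dispatcher for the KVM_SEV_* commands: copy in the struct
 * kvm_sev_cmd from userspace, route it by ->id under kvm->lock, and copy the
 * command back so userspace sees the firmware error code.
 */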
int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
{
	struct kvm_sev_cmd sev_cmd;
	int r;

	if (!sev_enabled)
		return -ENOTTY;

	if (!argp)
		return 0;

	if (copy_from_user(&sev_cmd, argp, sizeof(struct kvm_sev_cmd)))
		return -EFAULT;

	mutex_lock(&kvm->lock);

	/* Only the enc_context_owner handles some memory enc operations. */
	if (is_mirroring_enc_context(kvm) &&
	    !is_cmd_allowed_from_mirror(sev_cmd.id)) {
		r = -EINVAL;
		goto out;
	}

	switch (sev_cmd.id) {
	case KVM_SEV_ES_INIT:
		if (!sev_es_enabled) {
			r = -ENOTTY;
			goto out;
		}
		fallthrough;
	case KVM_SEV_INIT:
		r = sev_guest_init(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_START:
		r = sev_launch_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_DATA:
		r = sev_launch_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_UPDATE_VMSA:
		r = sev_launch_update_vmsa(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_MEASURE:
		r = sev_launch_measure(kvm, &sev_cmd);
		break;
	case KVM_SEV_LAUNCH_FINISH:
		r = sev_launch_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_GUEST_STATUS:
		r = sev_guest_status(kvm, &sev_cmd);
		break;
	case KVM_SEV_DBG_DECRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, true);
		break;
	case KVM_SEV_DBG_ENCRYPT:
		r = sev_dbg_crypt(kvm, &sev_cmd, false);
		break;
	case KVM_SEV_LAUNCH_SECRET:
		r = sev_launch_secret(kvm, &sev_cmd);
		break;
	case KVM_SEV_GET_ATTESTATION_REPORT:
		r = sev_get_attestation_report(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_START:
		r = sev_send_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_UPDATE_DATA:
		r = sev_send_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_FINISH:
		r = sev_send_finish(kvm, &sev_cmd);
		break;
	case KVM_SEV_SEND_CANCEL:
		r = sev_send_cancel(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_START:
		r = sev_receive_start(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_UPDATE_DATA:
		r = sev_receive_update_data(kvm, &sev_cmd);
		break;
	case KVM_SEV_RECEIVE_FINISH:
		r = sev_receive_finish(kvm, &sev_cmd);
		break;
	default:
		r = -EINVAL;
		goto out;
	}

	if (copy_to_user(argp, &sev_cmd, sizeof(struct kvm_sev_cmd)))
		r = -EFAULT;

out:
	mutex_unlock(&kvm->lock);
	return r;
}

int svm_register_enc_region(struct kvm *kvm,
			    struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct enc_region *region;
	int ret = 0;

	if (!sev_guest(kvm))
		return -ENOTTY;

	/* If kvm is mirroring encryption context it isn't responsible for it */
	if (is_mirroring_enc_context(kvm))
		return -EINVAL;

	if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
		return -EINVAL;

	region = kzalloc(sizeof(*region), GFP_KERNEL_ACCOUNT);
	if (!region)
		return -ENOMEM;

	mutex_lock(&kvm->lock);
	region->pages = sev_pin_memory(kvm, range->addr, range->size, &region->npages, 1);
	if (IS_ERR(region->pages)) {
		ret = PTR_ERR(region->pages);
		mutex_unlock(&kvm->lock);
		goto e_free;
	}

	region->uaddr = range->addr;
	region->size = range->size;

	list_add_tail(&region->list, &sev->regions_list);
	mutex_unlock(&kvm->lock);

	/*
	 * The guest may change the memory encryption attribute from C=0 -> C=1
	 * or vice versa for this memory range. Let's make sure caches are
	 * flushed to ensure that guest data gets written into memory with
	 * correct C-bit.
	 */
	sev_clflush_pages(region->pages, region->npages);

	return ret;

e_free:
	kfree(region);
	return ret;
}

static struct enc_region *
find_enc_region(struct kvm *kvm, struct kvm_enc_region *range)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct enc_region *i;

	list_for_each_entry(i, head, list) {
		if (i->uaddr == range->addr &&
		    i->size == range->size)
			return i;
	}

	return NULL;
}

static void __unregister_enc_region_locked(struct kvm *kvm,
					   struct enc_region *region)
{
	sev_unpin_memory(kvm, region->pages, region->npages);
	list_del(&region->list);
	kfree(region);
}

int svm_unregister_enc_region(struct kvm *kvm,
			      struct kvm_enc_region *range)
{
	struct enc_region *region;
	int ret;

	/* If kvm is mirroring encryption context it isn't responsible for it */
	if (is_mirroring_enc_context(kvm))
		return -EINVAL;

	mutex_lock(&kvm->lock);

	if (!sev_guest(kvm)) {
		ret = -ENOTTY;
		goto failed;
	}

	region = find_enc_region(kvm, range);
	if (!region) {
		ret = -EINVAL;
		goto failed;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	__unregister_enc_region_locked(kvm, region);

	mutex_unlock(&kvm->lock);
	return 0;

failed:
	mutex_unlock(&kvm->lock);
	return ret;
}

int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
{
	struct file *source_kvm_file;
	struct kvm *source_kvm;
	struct kvm_sev_info *source_sev, *mirror_sev;
	int ret;

	source_kvm_file = fget(source_fd);
	if (!file_is_kvm(source_kvm_file)) {
		ret = -EBADF;
		goto e_source_fput;
	}

	source_kvm = source_kvm_file->private_data;
	ret = sev_lock_two_vms(kvm, source_kvm);
	if (ret)
		goto e_source_fput;

	/*
	 * Mirrors of mirrors should work, but let's not get silly.  Also
	 * disallow out-of-band SEV/SEV-ES init if the target is already an
	 * SEV guest, or if vCPUs have been created.  KVM relies on vCPUs being
	 * created after SEV/SEV-ES initialization, e.g. to init intercepts.
	 */
	if (sev_guest(kvm) || !sev_guest(source_kvm) ||
	    is_mirroring_enc_context(source_kvm) || kvm->created_vcpus) {
		ret = -EINVAL;
		goto e_unlock;
	}

	/*
	 * The mirror kvm holds an enc_context_owner ref, so its ASID can't
	 * disappear until we're done with it.
	 */
	source_sev = &to_kvm_svm(source_kvm)->sev_info;
	kvm_get_kvm(source_kvm);
	source_sev->num_mirrored_vms++;

	/* Set enc_context_owner and copy its encryption context over */
	mirror_sev = &to_kvm_svm(kvm)->sev_info;
	mirror_sev->enc_context_owner = source_kvm;
	mirror_sev->active = true;
	mirror_sev->asid = source_sev->asid;
	mirror_sev->fd = source_sev->fd;
	mirror_sev->es_active = source_sev->es_active;
	mirror_sev->handle = source_sev->handle;
	INIT_LIST_HEAD(&mirror_sev->regions_list);
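	/*
	 * A mirror cannot register its own encryption regions (see
	 * svm_register_enc_region()), so this list normally stays empty.
	 */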
	ret = 0;

	/*
	 * Do not copy ap_jump_table: the mirror does not share the same KVM
	 * context as the original, and the two may have different memory
	 * views.
	 */

e_unlock:
	sev_unlock_two_vms(kvm, source_kvm);
e_source_fput:
	if (source_kvm_file)
		fput(source_kvm_file);
	return ret;
}

void sev_vm_destroy(struct kvm *kvm)
{
	struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
	struct list_head *head = &sev->regions_list;
	struct list_head *pos, *q;

	WARN_ON(sev->num_mirrored_vms);

	if (!sev_guest(kvm))
		return;

	/* If this is a mirror VM, release the enc_context_owner and skip SEV cleanup. */
	if (is_mirroring_enc_context(kvm)) {
		struct kvm *owner_kvm = sev->enc_context_owner;
		struct kvm_sev_info *owner_sev = &to_kvm_svm(owner_kvm)->sev_info;

		mutex_lock(&owner_kvm->lock);
		if (!WARN_ON(!owner_sev->num_mirrored_vms))
			owner_sev->num_mirrored_vms--;
		mutex_unlock(&owner_kvm->lock);
		kvm_put_kvm(owner_kvm);
		return;
	}

	/*
	 * Ensure that all guest tagged cache entries are flushed before
	 * releasing the pages back to the system for use. CLFLUSH will
	 * not do this, so issue a WBINVD.
	 */
	wbinvd_on_all_cpus();

	/*
	 * If userspace was terminated before unregistering the memory regions,
	 * then let's unpin all the registered memory.
	 */
	if (!list_empty(head)) {
		list_for_each_safe(pos, q, head) {
			__unregister_enc_region_locked(kvm,
				list_entry(pos, struct enc_region, list));
			cond_resched();
		}
	}

	sev_unbind_asid(kvm, sev->handle);
	sev_asid_free(sev);
}

void __init sev_set_cpu_caps(void)
{
	if (!sev_enabled)
		kvm_cpu_cap_clear(X86_FEATURE_SEV);
	if (!sev_es_enabled)
		kvm_cpu_cap_clear(X86_FEATURE_SEV_ES);
}

void __init sev_hardware_setup(void)
{
#ifdef CONFIG_KVM_AMD_SEV
	unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
	bool sev_es_supported = false;
	bool sev_supported = false;

	if (!sev_enabled || !npt_enabled)
		goto out;

	/* Does the CPU support SEV? */
	if (!boot_cpu_has(X86_FEATURE_SEV))
		goto out;

	/* Retrieve SEV CPUID information */
	cpuid(0x8000001f, &eax, &ebx, &ecx, &edx);

	/* Set encryption bit location for SEV-ES guests */
	sev_enc_bit = ebx & 0x3f;

	/* Maximum number of encrypted guests supported simultaneously */
	max_sev_asid = ecx;
	if (!max_sev_asid)
		goto out;

	/* Minimum ASID value that should be used for an SEV guest */
	min_sev_asid = edx;
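	/* Like sev_enc_bit above, the C-bit position comes from EBX[5:0]. */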
	sev_me_mask = 1UL << (ebx & 0x3f);

	/*
	 * Initialize SEV ASID bitmaps. Allocate space for ASID 0 in the bitmap,
	 * even though it's never used, so that the bitmap is indexed by the
	 * actual ASID.
	 */
	nr_asids = max_sev_asid + 1;
	sev_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
	if (!sev_asid_bitmap)
		goto out;

	sev_reclaim_asid_bitmap = bitmap_zalloc(nr_asids, GFP_KERNEL);
	if (!sev_reclaim_asid_bitmap) {
		bitmap_free(sev_asid_bitmap);
		sev_asid_bitmap = NULL;
		goto out;
	}

	sev_asid_count = max_sev_asid - min_sev_asid + 1;
	if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count))
		goto out;

	pr_info("SEV supported: %u ASIDs\n", sev_asid_count);
	sev_supported = true;

	/* SEV-ES support requested? */
	if (!sev_es_enabled)
		goto out;

	/* Does the CPU support SEV-ES? */
	if (!boot_cpu_has(X86_FEATURE_SEV_ES))
		goto out;

	/* Has the system been allocated ASIDs for SEV-ES? */
	if (min_sev_asid == 1)
		goto out;

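	/* ASIDs 1 .. min_sev_asid - 1 are reserved for SEV-ES guests. */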
	sev_es_asid_count = min_sev_asid - 1;
	if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count))
		goto out;

	pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count);
	sev_es_supported = true;

out:
	sev_enabled = sev_supported;
	sev_es_enabled = sev_es_supported;
#endif
}

void sev_hardware_teardown(void)
{
	if (!sev_enabled)
		return;

	/* No need to take sev_bitmap_lock, all VMs have been destroyed. */
	sev_flush_asids(1, max_sev_asid);

	bitmap_free(sev_asid_bitmap);
	bitmap_free(sev_reclaim_asid_bitmap);

	misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
	misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
}

int sev_cpu_init(struct svm_cpu_data *sd)
{
	if (!sev_enabled)
		return 0;

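	/*
	 * One VMCB pointer per ASID (including the unused ASID 0, so the array
	 * can be indexed by raw ASID); pre_sev_run() uses it to track which
	 * VMCB last ran with a given ASID on this CPU.
	 */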
	sd->sev_vmcbs = kcalloc(nr_asids, sizeof(void *), GFP_KERNEL);
	if (!sd->sev_vmcbs)
		return -ENOMEM;

	return 0;
}

/*
 * Pages used by hardware to hold guest encrypted state must be flushed before
 * returning them to the system.
 */
static void sev_flush_guest_memory(struct vcpu_svm *svm, void *va,
				   unsigned long len)
{
	/*
	 * If hardware-enforced cache coherency for encrypted mappings of the
	 * same physical page is supported, there is nothing to do.
	 */
	if (boot_cpu_has(X86_FEATURE_SME_COHERENT))
		return;

	/*
	 * If the VM Page Flush MSR is supported, use it to flush the page
	 * (using the page virtual address and the guest ASID).
	 */
	if (boot_cpu_has(X86_FEATURE_VM_PAGE_FLUSH)) {
		struct kvm_sev_info *sev;
		unsigned long va_start;
		u64 start, stop;

		/* Align start and stop to page boundaries. */
		va_start = (unsigned long)va;
		start = (u64)va_start & PAGE_MASK;
		stop = PAGE_ALIGN((u64)va_start + len);
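		/*
		 * Per the APM, the VM_PAGE_FLUSH MSR takes the page-aligned
		 * virtual address in the upper bits and the ASID in the low
		 * bits, so the two can simply be OR'd together below.
		 */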

		if (start < stop) {
			sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;

			while (start < stop) {
				wrmsrl(MSR_AMD64_VM_PAGE_FLUSH,
				       start | sev->asid);

				start += PAGE_SIZE;
			}

			return;
		}

		WARN(1, "Address overflow, using WBINVD\n");
	}

	/*
	 * Hardware should always have one of the above features,
	 * but if not, use WBINVD and issue a warning.
	 */
	WARN_ONCE(1, "Using WBINVD to flush guest memory\n");
	wbinvd_on_all_cpus();
}

void sev_free_vcpu(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm;

	if (!sev_es_guest(vcpu->kvm))
		return;

	svm = to_svm(vcpu);

	if (vcpu->arch.guest_state_protected)
		sev_flush_guest_memory(svm, svm->sev_es.vmsa, PAGE_SIZE);
	__free_page(virt_to_page(svm->sev_es.vmsa));

	if (svm->sev_es.ghcb_sa_free)
		kvfree(svm->sev_es.ghcb_sa);
}

static void dump_ghcb(struct vcpu_svm *svm)
{
	struct ghcb *ghcb = svm->sev_es.ghcb;
	unsigned int nbits;

	/* Re-use the dump_invalid_vmcb module parameter */
	if (!dump_invalid_vmcb) {
		pr_warn_ratelimited("set kvm_amd.dump_invalid_vmcb=1 to dump internal KVM state.\n");
		return;
	}

	nbits = sizeof(ghcb->save.valid_bitmap) * 8;

	pr_err("GHCB (GPA=%016llx):\n", svm->vmcb->control.ghcb_gpa);
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_code",
	       ghcb->save.sw_exit_code, ghcb_sw_exit_code_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_1",
	       ghcb->save.sw_exit_info_1, ghcb_sw_exit_info_1_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_exit_info_2",
	       ghcb->save.sw_exit_info_2, ghcb_sw_exit_info_2_is_valid(ghcb));
	pr_err("%-20s%016llx is_valid: %u\n", "sw_scratch",
	       ghcb->save.sw_scratch, ghcb_sw_scratch_is_valid(ghcb));
	pr_err("%-20s%*pb\n", "valid_bitmap", nbits, ghcb->save.valid_bitmap);
}

static void sev_es_sync_to_ghcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->sev_es.ghcb;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be returned:
	 *   GPRs RAX, RBX, RCX, RDX
	 *
	 * Copy their values, even if they may not have been written during the
	 * VM-Exit.  It's the guest's responsibility to not consume random data.
	 */
	ghcb_set_rax(ghcb, vcpu->arch.regs[VCPU_REGS_RAX]);
	ghcb_set_rbx(ghcb, vcpu->arch.regs[VCPU_REGS_RBX]);
	ghcb_set_rcx(ghcb, vcpu->arch.regs[VCPU_REGS_RCX]);
	ghcb_set_rdx(ghcb, vcpu->arch.regs[VCPU_REGS_RDX]);
}

static void sev_es_sync_from_ghcb(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	struct ghcb *ghcb = svm->sev_es.ghcb;
	u64 exit_code;

	/*
	 * The GHCB protocol so far allows for the following data
	 * to be supplied:
	 *   GPRs RAX, RBX, RCX, RDX
	 *   XCR0
	 *   CPL
	 *
	 * VMMCALL allows the guest to provide extra registers. KVM also
	 * expects RSI for hypercalls, so include that, too.
	 *
	 * Copy their values to the appropriate location if supplied.
	 */
	memset(vcpu->arch.regs, 0, sizeof(vcpu->arch.regs));

	vcpu->arch.regs[VCPU_REGS_RAX] = ghcb_get_rax_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RBX] = ghcb_get_rbx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RCX] = ghcb_get_rcx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RDX] = ghcb_get_rdx_if_valid(ghcb);
	vcpu->arch.regs[VCPU_REGS_RSI] = ghcb_get_rsi_if_valid(ghcb);

	svm->vmcb->save.cpl = ghcb_get_cpl_if_valid(ghcb);

	if (ghcb_xcr0_is_valid(ghcb)) {
		vcpu->arch.xcr0 = ghcb_get_xcr0(ghcb);
		kvm_update_cpuid_runtime(vcpu);
	}

	/* Copy the GHCB exit information into the VMCB fields */
	exit_code = ghcb_get_sw_exit_code(ghcb);
	control->exit_code = lower_32_bits(exit_code);
	control->exit_code_hi = upper_32_bits(exit_code);
	control->exit_info_1 = ghcb_get_sw_exit_info_1(ghcb);
	control->exit_info_2 = ghcb_get_sw_exit_info_2(ghcb);

	/* Clear the valid entries fields */
	memset(ghcb->save.valid_bitmap, 0, sizeof(ghcb->save.valid_bitmap));
}

static int sev_es_validate_vmgexit(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu;
	struct ghcb *ghcb;
	u64 exit_code = 0;

	ghcb = svm->sev_es.ghcb;

	/* Only GHCB Usage code 0 is supported */
	if (ghcb->ghcb_usage)
		goto vmgexit_err;

	/*
	 * Retrieve the exit code now even though it may not be marked valid,
	 * as it could help with debugging.
	 */
	exit_code = ghcb_get_sw_exit_code(ghcb);

	if (!ghcb_sw_exit_code_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_1_is_valid(ghcb) ||
	    !ghcb_sw_exit_info_2_is_valid(ghcb))
		goto vmgexit_err;

	switch (ghcb_get_sw_exit_code(ghcb)) {
	case SVM_EXIT_READ_DR7:
		break;
	case SVM_EXIT_WRITE_DR7:
		if (!ghcb_rax_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSC:
		break;
	case SVM_EXIT_RDPMC:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_CPUID:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_rax(ghcb) == 0xd)
			if (!ghcb_xcr0_is_valid(ghcb))
				goto vmgexit_err;
		break;
	case SVM_EXIT_INVD:
		break;
	case SVM_EXIT_IOIO:
		if (ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_STR_MASK) {
			if (!ghcb_sw_scratch_is_valid(ghcb))
				goto vmgexit_err;
		} else {
			if (!(ghcb_get_sw_exit_info_1(ghcb) & SVM_IOIO_TYPE_MASK))
				if (!ghcb_rax_is_valid(ghcb))
					goto vmgexit_err;
		}
		break;
	case SVM_EXIT_MSR:
		if (!ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		if (ghcb_get_sw_exit_info_1(ghcb)) {
			if (!ghcb_rax_is_valid(ghcb) ||
			    !ghcb_rdx_is_valid(ghcb))
				goto vmgexit_err;
		}
		break;
	case SVM_EXIT_VMMCALL:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_cpl_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_RDTSCP:
		break;
	case SVM_EXIT_WBINVD:
		break;
	case SVM_EXIT_MONITOR:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb) ||
		    !ghcb_rdx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_EXIT_MWAIT:
		if (!ghcb_rax_is_valid(ghcb) ||
		    !ghcb_rcx_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_VMGEXIT_MMIO_READ:
	case SVM_VMGEXIT_MMIO_WRITE:
		if (!ghcb_sw_scratch_is_valid(ghcb))
			goto vmgexit_err;
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
	case SVM_VMGEXIT_AP_HLT_LOOP:
	case SVM_VMGEXIT_AP_JUMP_TABLE:
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		break;
	default:
		goto vmgexit_err;
	}

	return 0;

vmgexit_err:
	vcpu = &svm->vcpu;

	if (ghcb->ghcb_usage) {
		vcpu_unimpl(vcpu, "vmgexit: ghcb usage %#x is not valid\n",
			    ghcb->ghcb_usage);
	} else {
		vcpu_unimpl(vcpu, "vmgexit: exit reason %#llx is not valid\n",
			    exit_code);
		dump_ghcb(svm);
	}

	vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
	vcpu->run->internal.suberror = KVM_INTERNAL_ERROR_UNEXPECTED_EXIT_REASON;
	vcpu->run->internal.ndata = 2;
	vcpu->run->internal.data[0] = exit_code;
	vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;

	return -EINVAL;
}

void sev_es_unmap_ghcb(struct vcpu_svm *svm)
{
	if (!svm->sev_es.ghcb)
		return;

	if (svm->sev_es.ghcb_sa_free) {
		/*
		 * The scratch area lives outside the GHCB, so there is a
		 * buffer that, depending on the operation performed, may
		 * need to be synced, then freed.
		 */
		if (svm->sev_es.ghcb_sa_sync) {
			kvm_write_guest(svm->vcpu.kvm,
					ghcb_get_sw_scratch(svm->sev_es.ghcb),
					svm->sev_es.ghcb_sa,
					svm->sev_es.ghcb_sa_len);
			svm->sev_es.ghcb_sa_sync = false;
		}

		kvfree(svm->sev_es.ghcb_sa);
		svm->sev_es.ghcb_sa = NULL;
		svm->sev_es.ghcb_sa_free = false;
	}

	trace_kvm_vmgexit_exit(svm->vcpu.vcpu_id, svm->sev_es.ghcb);

	sev_es_sync_to_ghcb(svm);

	kvm_vcpu_unmap(&svm->vcpu, &svm->sev_es.ghcb_map, true);
	svm->sev_es.ghcb = NULL;
}

void pre_sev_run(struct vcpu_svm *svm, int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	int asid = sev_get_asid(svm->vcpu.kvm);

	/* Assign the ASID allocated to this SEV guest */
	svm->asid = asid;

	/*
	 * Flush guest TLB:
	 *
	 * 1) when a different VMCB for the same ASID is to be run on the same host CPU, or
	 * 2) when this VMCB was executed on a different host CPU in previous VMRUNs.
	 */
	if (sd->sev_vmcbs[asid] == svm->vmcb &&
	    svm->vcpu.arch.last_vmentry_cpu == cpu)
		return;

	sd->sev_vmcbs[asid] = svm->vmcb;
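	/*
	 * TLB_CONTROL_FLUSH_ASID asks hardware to flush only this guest's
	 * ASID-tagged TLB entries on the next VMRUN, rather than the whole TLB.
	 */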
	svm->vmcb->control.tlb_ctl = TLB_CONTROL_FLUSH_ASID;
	vmcb_mark_dirty(svm->vmcb, VMCB_ASID);
}

#define GHCB_SCRATCH_AREA_LIMIT		(16ULL * PAGE_SIZE)
static int setup_vmgexit_scratch(struct vcpu_svm *svm, bool sync, u64 len)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct ghcb *ghcb = svm->sev_es.ghcb;
	u64 ghcb_scratch_beg, ghcb_scratch_end;
	u64 scratch_gpa_beg, scratch_gpa_end;
	void *scratch_va;

	scratch_gpa_beg = ghcb_get_sw_scratch(ghcb);
	if (!scratch_gpa_beg) {
		pr_err("vmgexit: scratch gpa not provided\n");
		return -EINVAL;
	}

	scratch_gpa_end = scratch_gpa_beg + len;
	if (scratch_gpa_end < scratch_gpa_beg) {
		pr_err("vmgexit: scratch length (%#llx) not valid for scratch address (%#llx)\n",
		       len, scratch_gpa_beg);
		return -EINVAL;
	}

	if ((scratch_gpa_beg & PAGE_MASK) == control->ghcb_gpa) {
		/* Scratch area begins within GHCB */
		ghcb_scratch_beg = control->ghcb_gpa +
				   offsetof(struct ghcb, shared_buffer);
		ghcb_scratch_end = control->ghcb_gpa +
				   offsetof(struct ghcb, reserved_1);

		/*
		 * If the scratch area begins within the GHCB, it must be
		 * completely contained in the GHCB shared buffer area.
		 */
		if (scratch_gpa_beg < ghcb_scratch_beg ||
		    scratch_gpa_end > ghcb_scratch_end) {
			pr_err("vmgexit: scratch area is outside of GHCB shared buffer area (%#llx - %#llx)\n",
			       scratch_gpa_beg, scratch_gpa_end);
			return -EINVAL;
		}

		scratch_va = (void *)svm->sev_es.ghcb;
		scratch_va += (scratch_gpa_beg - control->ghcb_gpa);
	} else {
		/*
		 * The guest memory must be read into a kernel buffer, so
		 * limit the size.
		 */
		if (len > GHCB_SCRATCH_AREA_LIMIT) {
			pr_err("vmgexit: scratch area exceeds KVM limits (%#llx requested, %#llx limit)\n",
			       len, GHCB_SCRATCH_AREA_LIMIT);
			return -EINVAL;
		}
		scratch_va = kvzalloc(len, GFP_KERNEL_ACCOUNT);
		if (!scratch_va)
			return -ENOMEM;

		if (kvm_read_guest(svm->vcpu.kvm, scratch_gpa_beg, scratch_va, len)) {
			/* Unable to copy scratch area from guest */
			pr_err("vmgexit: kvm_read_guest for scratch area failed\n");

			kvfree(scratch_va);
			return -EFAULT;
		}

		/*
		 * The scratch area is outside the GHCB. The operation will
		 * dictate whether the buffer needs to be synced before running
		 * the vCPU next time (i.e. a read was requested so the data
		 * must be written back to the guest memory).
		 */
		svm->sev_es.ghcb_sa_sync = sync;
		svm->sev_es.ghcb_sa_free = true;
	}

	svm->sev_es.ghcb_sa = scratch_va;
	svm->sev_es.ghcb_sa_len = len;

	return 0;
}

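/*
 * Helpers for the GHCB MSR protocol: each request/response field lives at a
 * fixed (position, mask) pair within the 64-bit GHCB MSR value, so handlers
 * extract fields with get_ghcb_msr_bits() and build replies by packing
 * fields with set_ghcb_msr_bits().
 */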
static void set_ghcb_msr_bits(struct vcpu_svm *svm, u64 value, u64 mask,
			      unsigned int pos)
{
	svm->vmcb->control.ghcb_gpa &= ~(mask << pos);
	svm->vmcb->control.ghcb_gpa |= (value & mask) << pos;
}

static u64 get_ghcb_msr_bits(struct vcpu_svm *svm, u64 mask, unsigned int pos)
{
	return (svm->vmcb->control.ghcb_gpa >> pos) & mask;
}

static void set_ghcb_msr(struct vcpu_svm *svm, u64 value)
{
	svm->vmcb->control.ghcb_gpa = value;
}

static int sev_handle_vmgexit_msr_protocol(struct vcpu_svm *svm)
{
	struct vmcb_control_area *control = &svm->vmcb->control;
	struct kvm_vcpu *vcpu = &svm->vcpu;
	u64 ghcb_info;
	int ret = 1;

	ghcb_info = control->ghcb_gpa & GHCB_MSR_INFO_MASK;
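	/* The low GHCB MSR bits select the request type handled below. */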

	trace_kvm_vmgexit_msr_protocol_enter(svm->vcpu.vcpu_id,
					     control->ghcb_gpa);

	switch (ghcb_info) {
	case GHCB_MSR_SEV_INFO_REQ:
		set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
						    GHCB_VERSION_MIN,
						    sev_enc_bit));
		break;
	case GHCB_MSR_CPUID_REQ: {
		u64 cpuid_fn, cpuid_reg, cpuid_value;

		cpuid_fn = get_ghcb_msr_bits(svm,
					     GHCB_MSR_CPUID_FUNC_MASK,
					     GHCB_MSR_CPUID_FUNC_POS);

		/* Initialize the registers needed by the CPUID intercept */
		vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
		vcpu->arch.regs[VCPU_REGS_RCX] = 0;

		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
		if (!ret) {
			ret = -EINVAL;
			break;
		}

		cpuid_reg = get_ghcb_msr_bits(svm,
					      GHCB_MSR_CPUID_REG_MASK,
					      GHCB_MSR_CPUID_REG_POS);
		if (cpuid_reg == 0)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RAX];
		else if (cpuid_reg == 1)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RBX];
		else if (cpuid_reg == 2)
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RCX];
		else
			cpuid_value = vcpu->arch.regs[VCPU_REGS_RDX];

		set_ghcb_msr_bits(svm, cpuid_value,
				  GHCB_MSR_CPUID_VALUE_MASK,
				  GHCB_MSR_CPUID_VALUE_POS);

		set_ghcb_msr_bits(svm, GHCB_MSR_CPUID_RESP,
				  GHCB_MSR_INFO_MASK,
				  GHCB_MSR_INFO_POS);
		break;
	}
	case GHCB_MSR_TERM_REQ: {
		u64 reason_set, reason_code;

		reason_set = get_ghcb_msr_bits(svm,
					       GHCB_MSR_TERM_REASON_SET_MASK,
					       GHCB_MSR_TERM_REASON_SET_POS);
		reason_code = get_ghcb_msr_bits(svm,
						GHCB_MSR_TERM_REASON_MASK,
						GHCB_MSR_TERM_REASON_POS);
		pr_info("SEV-ES guest requested termination: %#llx:%#llx\n",
			reason_set, reason_code);
		fallthrough;
	}
	default:
		ret = -EINVAL;
	}

	trace_kvm_vmgexit_msr_protocol_exit(svm->vcpu.vcpu_id,
					    control->ghcb_gpa, ret);

	return ret;
}

int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
{
	struct vcpu_svm *svm = to_svm(vcpu);
	struct vmcb_control_area *control = &svm->vmcb->control;
	u64 ghcb_gpa, exit_code;
	struct ghcb *ghcb;
	int ret;

	/* Validate the GHCB */
	ghcb_gpa = control->ghcb_gpa;
	if (ghcb_gpa & GHCB_MSR_INFO_MASK)
		return sev_handle_vmgexit_msr_protocol(svm);

	if (!ghcb_gpa) {
		vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
		return -EINVAL;
	}

	if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->sev_es.ghcb_map)) {
		/* Unable to map GHCB from guest */
		vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
			    ghcb_gpa);
		return -EINVAL;
	}

	svm->sev_es.ghcb = svm->sev_es.ghcb_map.hva;
	ghcb = svm->sev_es.ghcb_map.hva;

	trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);

	exit_code = ghcb_get_sw_exit_code(ghcb);

	ret = sev_es_validate_vmgexit(svm);
	if (ret)
		return ret;

	sev_es_sync_from_ghcb(svm);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	switch (exit_code) {
	case SVM_VMGEXIT_MMIO_READ:
		ret = setup_vmgexit_scratch(svm, true, control->exit_info_2);
		if (ret)
			break;

		ret = kvm_sev_es_mmio_read(vcpu,
					   control->exit_info_1,
					   control->exit_info_2,
					   svm->sev_es.ghcb_sa);
		break;
	case SVM_VMGEXIT_MMIO_WRITE:
		ret = setup_vmgexit_scratch(svm, false, control->exit_info_2);
		if (ret)
			break;

		ret = kvm_sev_es_mmio_write(vcpu,
					    control->exit_info_1,
					    control->exit_info_2,
					    svm->sev_es.ghcb_sa);
		break;
	case SVM_VMGEXIT_NMI_COMPLETE:
		ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
		break;
	case SVM_VMGEXIT_AP_HLT_LOOP:
		ret = kvm_emulate_ap_reset_hold(vcpu);
		break;
	case SVM_VMGEXIT_AP_JUMP_TABLE: {
		struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;

		switch (control->exit_info_1) {
		case 0:
			/* Set AP jump table address */
			sev->ap_jump_table = control->exit_info_2;
			break;
		case 1:
			/* Get AP jump table address */
			ghcb_set_sw_exit_info_2(ghcb, sev->ap_jump_table);
			break;
		default:
			pr_err("svm: vmgexit: unsupported AP jump table request - exit_info_1=%#llx\n",
			       control->exit_info_1);
			ghcb_set_sw_exit_info_1(ghcb, 1);
			ghcb_set_sw_exit_info_2(ghcb,
						X86_TRAP_UD |
						SVM_EVTINJ_TYPE_EXEPT |
						SVM_EVTINJ_VALID);
		}

		ret = 1;
		break;
	}
	case SVM_VMGEXIT_UNSUPPORTED_EVENT:
		vcpu_unimpl(vcpu,
			    "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
			    control->exit_info_1, control->exit_info_2);
		ret = -EINVAL;
		break;
	default:
		ret = svm_invoke_exit_handler(vcpu, exit_code);
	}

	return ret;
}

int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in)
{
	int count;
	int bytes;
	int r;

	if (svm->vmcb->control.exit_info_2 > INT_MAX)
		return -EINVAL;

	count = svm->vmcb->control.exit_info_2;
	if (unlikely(check_mul_overflow(count, size, &bytes)))
		return -EINVAL;

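	/*
	 * count * size bytes are staged through the GHCB scratch buffer and
	 * then handed to the common string I/O emulation.
	 */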
	r = setup_vmgexit_scratch(svm, in, bytes);
	if (r)
		return r;

	return kvm_sev_es_string_io(&svm->vcpu, size, port, svm->sev_es.ghcb_sa,
				    count, in);
}

void sev_es_init_vmcb(struct vcpu_svm *svm)
{
	struct kvm_vcpu *vcpu = &svm->vcpu;

	svm->vmcb->control.nested_ctl |= SVM_NESTED_CTL_SEV_ES_ENABLE;
	svm->vmcb->control.virt_ext |= LBR_CTL_ENABLE_MASK;
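	/*
	 * LBR virtualization is enabled because the guest owns the LBR state,
	 * which hardware saves and restores as part of the encrypted VMSA and
	 * KVM therefore cannot context-switch itself.
	 */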

	/*
	 * An SEV-ES guest requires a VMSA area that is separate from the
	 * VMCB page. Do not include the encryption mask on the VMSA physical
	 * address since hardware will access it using the guest key.
	 */
	svm->vmcb->control.vmsa_pa = __pa(svm->sev_es.vmsa);

	/* Can't intercept CR register access, HV can't modify CR registers */
	svm_clr_intercept(svm, INTERCEPT_CR0_READ);
	svm_clr_intercept(svm, INTERCEPT_CR4_READ);
	svm_clr_intercept(svm, INTERCEPT_CR8_READ);
	svm_clr_intercept(svm, INTERCEPT_CR0_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR4_WRITE);
	svm_clr_intercept(svm, INTERCEPT_CR8_WRITE);

	svm_clr_intercept(svm, INTERCEPT_SELECTIVE_CR0);

	/* Track EFER/CR register changes */
	svm_set_intercept(svm, TRAP_EFER_WRITE);
	svm_set_intercept(svm, TRAP_CR0_WRITE);
	svm_set_intercept(svm, TRAP_CR4_WRITE);
	svm_set_intercept(svm, TRAP_CR8_WRITE);

	/* No support for enable_vmware_backdoor */
	clr_exception_intercept(svm, GP_VECTOR);

	/* Can't intercept XSETBV, HV can't modify XCR0 directly */
	svm_clr_intercept(svm, INTERCEPT_XSETBV);

	/* Clear intercepts on selected MSRs */
	set_msr_interception(vcpu, svm->msrpm, MSR_EFER, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_CR_PAT, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTBRANCHTOIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTFROMIP, 1, 1);
	set_msr_interception(vcpu, svm->msrpm, MSR_IA32_LASTINTTOIP, 1, 1);
}

void sev_es_vcpu_reset(struct vcpu_svm *svm)
{
	/*
	 * Set the GHCB MSR value as per the GHCB specification when emulating
	 * vCPU RESET for an SEV-ES guest.
	 */
	set_ghcb_msr(svm, GHCB_MSR_SEV_INFO(GHCB_VERSION_MAX,
					    GHCB_VERSION_MIN,
					    sev_enc_bit));
}

void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu)
{
	struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
	struct vmcb_save_area *hostsa;

	/*
	 * For an SEV-ES guest, hardware restores the host state on VMEXIT, one
	 * step of which is a VMLOAD. Since hardware does not perform a VMSAVE
	 * on VMRUN, the host save area must be updated here.
	 */
	vmsave(__sme_page_pa(sd->save_area));

	/* XCR0 is restored on VMEXIT, save the current host value */
	hostsa = (struct vmcb_save_area *)(page_address(sd->save_area) + 0x400);
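	/*
	 * The state-save portion of the host save area starts 0x400 bytes into
	 * the page, mirroring the VMCB layout described in the APM.
	 */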
	hostsa->xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);

	/* PKRU is restored on VMEXIT, save the current host value */
	hostsa->pkru = read_pkru();

	/* MSR_IA32_XSS is restored on VMEXIT, save the current host value */
	hostsa->xss = host_xss;
}

void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector)
{
	struct vcpu_svm *svm = to_svm(vcpu);

	/* First SIPI: Use the values as initially set by the VMM */
	if (!svm->sev_es.received_first_sipi) {
		svm->sev_es.received_first_sipi = true;
		return;
	}

	/*
	 * Subsequent SIPI: Return from an AP Reset Hold VMGEXIT, where
	 * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
	 * non-zero value.
	 */
	if (!svm->sev_es.ghcb)
		return;

	ghcb_set_sw_exit_info_2(svm->sev_es.ghcb, 1);
}