Commit d04f7de0 authored by Linus Torvalds

Merge tag 'x86_sev_for_v5.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip

Pull x86 SEV updates from Borislav Petkov:

 - Differentiate the type of exception the #VC handler raises depending
   on code executed in the guest and handle the case where failure to
   get the RIP would result in a #GP, as it should, instead of in a #PF

 - Disable interrupts while the per-CPU GHCB is held

 - Split the #VC handler depending on where the #VC exception has
   happened and therefore provide for precise context tracking like the
   rest of the exception handlers deal with noinstr regions now

 - Add defines for the GHCB version 2 protocol so that further shared
   development with KVM can happen without merge conflicts

 - The usual small cleanups

* tag 'x86_sev_for_v5.14_rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip:
  x86/sev: Use "SEV: " prefix for messages from sev.c
  x86/sev: Add defines for GHCB version 2 MSR protocol requests
  x86/sev: Split up runtime #VC handler for correct state tracking
  x86/sev: Make sure IRQs are disabled while GHCB is active
  x86/sev: Propagate #GP if getting linear instruction address failed
  x86/insn: Extend error reporting from insn_fetch_from_user[_inatomic]()
  x86/insn-eval: Make 0 a valid RIP for insn_get_effective_ip()
  x86/sev: Fix error message in runtime #VC handler
@@ -506,7 +506,7 @@ SYM_CODE_START(\asmsym)
 	movq	%rsp, %rdi		/* pt_regs pointer */

-	call	\cfunc
+	call	kernel_\cfunc

 	/*
 	 * No need to switch back to the IST stack. The current stack is either
@@ -517,7 +517,7 @@ SYM_CODE_START(\asmsym)

 	/* Switch to the regular task stack */
 .Lfrom_usermode_switch_stack_\@:
-	idtentry_body safe_stack_\cfunc, has_error_code=1
+	idtentry_body user_\cfunc, has_error_code=1

 _ASM_NOKPROBE(\asmsym)
 SYM_CODE_END(\asmsym)
@@ -312,8 +312,8 @@ static __always_inline void __##func(struct pt_regs *regs)
  */
 #define DECLARE_IDTENTRY_VC(vector, func)				\
 	DECLARE_IDTENTRY_RAW_ERRORCODE(vector, func);			\
-	__visible noinstr void ist_##func(struct pt_regs *regs, unsigned long error_code);	\
-	__visible noinstr void safe_stack_##func(struct pt_regs *regs, unsigned long error_code)
+	__visible noinstr void kernel_##func(struct pt_regs *regs, unsigned long error_code);	\
+	__visible noinstr void user_##func(struct pt_regs *regs, unsigned long error_code)

 /**
  * DEFINE_IDTENTRY_IST - Emit code for IST entry points
@@ -355,33 +355,24 @@ static __always_inline void __##func(struct pt_regs *regs)
 	DEFINE_IDTENTRY_RAW_ERRORCODE(func)

 /**
- * DEFINE_IDTENTRY_VC_SAFE_STACK - Emit code for VMM communication handler
-				   which runs on a safe stack.
+ * DEFINE_IDTENTRY_VC_KERNEL - Emit code for VMM communication handler
+			       when raised from kernel mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_SAFE_STACK(func)				\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(safe_stack_##func)
+#define DEFINE_IDTENTRY_VC_KERNEL(func)					\
+	DEFINE_IDTENTRY_RAW_ERRORCODE(kernel_##func)

 /**
- * DEFINE_IDTENTRY_VC_IST - Emit code for VMM communication handler
-			    which runs on the VC fall-back stack
+ * DEFINE_IDTENTRY_VC_USER - Emit code for VMM communication handler
+			     when raised from user mode
  * @func:	Function name of the entry point
  *
  * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
  */
-#define DEFINE_IDTENTRY_VC_IST(func)					\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(ist_##func)
-
-/**
- * DEFINE_IDTENTRY_VC - Emit code for VMM communication handler
- * @func:	Function name of the entry point
- *
- * Maps to DEFINE_IDTENTRY_RAW_ERRORCODE
- */
-#define DEFINE_IDTENTRY_VC(func)					\
-	DEFINE_IDTENTRY_RAW_ERRORCODE(func)
+#define DEFINE_IDTENTRY_VC_USER(func)					\
+	DEFINE_IDTENTRY_RAW_ERRORCODE(user_##func)

 #else	/* CONFIG_X86_64 */
@@ -9,8 +9,13 @@
 #define __ASM_X86_SEV_COMMON_H

 #define GHCB_MSR_INFO_POS		0
-#define GHCB_MSR_INFO_MASK		(BIT_ULL(12) - 1)
+#define GHCB_DATA_LOW			12
+#define GHCB_MSR_INFO_MASK		(BIT_ULL(GHCB_DATA_LOW) - 1)

+#define GHCB_DATA(v)			\
+	(((unsigned long)(v) & ~GHCB_MSR_INFO_MASK) >> GHCB_DATA_LOW)
+
+/* SEV Information Request/Response */
 #define GHCB_MSR_SEV_INFO_RESP		0x001
 #define GHCB_MSR_SEV_INFO_REQ		0x002
 #define GHCB_MSR_VER_MAX_POS		48
@@ -28,6 +33,7 @@
 #define GHCB_MSR_PROTO_MAX(v)		(((v) >> GHCB_MSR_VER_MAX_POS) & GHCB_MSR_VER_MAX_MASK)
 #define GHCB_MSR_PROTO_MIN(v)		(((v) >> GHCB_MSR_VER_MIN_POS) & GHCB_MSR_VER_MIN_MASK)

+/* CPUID Request/Response */
 #define GHCB_MSR_CPUID_REQ		0x004
 #define GHCB_MSR_CPUID_RESP		0x005
 #define GHCB_MSR_CPUID_FUNC_POS		32
@@ -45,6 +51,14 @@
 	(((unsigned long)reg & GHCB_MSR_CPUID_REG_MASK) << GHCB_MSR_CPUID_REG_POS) | \
 	(((unsigned long)fn) << GHCB_MSR_CPUID_FUNC_POS))

+/* AP Reset Hold */
+#define GHCB_MSR_AP_RESET_HOLD_REQ	0x006
+#define GHCB_MSR_AP_RESET_HOLD_RESP	0x007
+
+/* GHCB Hypervisor Feature Request/Response */
+#define GHCB_MSR_HV_FT_REQ		0x080
+#define GHCB_MSR_HV_FT_RESP		0x081
+
 #define GHCB_MSR_TERM_REQ		0x100
 #define GHCB_MSR_TERM_REASON_SET_POS	12
 #define GHCB_MSR_TERM_REASON_SET_MASK	0xf
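For context, a minimal sketch (not part of this diff) of how the GHCB MSR protocol defines above fit together: the low 12 bits of the MSR value carry the request/response code, the upper bits carry the data, and GHCB_DATA() extracts the data portion. The helper name is illustrative; sev_es_wr_ghcb_msr()/sev_es_rd_ghcb_msr() and VMGEXIT() are assumed to be the existing MSR-protocol accessors used elsewhere in the SEV code.

/* Illustrative sketch only, not part of the merged patches. */
static u64 example_hv_features(void)
{
	u64 val;

	/* Bits [11:0]: request code, bits [63:12]: request data (none here). */
	sev_es_wr_ghcb_msr(GHCB_MSR_HV_FT_REQ);
	VMGEXIT();
	val = sev_es_rd_ghcb_msr();

	/* The low info bits must contain the matching response code. */
	if ((val & GHCB_MSR_INFO_MASK) != GHCB_MSR_HV_FT_RESP)
		return 0;

	/* GHCB_DATA() masks off the info bits and shifts the data down. */
	return GHCB_DATA(val);
}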
@@ -7,12 +7,11 @@
  * Author: Joerg Roedel <jroedel@suse.de>
  */

-#define pr_fmt(fmt)	"SEV-ES: " fmt
+#define pr_fmt(fmt)	"SEV: " fmt

 #include <linux/sched/debug.h>	/* For show_regs() */
 #include <linux/percpu-defs.h>
 #include <linux/mem_encrypt.h>
-#include <linux/lockdep.h>
 #include <linux/printk.h>
 #include <linux/mm_types.h>
 #include <linux/set_memory.h>
@@ -192,11 +191,19 @@ void noinstr __sev_es_ist_exit(void)
 	this_cpu_write(cpu_tss_rw.x86_tss.ist[IST_INDEX_VC], *(unsigned long *)ist);
 }

-static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
+/*
+ * Nothing shall interrupt this code path while holding the per-CPU
+ * GHCB. The backup GHCB is only for NMIs interrupting this path.
+ *
+ * Callers must disable local interrupts around it.
+ */
+static noinstr struct ghcb *__sev_get_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;

+	WARN_ON(!irqs_disabled());
+
 	data = this_cpu_read(runtime_data);
 	ghcb = &data->ghcb_page;

@@ -213,7 +220,9 @@ static __always_inline struct ghcb *sev_es_get_ghcb(struct ghcb_state *state)
 		data->ghcb_active        = false;
 		data->backup_ghcb_active = false;

+		instrumentation_begin();
 		panic("Unable to handle #VC exception! GHCB and Backup GHCB are already in use");
+		instrumentation_end();
 	}

 	/* Mark backup_ghcb active before writing to it */
@@ -258,17 +267,24 @@ static int vc_fetch_insn_kernel(struct es_em_ctxt *ctxt,
 static enum es_result __vc_decode_user_insn(struct es_em_ctxt *ctxt)
 {
 	char buffer[MAX_INSN_SIZE];
-	int res;
+	int insn_bytes;

-	res = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
-	if (!res) {
+	insn_bytes = insn_fetch_from_user_inatomic(ctxt->regs, buffer);
+	if (insn_bytes == 0) {
+		/* Nothing could be copied */
 		ctxt->fi.vector     = X86_TRAP_PF;
 		ctxt->fi.error_code = X86_PF_INSTR | X86_PF_USER;
 		ctxt->fi.cr2        = ctxt->regs->ip;
 		return ES_EXCEPTION;
+	} else if (insn_bytes == -EINVAL) {
+		/* Effective RIP could not be calculated */
+		ctxt->fi.vector     = X86_TRAP_GP;
+		ctxt->fi.error_code = 0;
+		ctxt->fi.cr2        = 0;
+		return ES_EXCEPTION;
 	}

-	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, res))
+	if (!insn_decode_from_regs(&ctxt->insn, ctxt->regs, buffer, insn_bytes))
 		return ES_DECODE_FAILED;

 	if (ctxt->insn.immediate.got)
@@ -479,11 +495,13 @@ static enum es_result vc_slow_virt_to_phys(struct ghcb *ghcb, struct es_em_ctxt
 /* Include code shared with pre-decompression boot stage */
 #include "sev-shared.c"

-static __always_inline void sev_es_put_ghcb(struct ghcb_state *state)
+static noinstr void __sev_put_ghcb(struct ghcb_state *state)
 {
 	struct sev_es_runtime_data *data;
 	struct ghcb *ghcb;

+	WARN_ON(!irqs_disabled());
+
 	data = this_cpu_read(runtime_data);
 	ghcb = &data->ghcb_page;
@@ -507,7 +525,7 @@ void noinstr __sev_es_nmi_complete(void)
 	struct ghcb_state state;
 	struct ghcb *ghcb;

-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);

 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_NMI_COMPLETE);
@@ -517,7 +535,7 @@ void noinstr __sev_es_nmi_complete(void)
 	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
 	VMGEXIT();

-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 }

 static u64 get_jump_table_addr(void)
@@ -529,7 +547,7 @@ static u64 get_jump_table_addr(void)

 	local_irq_save(flags);

-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);

 	vc_ghcb_invalidate(ghcb);
 	ghcb_set_sw_exit_code(ghcb, SVM_VMGEXIT_AP_JUMP_TABLE);
@@ -543,7 +561,7 @@ static u64 get_jump_table_addr(void)
 	    ghcb_sw_exit_info_2_is_valid(ghcb))
 		ret = ghcb->save.sw_exit_info_2;

-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);

 	local_irq_restore(flags);
@@ -668,7 +686,7 @@ static void sev_es_ap_hlt_loop(void)
 	struct ghcb_state state;
 	struct ghcb *ghcb;

-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);

 	while (true) {
 		vc_ghcb_invalidate(ghcb);
@@ -685,7 +703,7 @@ static void sev_es_ap_hlt_loop(void)
 			break;
 	}

-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);
 }

 /*
@@ -775,7 +793,7 @@ void __init sev_es_init_vc_handling(void)
 	sev_es_setup_play_dead();

 	/* Secondary CPUs use the runtime #VC handler */
-	initial_vc_handler = (unsigned long)safe_stack_exc_vmm_communication;
+	initial_vc_handler = (unsigned long)kernel_exc_vmm_communication;
 }

 static void __init vc_early_forward_exception(struct es_em_ctxt *ctxt)
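As a reference for the renamed helpers, here is a minimal sketch (not part of this diff) of the calling convention they now enforce: interrupts must stay disabled for the whole GHCB critical section, as get_jump_table_addr() above does. The function name and exit code below are purely illustrative; the GHCB accessors and VMGEXIT() are the ones visible in the hunks above.

/* Illustrative sketch only, not part of the merged patches. */
static void example_ghcb_msr_call(u64 exit_code)
{
	struct ghcb_state state;
	unsigned long flags;
	struct ghcb *ghcb;

	/* __sev_get_ghcb() warns if IRQs are enabled, so disable them first. */
	local_irq_save(flags);

	ghcb = __sev_get_ghcb(&state);

	vc_ghcb_invalidate(ghcb);
	ghcb_set_sw_exit_code(ghcb, exit_code);
	ghcb_set_sw_exit_info_1(ghcb, 0);
	ghcb_set_sw_exit_info_2(ghcb, 0);

	sev_es_wr_ghcb_msr(__pa_nodebug(ghcb));
	VMGEXIT();

	__sev_put_ghcb(&state);

	local_irq_restore(flags);
}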
@@ -1213,14 +1231,6 @@ static enum es_result vc_handle_trap_ac(struct ghcb *ghcb,
 	return ES_EXCEPTION;
 }

-static __always_inline void vc_handle_trap_db(struct pt_regs *regs)
-{
-	if (user_mode(regs))
-		noist_exc_debug(regs);
-	else
-		exc_debug(regs);
-}
-
 static enum es_result vc_handle_exitcode(struct es_em_ctxt *ctxt,
 					 struct ghcb *ghcb,
 					 unsigned long exit_code)
@@ -1316,44 +1326,15 @@ static __always_inline bool on_vc_fallback_stack(struct pt_regs *regs)
 	return (sp >= __this_cpu_ist_bottom_va(VC2) && sp < __this_cpu_ist_top_va(VC2));
 }

-/*
- * Main #VC exception handler. It is called when the entry code was able to
- * switch off the IST to a safe kernel stack.
- *
- * With the current implementation it is always possible to switch to a safe
- * stack because #VC exceptions only happen at known places, like intercepted
- * instructions or accesses to MMIO areas/IO ports. They can also happen with
- * code instrumentation when the hypervisor intercepts #DB, but the critical
- * paths are forbidden to be instrumented, so #DB exceptions currently also
- * only happen in safe places.
- */
-DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
+static bool vc_raw_handle_exception(struct pt_regs *regs, unsigned long error_code)
 {
-	irqentry_state_t irq_state;
 	struct ghcb_state state;
 	struct es_em_ctxt ctxt;
 	enum es_result result;
 	struct ghcb *ghcb;
+	bool ret = true;

-	/*
-	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
-	 */
-	if (error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB) {
-		vc_handle_trap_db(regs);
-		return;
-	}
-
-	irq_state = irqentry_nmi_enter(regs);
-	lockdep_assert_irqs_disabled();
-	instrumentation_begin();
-
-	/*
-	 * This is invoked through an interrupt gate, so IRQs are disabled. The
-	 * code below might walk page-tables for user or kernel addresses, so
-	 * keep the IRQs disabled to protect us against concurrent TLB flushes.
-	 */
-
-	ghcb = sev_es_get_ghcb(&state);
+	ghcb = __sev_get_ghcb(&state);

 	vc_ghcb_invalidate(ghcb);
 	result = vc_init_em_ctxt(&ctxt, regs, error_code);
@@ -1361,7 +1342,7 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 	if (result == ES_OK)
 		result = vc_handle_exitcode(&ctxt, ghcb, error_code);

-	sev_es_put_ghcb(&state);
+	__sev_put_ghcb(&state);

 	/* Done - now check the result */
 	switch (result) {
@@ -1369,17 +1350,20 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		vc_finish_insn(&ctxt);
 		break;
 	case ES_UNSUPPORTED:
-		pr_err_ratelimited("Unsupported exit-code 0x%02lx in early #VC exception (IP: 0x%lx)\n",
+		pr_err_ratelimited("Unsupported exit-code 0x%02lx in #VC exception (IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_VMM_ERROR:
 		pr_err_ratelimited("Failure in communication with VMM (exit-code 0x%02lx IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_DECODE_FAILED:
 		pr_err_ratelimited("Failed to decode instruction (exit-code 0x%02lx IP: 0x%lx)\n",
 				   error_code, regs->ip);
-		goto fail;
+		ret = false;
+		break;
 	case ES_EXCEPTION:
 		vc_forward_exception(&ctxt);
 		break;
@@ -1395,24 +1379,52 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		BUG();
 	}

-out:
-	instrumentation_end();
-	irqentry_nmi_exit(regs, irq_state);
+	return ret;
+}

-	return;
+static __always_inline bool vc_is_db(unsigned long error_code)
+{
+	return error_code == SVM_EXIT_EXCP_BASE + X86_TRAP_DB;
+}

-fail:
-	if (user_mode(regs)) {
-		/*
-		 * Do not kill the machine if user-space triggered the
-		 * exception. Send SIGBUS instead and let user-space deal with
-		 * it.
-		 */
-		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
-	} else {
-		pr_emerg("PANIC: Unhandled #VC exception in kernel space (result=%d)\n",
-			 result);
+/*
+ * Runtime #VC exception handler when raised from kernel mode. Runs in NMI mode
+ * and will panic when an error happens.
+ */
+DEFINE_IDTENTRY_VC_KERNEL(exc_vmm_communication)
+{
+	irqentry_state_t irq_state;

+	/*
+	 * With the current implementation it is always possible to switch to a
+	 * safe stack because #VC exceptions only happen at known places, like
+	 * intercepted instructions or accesses to MMIO areas/IO ports. They can
+	 * also happen with code instrumentation when the hypervisor intercepts
+	 * #DB, but the critical paths are forbidden to be instrumented, so #DB
+	 * exceptions currently also only happen in safe places.
+	 *
+	 * But keep this here in case the noinstr annotations are violated due
+	 * to bug elsewhere.
+	 */
+	if (unlikely(on_vc_fallback_stack(regs))) {
+		instrumentation_begin();
+		panic("Can't handle #VC exception from unsupported context\n");
+		instrumentation_end();
+	}
+
+	/*
+	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+	 */
+	if (vc_is_db(error_code)) {
+		exc_debug(regs);
+		return;
+	}
+
+	irq_state = irqentry_nmi_enter(regs);
+
+	instrumentation_begin();
+
+	if (!vc_raw_handle_exception(regs, error_code)) {
 		/* Show some debug info */
 		show_regs(regs);
@@ -1423,23 +1435,38 @@ DEFINE_IDTENTRY_VC_SAFE_STACK(exc_vmm_communication)
 		panic("Returned from Terminate-Request to Hypervisor\n");
 	}

-	goto out;
+	instrumentation_end();
+	irqentry_nmi_exit(regs, irq_state);
 }

-/* This handler runs on the #VC fall-back stack. It can cause further #VC exceptions */
-DEFINE_IDTENTRY_VC_IST(exc_vmm_communication)
+/*
+ * Runtime #VC exception handler when raised from user mode. Runs in IRQ mode
+ * and will kill the current task with SIGBUS when an error happens.
+ */
+DEFINE_IDTENTRY_VC_USER(exc_vmm_communication)
 {
-	instrumentation_begin();
-	panic("Can't handle #VC exception from unsupported context\n");
-	instrumentation_end();
-}
+	/*
+	 * Handle #DB before calling into !noinstr code to avoid recursive #DB.
+	 */
+	if (vc_is_db(error_code)) {
+		noist_exc_debug(regs);
+		return;
+	}

-DEFINE_IDTENTRY_VC(exc_vmm_communication)
-{
-	if (likely(!on_vc_fallback_stack(regs)))
-		safe_stack_exc_vmm_communication(regs, error_code);
-	else
-		ist_exc_vmm_communication(regs, error_code);
+	irqentry_enter_from_user_mode(regs);
+	instrumentation_begin();
+
+	if (!vc_raw_handle_exception(regs, error_code)) {
+		/*
+		 * Do not kill the machine if user-space triggered the
+		 * exception. Send SIGBUS instead and let user-space deal with
+		 * it.
+		 */
+		force_sig_fault(SIGBUS, BUS_OBJERR, (void __user *)0);
+	}
+
+	instrumentation_end();
+	irqentry_exit_to_user_mode(regs);
 }

 bool __init handle_vc_boot_ghcb(struct pt_regs *regs)
@@ -346,14 +346,12 @@ bool fixup_umip_exception(struct pt_regs *regs)
 	if (!regs)
 		return false;

-	nr_copied = insn_fetch_from_user(regs, buf);
-
 	/*
-	 * The insn_fetch_from_user above could have failed if user code
-	 * is protected by a memory protection key. Give up on emulation
-	 * in such a case. Should we issue a page fault?
+	 * Give up on emulation if fetching the instruction failed. Should a
+	 * page fault or a #GP be issued?
 	 */
-	if (!nr_copied)
+	nr_copied = insn_fetch_from_user(regs, buf);
+	if (nr_copied <= 0)
 		return false;

 	if (!insn_decode_from_regs(&insn, regs, buf, nr_copied))
@@ -1417,7 +1417,7 @@ void __user *insn_get_addr_ref(struct insn *insn, struct pt_regs *regs)
 	}
 }

-static unsigned long insn_get_effective_ip(struct pt_regs *regs)
+static int insn_get_effective_ip(struct pt_regs *regs, unsigned long *ip)
 {
 	unsigned long seg_base = 0;

@@ -1430,10 +1430,12 @@ static unsigned long insn_get_effective_ip(struct pt_regs *regs)
 	if (!user_64bit_mode(regs)) {
 		seg_base = insn_get_seg_base(regs, INAT_SEG_REG_CS);
 		if (seg_base == -1L)
-			return 0;
+			return -EINVAL;
 	}

-	return seg_base + regs->ip;
+	*ip = seg_base + regs->ip;
+
+	return 0;
 }

 /**
@@ -1446,18 +1448,17 @@ static unsigned long insn_get_effective_ip(struct pt_regs *regs)
  *
  * Returns:
  *
- * Number of instruction bytes copied.
- *
- * 0 if nothing was copied.
+ * - number of instruction bytes copied.
+ * - 0 if nothing was copied.
+ * - -EINVAL if the linear address of the instruction could not be calculated
  */
 int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 {
 	unsigned long ip;
 	int not_copied;

-	ip = insn_get_effective_ip(regs);
-	if (!ip)
-		return 0;
+	if (insn_get_effective_ip(regs, &ip))
+		return -EINVAL;

 	not_copied = copy_from_user(buf, (void __user *)ip, MAX_INSN_SIZE);

@@ -1475,18 +1476,17 @@ int insn_fetch_from_user(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE]
  *
  * Returns:
  *
- * Number of instruction bytes copied.
- *
- * 0 if nothing was copied.
+ * - number of instruction bytes copied.
+ * - 0 if nothing was copied.
+ * - -EINVAL if the linear address of the instruction could not be calculated.
  */
 int insn_fetch_from_user_inatomic(struct pt_regs *regs, unsigned char buf[MAX_INSN_SIZE])
 {
 	unsigned long ip;
 	int not_copied;

-	ip = insn_get_effective_ip(regs);
-	if (!ip)
-		return 0;
+	if (insn_get_effective_ip(regs, &ip))
+		return -EINVAL;

 	not_copied = __copy_from_user_inatomic(buf, (void __user *)ip, MAX_INSN_SIZE);
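To illustrate the extended return convention, here is a minimal hypothetical caller (not part of this diff) that distinguishes the three outcomes, mirroring what __vc_decode_user_insn() above does with #PF vs. #GP; the function name and the errno it maps the "nothing copied" case to are assumptions for the example.

/* Illustrative sketch only, not part of the merged patches. */
static int example_fetch_insn(struct pt_regs *regs)
{
	unsigned char buf[MAX_INSN_SIZE];
	int nr_copied;

	nr_copied = insn_fetch_from_user(regs, buf);
	if (nr_copied == -EINVAL)
		return -EINVAL;	/* effective RIP could not be calculated: #GP-like case */
	if (nr_copied == 0)
		return -EFAULT;	/* nothing could be copied: #PF-like case */

	return nr_copied;	/* number of instruction bytes available for decoding */
}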