提交 8764ed55 编写于 作者: S Sean Christopherson 提交者: Paolo Bonzini

KVM: x86: Whitelist port 0x7e for pre-incrementing %rip

KVM's recent bug fix to update %rip after emulating I/O broke userspace
that relied on the previous behavior of incrementing %rip prior to
exiting to userspace.  When running a Windows XP guest on AMD hardware,
Qemu may patch "OUT 0x7E" instructions in reaction to the OUT itself.
Because KVM's old behavior was to increment %rip before exiting to
userspace to handle the I/O, Qemu manually adjusted %rip to account for
the OUT instruction.

Arguably this is a userspace bug as KVM requires userspace to re-enter
the kernel to complete instruction emulation before taking any other
actions.  That being said, this is a bit of a grey area and breaking
userspace that has worked for many years is bad.

Pre-increment %rip on OUT to port 0x7e before exiting to userspace to
hack around the issue.

Fixes: 45def77e ("KVM: x86: update %rip after emulating IO")
Reported-by: NSimon Becherer <simon@becherer.de>
Reported-and-tested-by: NIakov Karpov <srid@rkmail.ru>
Reported-by: NGabriele Balducci <balducci@units.it>
Reported-by: NAntti Antinoja <reader@fennosys.fi>
Cc: stable@vger.kernel.org
Cc: Takashi Iwai <tiwai@suse.com>
Cc: Jiri Slaby <jslaby@suse.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: NSean Christopherson <sean.j.christopherson@intel.com>
Signed-off-by: NPaolo Bonzini <pbonzini@redhat.com>
上级 dbcdae18
...@@ -381,6 +381,7 @@ struct kvm_sync_regs { ...@@ -381,6 +381,7 @@ struct kvm_sync_regs {
#define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0) #define KVM_X86_QUIRK_LINT0_REENABLED (1 << 0)
#define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1) #define KVM_X86_QUIRK_CD_NW_CLEARED (1 << 1)
#define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2) #define KVM_X86_QUIRK_LAPIC_MMIO_HOLE (1 << 2)
#define KVM_X86_QUIRK_OUT_7E_INC_RIP (1 << 3)
#define KVM_STATE_NESTED_GUEST_MODE 0x00000001 #define KVM_STATE_NESTED_GUEST_MODE 0x00000001
#define KVM_STATE_NESTED_RUN_PENDING 0x00000002 #define KVM_STATE_NESTED_RUN_PENDING 0x00000002
......
...@@ -6539,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu, ...@@ -6539,6 +6539,12 @@ int kvm_emulate_instruction_from_buffer(struct kvm_vcpu *vcpu,
} }
EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer); EXPORT_SYMBOL_GPL(kvm_emulate_instruction_from_buffer);
static int complete_fast_pio_out_port_0x7e(struct kvm_vcpu *vcpu)
{
vcpu->arch.pio.count = 0;
return 1;
}
static int complete_fast_pio_out(struct kvm_vcpu *vcpu) static int complete_fast_pio_out(struct kvm_vcpu *vcpu)
{ {
vcpu->arch.pio.count = 0; vcpu->arch.pio.count = 0;
...@@ -6555,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size, ...@@ -6555,12 +6561,23 @@ static int kvm_fast_pio_out(struct kvm_vcpu *vcpu, int size,
unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX); unsigned long val = kvm_register_read(vcpu, VCPU_REGS_RAX);
int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt, int ret = emulator_pio_out_emulated(&vcpu->arch.emulate_ctxt,
size, port, &val, 1); size, port, &val, 1);
if (ret)
return ret;
if (!ret) { /*
* Workaround userspace that relies on old KVM behavior of %rip being
* incremented prior to exiting to userspace to handle "OUT 0x7e".
*/
if (port == 0x7e &&
kvm_check_has_quirk(vcpu->kvm, KVM_X86_QUIRK_OUT_7E_INC_RIP)) {
vcpu->arch.complete_userspace_io =
complete_fast_pio_out_port_0x7e;
kvm_skip_emulated_instruction(vcpu);
} else {
vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu); vcpu->arch.pio.linear_rip = kvm_get_linear_rip(vcpu);
vcpu->arch.complete_userspace_io = complete_fast_pio_out; vcpu->arch.complete_userspace_io = complete_fast_pio_out;
} }
return ret; return 0;
} }
static int complete_fast_pio_in(struct kvm_vcpu *vcpu) static int complete_fast_pio_in(struct kvm_vcpu *vcpu)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册