1. 06 Aug 2018, 2 commits
  2. 13 Jun 2018, 1 commit
    • treewide: kmalloc() -> kmalloc_array() · 6da2ec56
      Committed by Kees Cook
      The kmalloc() function has a 2-factor argument form, kmalloc_array(). This
      patch replaces cases of:
      
              kmalloc(a * b, gfp)
      
      with:
              kmalloc_array(a, b, gfp)
      
      as well as handling cases of:
      
              kmalloc(a * b * c, gfp)
      
      with:
      
              kmalloc(array3_size(a, b, c), gfp)
      
      as it's slightly less ugly than:
      
              kmalloc_array(array_size(a, b), c, gfp)
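      
      For illustration, a hedged before/after sketch of the conversion on a
      typical call site (the struct and variable names here are invented for
      the example, not taken from the patch):
      
              /* Before: open-coded multiplication can overflow silently. */
              buf = kmalloc(nelems * sizeof(struct foo), GFP_KERNEL);
      
              /* After: kmalloc_array() checks the nelems * sizeof(struct foo)
               * product and returns NULL on overflow instead of allocating a
               * too-small buffer. */
              buf = kmalloc_array(nelems, sizeof(struct foo), GFP_KERNEL);
      
              /* 3-factor products keep kmalloc() but compute the size safely. */
              grid = kmalloc(array3_size(rows, cols, sizeof(struct foo)), GFP_KERNEL);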
      
      This does, however, attempt to ignore constant size factors like:
      
              kmalloc(4 * 1024, gfp)
      
      though any constants defined via macros get caught up in the conversion.
      
      Any factors with a sizeof() of "unsigned char", "char", and "u8" were
      dropped, since they're redundant.
      
      The tools/ directory was manually excluded, since it has its own
      implementation of kmalloc().
      
      The Coccinelle script used for this was:
      
      // Fix redundant parens around sizeof().
      @@
      type TYPE;
      expression THING, E;
      @@
      
      (
        kmalloc(
      -	(sizeof(TYPE)) * E
      +	sizeof(TYPE) * E
        , ...)
      |
        kmalloc(
      -	(sizeof(THING)) * E
      +	sizeof(THING) * E
        , ...)
      )
      
      // Drop single-byte sizes and redundant parens.
      @@
      expression COUNT;
      typedef u8;
      typedef __u8;
      @@
      
      (
        kmalloc(
      -	sizeof(u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * (COUNT)
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(__u8) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(char) * COUNT
      +	COUNT
        , ...)
      |
        kmalloc(
      -	sizeof(unsigned char) * COUNT
      +	COUNT
        , ...)
      )
      
      // 2-factor product with sizeof(type/expression) and identifier or constant.
      @@
      type TYPE;
      expression THING;
      identifier COUNT_ID;
      constant COUNT_CONST;
      @@
      
      (
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_ID)
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_ID
      +	COUNT_ID, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * COUNT_CONST
      +	COUNT_CONST, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_ID)
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_ID
      +	COUNT_ID, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (COUNT_CONST)
      +	COUNT_CONST, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * COUNT_CONST
      +	COUNT_CONST, sizeof(THING)
        , ...)
      )
      
      // 2-factor product, only identifiers.
      @@
      identifier SIZE, COUNT;
      @@
      
      - kmalloc
      + kmalloc_array
        (
      -	SIZE * COUNT
      +	COUNT, SIZE
        , ...)
      
      // 3-factor product with 1 sizeof(type) or sizeof(expression), with
      // redundant parens removed.
      @@
      expression THING;
      identifier STRIDE, COUNT;
      type TYPE;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(TYPE))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * (COUNT) * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * (STRIDE)
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      |
        kmalloc(
      -	sizeof(THING) * COUNT * STRIDE
      +	array3_size(COUNT, STRIDE, sizeof(THING))
        , ...)
      )
      
      // 3-factor product with 2 sizeof(variable), with redundant parens removed.
      @@
      expression THING1, THING2;
      identifier COUNT;
      type TYPE1, TYPE2;
      @@
      
      (
        kmalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(TYPE2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(TYPE2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(THING1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(THING1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * COUNT
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      |
        kmalloc(
      -	sizeof(TYPE1) * sizeof(THING2) * (COUNT)
      +	array3_size(COUNT, sizeof(TYPE1), sizeof(THING2))
        , ...)
      )
      
      // 3-factor product, only identifiers, with redundant parens removed.
      @@
      identifier STRIDE, SIZE, COUNT;
      @@
      
      (
        kmalloc(
      -	(COUNT) * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * STRIDE * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	(COUNT) * (STRIDE) * (SIZE)
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      |
        kmalloc(
      -	COUNT * STRIDE * SIZE
      +	array3_size(COUNT, STRIDE, SIZE)
        , ...)
      )
      
      // Any remaining multi-factor products, first at least 3-factor products,
      // when they're not all constants...
      @@
      expression E1, E2, E3;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(
      -	(E1) * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * E3
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	(E1) * (E2) * (E3)
      +	array3_size(E1, E2, E3)
        , ...)
      |
        kmalloc(
      -	E1 * E2 * E3
      +	array3_size(E1, E2, E3)
        , ...)
      )
      
      // And then all remaining 2 factors products when they're not all constants,
      // keeping sizeof() as the second factor argument.
      @@
      expression THING, E1, E2;
      type TYPE;
      constant C1, C2, C3;
      @@
      
      (
        kmalloc(sizeof(THING) * C2, ...)
      |
        kmalloc(sizeof(TYPE) * C2, ...)
      |
        kmalloc(C1 * C2 * C3, ...)
      |
        kmalloc(C1 * C2, ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * (E2)
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(TYPE) * E2
      +	E2, sizeof(TYPE)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * (E2)
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	sizeof(THING) * E2
      +	E2, sizeof(THING)
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * E2
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	(E1) * (E2)
      +	E1, E2
        , ...)
      |
      - kmalloc
      + kmalloc_array
        (
      -	E1 * E2
      +	E1, E2
        , ...)
      )
      Signed-off-by: Kees Cook <keescook@chromium.org>
      6da2ec56
  3. 06 Jun 2018, 1 commit
    • x86/bugs: Add AMD's SPEC_CTRL MSR usage · 6ac2f49e
      Committed by Konrad Rzeszutek Wilk
      The AMD document outlining the SSBD handling
      124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf
      mentions that if CPUID 8000_0008.EBX[24] is set we should be using
      the SPEC_CTRL MSR (0x48) over the VIRT SPEC_CTRL MSR (0xC001_011f)
      for speculative store bypass disable.
      
      This in effect means we should clear the X86_FEATURE_VIRT_SSBD
      flag so that we would prefer the SPEC_CTRL MSR.
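      
      A minimal sketch of the resulting preference logic, assuming the kernel's
      usual cpufeature helpers (the feature-bit names below are illustrative of
      the CPUID bit described above, not quoted from the patch):
      
              /* If the CPU advertises SSBD via SPEC_CTRL (CPUID 8000_0008 EBX[24]),
               * drop the virtualized variant so MSR 0x48 is used instead of
               * MSR 0xC001_011f. */
              if (boot_cpu_has(X86_FEATURE_AMD_SSBD))
                      setup_clear_cpu_cap(X86_FEATURE_VIRT_SSBD);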
      
      See the document titled:
         124441_AMD64_SpeculativeStoreBypassDisable_Whitepaper_final.pdf
      
      A copy of this document is available at
         https://bugzilla.kernel.org/show_bug.cgi?id=199889
      Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
      Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
      Cc: Tom Lendacky <thomas.lendacky@amd.com>
      Cc: Janakarajan Natarajan <Janakarajan.Natarajan@amd.com>
      Cc: kvm@vger.kernel.org
      Cc: KarimAllah Ahmed <karahmed@amazon.de>
      Cc: andrew.cooper3@citrix.com
      Cc: Joerg Roedel <joro@8bytes.org>
      Cc: Radim Krčmář <rkrcmar@redhat.com>
      Cc: Andy Lutomirski <luto@kernel.org>
      Cc: "H. Peter Anvin" <hpa@zytor.com>
      Cc: Paolo Bonzini <pbonzini@redhat.com>
      Cc: Borislav Petkov <bp@suse.de>
      Cc: David Woodhouse <dwmw@amazon.co.uk>
      Cc: Kees Cook <keescook@chromium.org>
      Link: https://lkml.kernel.org/r/20180601145921.9500-3-konrad.wilk@oracle.com
      6ac2f49e
  4. 02 Jun 2018, 1 commit
    • kvm: Make VM ioctl do valloc for some archs · d1e5b0e9
      Committed by Marc Orr
      The kvm struct has been bloating. For example, it's tens of kilobytes
      for x86, which turns out to be a large amount of memory to allocate
      contiguously via kzalloc. Thus, this patch does the following:
      1. Uses architecture-specific routines to allocate the kvm struct via
         vzalloc for x86.
      2. Switches arm to __KVM_HAVE_ARCH_VM_ALLOC so that it can use vzalloc
         when has_vhe() is true.
      
      Other architectures continue to default to kzalloc, as they have a
      dependency on kzalloc or have a small-enough struct kvm.
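      
      The shape of the arch hook this enables looks roughly as follows (a hedged
      sketch of the x86 side under the __KVM_HAVE_ARCH_VM_ALLOC convention named
      above; not the exact diff):
      
              /* Allocate the large struct kvm from vmalloc space instead of
               * requiring physically contiguous memory. */
              static inline struct kvm *kvm_arch_alloc_vm(void)
              {
                      return vzalloc(sizeof(struct kvm));
              }
      
              static inline void kvm_arch_free_vm(struct kvm *kvm)
              {
                      vfree(kvm);
              }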
      Signed-off-by: Marc Orr <marcorr@google.com>
      Reviewed-by: Marc Zyngier <marc.zyngier@arm.com>
      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
      d1e5b0e9
  5. 25 May 2018, 1 commit
    • KVM: x86: prevent integer overflows in KVM_MEMORY_ENCRYPT_REG_REGION · 86bf20cb
      Committed by Dan Carpenter
      This is a fix from reviewing the code, but it looks like it might be
      able to lead to an Oops.  It affects 32-bit systems.
      
      The KVM_MEMORY_ENCRYPT_REG_REGION ioctl uses a u64 for range->addr and
      range->size, but the high 32 bits would be truncated away on a 32-bit
      system.  This is harmless, but it's also harmless to prevent it.
      
      Then in sev_pin_memory() the "uaddr + ulen" calculation can wrap around.
      The wraparound can happen on 32-bit or 64-bit systems, but I was only
      able to figure out a problem for 32-bit systems.  We would pick a number
      which results in "npages" being zero.  sev_pin_memory() would then
      return ZERO_SIZE_PTR without allocating anything.
      
      I made it illegal to call sev_pin_memory() with "ulen" set to zero.
      Hopefully, that doesn't cause any problems.  I also changed the type of
      "first" and "last" to long, just for cosmetic reasons.  Otherwise on a
      64-bit system you're saving "uaddr >> 12" in an int and it truncates the
      high 20 bits away.  The math works in the current code so far as I can
      see, but it's just weird.
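      
      A hedged sketch of the kind of guard described above (variable names follow
      the description; this is not the exact diff):
      
              /* Reject empty ranges and ranges whose end wraps around, so that
               * npages cannot end up as zero for a non-empty request. */
              if (ulen == 0 || uaddr + ulen < uaddr)
                      return NULL;
      
              first  = (uaddr & PAGE_MASK) >> PAGE_SHIFT;   /* long, not int */
              last   = ((uaddr + ulen - 1) & PAGE_MASK) >> PAGE_SHIFT;
              npages = last - first + 1;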
      Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
      [Brijesh noted that the code is only reachable on X86_64.]
      Reviewed-by: Brijesh Singh <brijesh.singh@amd.com>
      Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
      86bf20cb
  6. 17 May 2018, 4 commits
  7. 15 May 2018, 1 commit
  8. 03 May 2018, 2 commits
  9. 16 Apr 2018, 2 commits
  10. 11 Apr 2018, 1 commit
  11. 05 Apr 2018, 1 commit
  12. 29 Mar 2018, 2 commits
    • KVM: SVM: Implement pause loop exit logic in SVM · 8566ac8b
      Committed by Babu Moger
      Bring the PLE (pause loop exit) logic to the AMD SVM driver.
      
      While testing, we found this helping in situations where numerous
      pauses are generated. Without these patches we could see continuous
      VMEXITs due to pause interceptions. Tested it on an AMD EPYC server with
      boot parameter idle=poll on a VM with 32 vcpus to simulate extensive
      pause behaviour. Here are the VMEXIT counts over a 10-second interval:
      
                              Without patch       With patch
      Pauses                  810199              504
      Total                   882184              325415
      Signed-off-by: Babu Moger <babu.moger@amd.com>
      [Prevented the window from dropping below the initial value. - Radim]
      Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
      8566ac8b
    • KVM: SVM: Add pause filter threshold · 1d8fb44a
      Committed by Babu Moger
      This patch adds support for the pause filter threshold. Support for this
      feature is indicated by CPUID Fn8000_000A_EDX. See AMD APM Vol 2 Section
      15.14.4 Pause Intercept Filtering for more details.
      
      In this mode, a 16-bit pause filter threshold field is added in VMCB.
      The threshold value is a cycle count that is used to reset the pause
      counter.  As with simple pause filtering, VMRUN loads the pause count
      value from VMCB into an internal counter. Then, on each pause instruction
      the hardware checks the elapsed number of cycles since the most recent
      pause instruction against the pause filter threshold. If the elapsed cycle
      count is greater than the pause filter threshold, then the internal pause
      count is reloaded from VMCB and execution continues. If the elapsed cycle
      count is less than the pause filter threshold, then the internal pause
      count is decremented. If the count value is less than zero and pause
      intercept is enabled, a #VMEXIT is triggered. If advanced pause filtering
      is supported and the pause filter threshold field is set to zero, the filter
      will operate in the simpler, count-only mode.
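      
      The hardware behaviour described above, as a rough pseudo-C sketch (field
      and helper names are illustrative, not the actual VMCB definitions):
      
              /* On each PAUSE executed by the guest: */
              elapsed = cycles_since_last_pause();
              if (elapsed > vmcb->pause_filter_threshold)
                      pause_count = vmcb->pause_filter_count;   /* reload */
              else if (--pause_count < 0 && pause_intercept_enabled)
                      exit_to_hypervisor(SVM_EXIT_PAUSE);       /* #VMEXIT */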
      Signed-off-by: Babu Moger <babu.moger@amd.com>
      Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
      1d8fb44a
  13. 28 Mar 2018, 1 commit
    • KVM: x86: Fix perf timer mode IP reporting · dd60d217
      Committed by Andi Kleen
      KVM and perf have a special backdoor mechanism to report the IP for interrupts
      re-executed after a VM exit. This works for the NMIs that perf normally uses.
      
      However when perf is in timer mode it doesn't work because the timer interrupt
      doesn't get this special treatment. This is common when KVM is running
      nested in another hypervisor which may not implement the PMU, so only
      timer mode is available.
      
      Call the functions to set up the backdoor IP also for non-NMI interrupts.
      
      I renamed the functions to set up the backdoor IP reporting to be more
      appropriate for their new use.  The SVM change is only compile tested.
      
      v2: Moved the functions inline.
      For the normal interrupt case the before/after functions are now
      called from x86.c, not arch specific code.
      For the NMI case we still need to call it in the architecture
      specific code, because it's already needed in the low level *_run
      functions.
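      
      A hedged sketch of the resulting call pattern for the normal interrupt case
      (the helper names reflect the rename described above and are shown for
      illustration only):
      
              /* Wrap re-injected host interrupts so perf can attribute the guest
               * IP, for timer interrupts as well as NMIs. */
              kvm_before_interrupt(vcpu);
              kvm_x86_ops->handle_external_intr(vcpu);
              kvm_after_interrupt(vcpu);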
      Signed-off-by: Andi Kleen <ak@linux.intel.com>
      [Removed unnecessary calls from arch handle_external_intr. - Radim]
      Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
      dd60d217
  14. 24 Mar 2018, 3 commits
  15. 17 Mar 2018, 5 commits
  16. 08 Mar 2018, 1 commit
  17. 02 Mar 2018, 3 commits
  18. 24 Feb 2018, 3 commits
    • KVM: SVM: Fix SEV LAUNCH_SECRET command · 9c5e0afa
      Committed by Brijesh Singh
      The SEV LAUNCH_SECRET command fails with error code 'invalid param'
      because we missed filling in the guest and header system physical
      addresses while issuing the command.
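      
      A hedged sketch of the missing assignments (the field and helper names
      follow the SEV firmware command layout and are illustrative here):
      
              /* The command data must carry system physical addresses for both
               * the secret header and the target guest region. */
              data->hdr_address   = __psp_pa(hdr);
              data->hdr_len       = params.hdr_len;
              data->guest_address = __sme_page_pa(pages[0]) + offset;
              data->guest_len     = params.guest_len;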
      
      Fixes: 9f5b5b95 (KVM: SVM: Add support for SEV LAUNCH_SECRET command)
      Cc: Paolo Bonzini <pbonzini@redhat.com>
      Cc: Radim Krčmář <rkrcmar@redhat.com>
      Cc: Borislav Petkov <bp@suse.de>
      Cc: Tom Lendacky <thomas.lendacky@amd.com>
      Cc: linux-kernel@vger.kernel.org
      Cc: Joerg Roedel <joro@8bytes.org>
      Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
      9c5e0afa
    • KVM: SVM: install RSM intercept · 7607b717
      Committed by Brijesh Singh
      The RSM instruction is used by the SMM handler to return from SMM mode.
      Currently, RSM causes a #UD, which results in instruction fetch, decode,
      and emulation. By installing the RSM intercept we can avoid the instruction
      fetch, since we know that the #VMEXIT was due to RSM.
      
      The patch is required for SEV guests, because SEV guest memory is encrypted
      with a guest-specific key and the hypervisor is not able to fetch the
      instruction bytes from guest memory.
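      
      Roughly, the change amounts to the following (a hedged sketch; the handler
      emulates RSM from a fixed opcode buffer so nothing has to be fetched from
      encrypted guest memory):
      
              static const u8 rsm_ins_bytes[] = "\x0f\xaa";   /* RSM opcode */
      
              /* Mark RSM for interception when building the VMCB ... */
              set_intercept(svm, INTERCEPT_RSM);
      
              /* ... and handle the exit by emulating from the known bytes. */
              static int rsm_interception(struct vcpu_svm *svm)
              {
                      return x86_emulate_instruction(&svm->vcpu, 0, 0,
                                                     rsm_ins_bytes, 2) == EMULATE_DONE;
              }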
      
      Cc: Paolo Bonzini <pbonzini@redhat.com>
      Cc: Radim Krčmář <rkrcmar@redhat.com>
      Cc: Joerg Roedel <joro@8bytes.org>
      Cc: Borislav Petkov <bp@suse.de>
      Cc: Tom Lendacky <thomas.lendacky@amd.com>
      Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
      7607b717
    • KVM: SVM: no need to call access_ok() in LAUNCH_MEASURE command · 3e233385
      Committed by Brijesh Singh
      Using access_ok() to validate the input before issuing the SEV
      command does not buy us anything in this case. If userland is
      giving us a garbage pointer then copy_to_user() will catch it when we try
      to return the measurement.
      Suggested-by: Al Viro <viro@ZenIV.linux.org.uk>
      Fixes: 0d0736f7 (KVM: SVM: Add support for KVM_SEV_LAUNCH_MEASURE ...)
      Cc: Paolo Bonzini <pbonzini@redhat.com>
      Cc: Radim Krčmář <rkrcmar@redhat.com>
      Cc: Borislav Petkov <bp@suse.de>
      Cc: Tom Lendacky <thomas.lendacky@amd.com>
      Cc: linux-kernel@vger.kernel.org
      Cc: Joerg Roedel <joro@8bytes.org>
      Signed-off-by: Brijesh Singh <brijesh.singh@amd.com>
      Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
      3e233385
  19. 23 Feb 2018, 2 commits
  20. 04 Feb 2018, 2 commits
    • KVM/SVM: Allow direct access to MSR_IA32_SPEC_CTRL · b2ac58f9
      Committed by KarimAllah Ahmed
      [ Based on a patch from Paolo Bonzini <pbonzini@redhat.com> ]
      
      ... basically doing exactly what we do for VMX:
      
      - Passthrough SPEC_CTRL to guests (if enabled in guest CPUID)
      - Save and restore SPEC_CTRL around VMExit and VMEntry only if the guest
        actually used it.
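      
      A hedged sketch of that save/restore pattern around the VMRUN path (the MSR
      constants are the standard kernel definitions; the surrounding structure is
      simplified):
      
              /* Only touch the MSR when the guest may have a non-default value,
               * keeping the common case cheap. */
              if (svm->spec_ctrl)
                      native_wrmsrl(MSR_IA32_SPEC_CTRL, svm->spec_ctrl);
      
              /* ... VMRUN, guest executes, #VMEXIT ... */
      
              if (unlikely(!msr_write_intercepted(vcpu, MSR_IA32_SPEC_CTRL)))
                      svm->spec_ctrl = native_read_msr(MSR_IA32_SPEC_CTRL);
              if (svm->spec_ctrl)
                      native_wrmsrl(MSR_IA32_SPEC_CTRL, 0);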
      Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
      Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
      Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
      Reviewed-by: Darren Kenny <darren.kenny@oracle.com>
      Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
      Cc: Andrea Arcangeli <aarcange@redhat.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: Jun Nakajima <jun.nakajima@intel.com>
      Cc: kvm@vger.kernel.org
      Cc: Dave Hansen <dave.hansen@intel.com>
      Cc: Tim Chen <tim.c.chen@linux.intel.com>
      Cc: Andy Lutomirski <luto@kernel.org>
      Cc: Asit Mallick <asit.k.mallick@intel.com>
      Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
      Cc: Greg KH <gregkh@linuxfoundation.org>
      Cc: Paolo Bonzini <pbonzini@redhat.com>
      Cc: Dan Williams <dan.j.williams@intel.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Ashok Raj <ashok.raj@intel.com>
      Link: https://lkml.kernel.org/r/1517669783-20732-1-git-send-email-karahmed@amazon.de
      b2ac58f9
    • KVM/x86: Add IBPB support · 15d45071
      Committed by Ashok Raj
      The Indirect Branch Predictor Barrier (IBPB) is an indirect branch
      control mechanism. It keeps earlier branches from influencing
      later ones.
      
      Unlike IBRS and STIBP, IBPB does not define a new mode of operation.
      It's a command that ensures predicted branch targets aren't used after
      the barrier. Although IBRS and IBPB are enumerated by the same CPUID
      enumeration, IBPB is very different.
      
      IBPB helps mitigate against three potential attacks:
      
      * Mitigate guests from being attacked by other guests.
        - This is addressed by issuing IBPB when we do a guest switch (see
          the sketch after this list).
      
      * Mitigate attacks from guest/ring3->host/ring3.
        These would require an IBPB during a context switch in the host, or after
        VMEXIT. The host process has two ways to mitigate:
        - Either it can be compiled with retpoline
        - If it's going through a context switch and has set !dumpable, then
          there is an IBPB in that path.
          (Tim's patch: https://patchwork.kernel.org/patch/10192871)
        - The case where after a VMEXIT you return to Qemu might make
          Qemu attackable from the guest when Qemu isn't compiled with retpoline.
        There are issues reported when doing IBPB on every VMEXIT that resulted
        in some tsc calibration woes in the guest.
      
      * Mitigate guest/ring0->host/ring0 attacks.
        When the host kernel is using retpoline it is safe against these attacks.
        If the host kernel isn't using retpoline we might need to do an IBPB flush on
        every VMEXIT.
      
      Even when using retpoline for indirect calls, in certain conditions 'ret'
      can use the BTB on Skylake-era CPUs. There are other mitigations
      available like RSB stuffing/clearing.
      
      * IBPB is issued only for SVM during svm_free_vcpu().
        VMX has a vmclear and SVM doesn't.  Follow discussion here:
        https://lkml.org/lkml/2018/1/15/146
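      
      For the guest-switch case above, the barrier is a single MSR write; a hedged
      sketch of the SVM side (placement and field names simplified):
      
              /* When a different VMCB is about to run on this physical CPU, flush
               * the branch predictor so the previous guest cannot steer the next
               * guest's indirect branches. */
              if (sd->current_vmcb != svm->vmcb) {
                      sd->current_vmcb = svm->vmcb;
                      indirect_branch_prediction_barrier();  /* IBPB via PRED_CMD MSR */
              }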
      
      Please refer to the following spec for more details on the enumeration
      and control.
      
      Refer here to get documentation about mitigations.
      
      https://software.intel.com/en-us/side-channel-security-support
      
      [peterz: rebase and changelog rewrite]
      [karahmed: - rebase
                 - vmx: expose PRED_CMD if guest has it in CPUID
                 - svm: only pass through IBPB if guest has it in CPUID
                 - vmx: support !cpu_has_vmx_msr_bitmap()
                 - vmx: support nested]
      [dwmw2: Expose CPUID bit too (AMD IBPB only for now as we lack IBRS)
              PRED_CMD is a write-only MSR]
      Signed-off-by: Ashok Raj <ashok.raj@intel.com>
      Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
      Signed-off-by: David Woodhouse <dwmw@amazon.co.uk>
      Signed-off-by: KarimAllah Ahmed <karahmed@amazon.de>
      Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
      Reviewed-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
      Cc: Andrea Arcangeli <aarcange@redhat.com>
      Cc: Andi Kleen <ak@linux.intel.com>
      Cc: kvm@vger.kernel.org
      Cc: Asit Mallick <asit.k.mallick@intel.com>
      Cc: Linus Torvalds <torvalds@linux-foundation.org>
      Cc: Andy Lutomirski <luto@kernel.org>
      Cc: Dave Hansen <dave.hansen@intel.com>
      Cc: Arjan Van De Ven <arjan.van.de.ven@intel.com>
      Cc: Greg KH <gregkh@linuxfoundation.org>
      Cc: Jun Nakajima <jun.nakajima@intel.com>
      Cc: Paolo Bonzini <pbonzini@redhat.com>
      Cc: Dan Williams <dan.j.williams@intel.com>
      Cc: Tim Chen <tim.c.chen@linux.intel.com>
      Link: http://lkml.kernel.org/r/1515720739-43819-6-git-send-email-ashok.raj@intel.com
      Link: https://lkml.kernel.org/r/1517522386-18410-3-git-send-email-karahmed@amazon.de
      15d45071
  21. 16 Jan 2018, 1 commit