Merge branch 'kvm-sev-cgroup' into HEAD

author Paolo Bonzini <pbonzini@redhat.com>

Thu, 22 Apr 2021 06:39:48 +0000 (02:39 -0400)

committer Paolo Bonzini <pbonzini@redhat.com>

Thu, 22 Apr 2021 17:19:01 +0000 (13:19 -0400)
author Paolo Bonzini <pbonzini@redhat.com>
Thu, 22 Apr 2021 06:39:48 +0000 (02:39 -0400)
committer Paolo Bonzini <pbonzini@redhat.com>
Thu, 22 Apr 2021 17:19:01 +0000 (13:19 -0400)
diff --combined Documentation/virt/kvm/api.rst

index fd4a849,307f2fc..56c6fca
--- 1/Documentation/virt/kvm/api.rst
--- 2/Documentation/virt/kvm/api.rst
+++ b/Documentation/virt/kvm/api.rst
@@@ -204,7 -204,7 +204,7 @@@ Errors
   
     ======     ============================================================
     EFAULT     the msr index list cannot be read from or written to
- -  E2BIG      the msr index list is to be to fit in the array specified by
+ +  E2BIG      the msr index list is too big to fit in the array specified by
                the user.
     ======     ============================================================
   
@@@ -1495,7 -1495,8 +1495,8 @@@ Fails if any VCPU has already been crea
   
   Define which vcpu is the Bootstrap Processor (BSP).  Values are the same
   as the vcpu id in KVM_CREATE_VCPU.  If this ioctl is not called, the default
- is vcpu 0.
+ is vcpu 0. This ioctl has to be called before vcpu creation,
+ otherwise it will return EBUSY error.
   
   
   4.42 KVM_GET_XSAVE
@@@ -3357,9 -3358,6 +3358,9 @@@ indicating the number of supported regi
   For ppc, the KVM_CAP_PPC_GUEST_DEBUG_SSTEP capability indicates whether
   the single-step debug event (KVM_GUESTDBG_SINGLESTEP) is supported.
   
+ +Also when supported, KVM_CAP_SET_GUEST_DEBUG2 capability indicates the
+ +supported KVM_GUESTDBG_* bits in the control field.
+ +
   When debug events exit the main run loop with the reason
   KVM_EXIT_DEBUG with the kvm_debug_exit_arch part of the kvm_run
   structure containing architecture specific debug information.
@@@ -3692,105 -3690,31 +3693,105 @@@ which is the maximum number of possibl
   
   Queues an SMI on the thread's vcpu.
   
- -4.97 KVM_CAP_PPC_MULTITCE
- --------------------------
+ +4.97 KVM_X86_SET_MSR_FILTER
+ +----------------------------
   
- -:Capability: KVM_CAP_PPC_MULTITCE
- -:Architectures: ppc
- -:Type: vm
+ +:Capability: KVM_X86_SET_MSR_FILTER
+ +:Architectures: x86
+ +:Type: vm ioctl
+ +:Parameters: struct kvm_msr_filter
+ +:Returns: 0 on success, < 0 on error
   
- -This capability means the kernel is capable of handling hypercalls
- -H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
- -space. This significantly accelerates DMA operations for PPC KVM guests.
- -User space should expect that its handlers for these hypercalls
- -are not going to be called if user space previously registered LIOBN
- -in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
+ +::
   
- -In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
- -user space might have to advertise it for the guest. For example,
- -IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
- -present in the "ibm,hypertas-functions" device-tree property.
+ +  struct kvm_msr_filter_range {
+ +  #define KVM_MSR_FILTER_READ  (1 << 0)
+ +  #define KVM_MSR_FILTER_WRITE (1 << 1)
+ +      __u32 flags;
+ +      __u32 nmsrs; /* number of msrs in bitmap */
+ +      __u32 base;  /* MSR index the bitmap starts at */
+ +      __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+ +  };
   
- -The hypercalls mentioned above may or may not be processed successfully
- -in the kernel based fast path. If they can not be handled by the kernel,
- -they will get passed on to user space. So user space still has to have
- -an implementation for these despite the in kernel acceleration.
+ +  #define KVM_MSR_FILTER_MAX_RANGES 16
+ +  struct kvm_msr_filter {
+ +  #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+ +  #define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
+ +      __u32 flags;
+ +      struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+ +  };
   
- -This capability is always enabled.
+ +flags values for ``struct kvm_msr_filter_range``:
+ +
+ +``KVM_MSR_FILTER_READ``
+ +
+ +  Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
+ +  indicates that a read should immediately fail, while a 1 indicates that
+ +  a read for a particular MSR should be handled regardless of the default
+ +  filter action.
+ +
+ +``KVM_MSR_FILTER_WRITE``
+ +
+ +  Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
+ +  indicates that a write should immediately fail, while a 1 indicates that
+ +  a write for a particular MSR should be handled regardless of the default
+ +  filter action.
+ +
+ +``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
+ +
+ +  Filter both read and write accesses to MSRs using the given bitmap. A 0
+ +  in the bitmap indicates that both reads and writes should immediately fail,
+ +  while a 1 indicates that reads and writes for a particular MSR are not
+ +  filtered by this range.
+ +
+ +flags values for ``struct kvm_msr_filter``:
+ +
+ +``KVM_MSR_FILTER_DEFAULT_ALLOW``
+ +
+ +  If no filter range matches an MSR index that is getting accessed, KVM will
+ +  fall back to allowing access to the MSR.
+ +
+ +``KVM_MSR_FILTER_DEFAULT_DENY``
+ +
+ +  If no filter range matches an MSR index that is getting accessed, KVM will
+ +  fall back to rejecting access to the MSR. In this mode, all MSRs that should
+ +  be processed by KVM need to explicitly be marked as allowed in the bitmaps.
+ +
+ +This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
+ +specify whether a certain MSR access should be explicitly filtered for or not.
+ +
+ +If this ioctl has never been invoked, MSR accesses are not guarded and the
+ +default KVM in-kernel emulation behavior is fully preserved.
+ +
+ +Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
+ +filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
+ +an error.
+ +
+ +As soon as the filtering is in place, every MSR access is processed through
+ +the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
+ +x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
+ +and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
+ +register.
+ +
+ +If a bit is within one of the defined ranges, read and write accesses are
+ +guarded by the bitmap's value for the MSR index if the kind of access
+ +is included in the ``struct kvm_msr_filter_range`` flags.  If no range
+ +covers this particular access, the behavior is determined by the flags
+ +field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
+ +and ``KVM_MSR_FILTER_DEFAULT_DENY``.
+ +
+ +Each bitmap range specifies a range of MSRs to potentially allow access on.
+ +The range goes from MSR index [base .. base+nmsrs-1]. The flags field
+ +indicates whether reads, writes or both reads and writes are filtered
+ +by setting a 1 bit in the bitmap for the corresponding MSR index.
+ +
+ +If an MSR access is not permitted through the filtering, it generates a
+ +#GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
+ +allows user space to deflect and potentially handle various MSR accesses
+ +into user space.
+ +
+ +If a vCPU is in running state while this ioctl is invoked, the vCPU may
+ +experience inconsistent filtering behavior on MSR accesses.
   
   4.98 KVM_CREATE_SPAPR_TCE_64
   ----------------------------
@@@ -4786,7 -4710,109 +4787,109 @@@ KVM_PV_VM_VERIF
     Verify the integrity of the unpacked image. Only if this succeeds,
     KVM is allowed to start protected VCPUs.
   
- 4.126 KVM_XEN_HVM_SET_ATTR
+ 4.126 KVM_X86_SET_MSR_FILTER
+ ----------------------------
+ 
+ :Capability: KVM_X86_SET_MSR_FILTER
+ :Architectures: x86
+ :Type: vm ioctl
+ :Parameters: struct kvm_msr_filter
+ :Returns: 0 on success, < 0 on error
+ 
+ ::
+ 
+   struct kvm_msr_filter_range {
+   #define KVM_MSR_FILTER_READ  (1 << 0)
+   #define KVM_MSR_FILTER_WRITE (1 << 1)
+       __u32 flags;
+       __u32 nmsrs; /* number of msrs in bitmap */
+       __u32 base;  /* MSR index the bitmap starts at */
+       __u8 *bitmap; /* a 1 bit allows the operations in flags, 0 denies */
+   };
+ 
+   #define KVM_MSR_FILTER_MAX_RANGES 16
+   struct kvm_msr_filter {
+   #define KVM_MSR_FILTER_DEFAULT_ALLOW (0 << 0)
+   #define KVM_MSR_FILTER_DEFAULT_DENY  (1 << 0)
+       __u32 flags;
+       struct kvm_msr_filter_range ranges[KVM_MSR_FILTER_MAX_RANGES];
+   };
+ 
+ flags values for ``struct kvm_msr_filter_range``:
+ 
+ ``KVM_MSR_FILTER_READ``
+ 
+   Filter read accesses to MSRs using the given bitmap. A 0 in the bitmap
+   indicates that a read should immediately fail, while a 1 indicates that
+   a read for a particular MSR should be handled regardless of the default
+   filter action.
+ 
+ ``KVM_MSR_FILTER_WRITE``
+ 
+   Filter write accesses to MSRs using the given bitmap. A 0 in the bitmap
+   indicates that a write should immediately fail, while a 1 indicates that
+   a write for a particular MSR should be handled regardless of the default
+   filter action.
+ 
+ ``KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE``
+ 
+   Filter both read and write accesses to MSRs using the given bitmap. A 0
+   in the bitmap indicates that both reads and writes should immediately fail,
+   while a 1 indicates that reads and writes for a particular MSR are not
+   filtered by this range.
+ 
+ flags values for ``struct kvm_msr_filter``:
+ 
+ ``KVM_MSR_FILTER_DEFAULT_ALLOW``
+ 
+   If no filter range matches an MSR index that is getting accessed, KVM will
+   fall back to allowing access to the MSR.
+ 
+ ``KVM_MSR_FILTER_DEFAULT_DENY``
+ 
+   If no filter range matches an MSR index that is getting accessed, KVM will
+   fall back to rejecting access to the MSR. In this mode, all MSRs that should
+   be processed by KVM need to explicitly be marked as allowed in the bitmaps.
+ 
+ This ioctl allows user space to define up to 16 bitmaps of MSR ranges to
+ specify whether a certain MSR access should be explicitly filtered for or not.
+ 
+ If this ioctl has never been invoked, MSR accesses are not guarded and the
+ default KVM in-kernel emulation behavior is fully preserved.
+ 
+ Calling this ioctl with an empty set of ranges (all nmsrs == 0) disables MSR
+ filtering. In that mode, ``KVM_MSR_FILTER_DEFAULT_DENY`` is invalid and causes
+ an error.
+ 
+ As soon as the filtering is in place, every MSR access is processed through
+ the filtering except for accesses to the x2APIC MSRs (from 0x800 to 0x8ff);
+ x2APIC MSRs are always allowed, independent of the ``default_allow`` setting,
+ and their behavior depends on the ``X2APIC_ENABLE`` bit of the APIC base
+ register.
+ 
+ If a bit is within one of the defined ranges, read and write accesses are
+ guarded by the bitmap's value for the MSR index if the kind of access
+ is included in the ``struct kvm_msr_filter_range`` flags.  If no range
+ covers this particular access, the behavior is determined by the flags
+ field in the kvm_msr_filter struct: ``KVM_MSR_FILTER_DEFAULT_ALLOW``
+ and ``KVM_MSR_FILTER_DEFAULT_DENY``.
+ 
+ Each bitmap range specifies a range of MSRs to potentially allow access on.
+ The range goes from MSR index [base .. base+nmsrs-1]. The flags field
+ indicates whether reads, writes or both reads and writes are filtered
+ by setting a 1 bit in the bitmap for the corresponding MSR index.
+ 
+ If an MSR access is not permitted through the filtering, it generates a
+ #GP inside the guest. When combined with KVM_CAP_X86_USER_SPACE_MSR, that
+ allows user space to deflect and potentially handle various MSR accesses
+ into user space.
+ 
+ Note, invoking this ioctl while a vCPU is running is inherently racy.  However,
+ KVM does guarantee that vCPUs will see either the previous filter or the new
+ filter, e.g. MSRs with identical settings in both the old and new filter will
+ have deterministic behavior.
+ 
+ 4.127 KVM_XEN_HVM_SET_ATTR
   --------------------------
   
   :Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
@@@ -4829,7 -4855,7 +4932,7 @@@ KVM_XEN_ATTR_TYPE_SHARED_INF
   KVM_XEN_ATTR_TYPE_UPCALL_VECTOR
     Sets the exception vector used to deliver Xen event channel upcalls.
   
- -4.128 KVM_XEN_HVM_GET_ATTR
+ +4.127 KVM_XEN_HVM_GET_ATTR
   --------------------------
   
   :Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
@@@ -4841,7 -4867,7 +4944,7 @@@
   Allows Xen VM attributes to be read. For the structure and types,
   see KVM_XEN_HVM_SET_ATTR above.
   
- -4.129 KVM_XEN_VCPU_SET_ATTR
+ +4.128 KVM_XEN_VCPU_SET_ATTR
   ---------------------------
   
   :Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
@@@ -4903,7 -4929,7 +5006,7 @@@ KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADJUS
     or RUNSTATE_offline) to set the current accounted state as of the
     adjusted state_entry_time.
   
- -4.130 KVM_XEN_VCPU_GET_ATTR
+ +4.129 KVM_XEN_VCPU_GET_ATTR
   ---------------------------
   
   :Capability: KVM_CAP_XEN_HVM / KVM_XEN_HVM_CONFIG_SHARED_INFO
@@@ -6207,45 -6233,6 +6310,45 @@@ KVM_RUN_BUS_LOCK flag is used to distin
   This capability can be used to check / enable 2nd DAWR feature provided
   by POWER10 processor.
   
+ +7.24 KVM_CAP_VM_COPY_ENC_CONTEXT_FROM
+ +-------------------------------------
+ +
+ +:Architectures: x86 SEV enabled
+ +:Type: vm
+ +:Parameters: args[0] is the fd of the source vm
+ +:Returns: 0 on success; ENOTTY on error
+ +
+ +This capability enables userspace to copy encryption context from the vm
+ +indicated by the fd to the vm this is called on.
+ +
+ +This is intended to support in-guest workloads scheduled by the host. This
+ +allows the in-guest workload to maintain its own NPTs and keeps the two vms
+ +from accidentally clobbering each other with interrupts and the like (separate
+ +APIC/MSRs/etc).
+ +
+ +7.25 KVM_CAP_SGX_ATTRIBUTE
+ +--------------------------
+ +
+ +:Architectures: x86
+ +:Target: VM
+ +:Parameters: args[0] is a file handle of a SGX attribute file in securityfs
+ +:Returns: 0 on success, -EINVAL if the file handle is invalid or if a requested
+ +          attribute is not supported by KVM.
+ +
+ +KVM_CAP_SGX_ATTRIBUTE enables a userspace VMM to grant a VM access to one or
+ +more privileged enclave attributes.  args[0] must hold a file handle to a valid
+ +SGX attribute file corresponding to an attribute that is supported/restricted
+ +by KVM (currently only PROVISIONKEY).
+ +
+ +The SGX subsystem restricts access to a subset of enclave attributes to provide
+ +additional security for an uncompromised kernel, e.g. use of the PROVISIONKEY
+ +is restricted to deter malware from using the PROVISIONKEY to obtain a stable
+ +system fingerprint.  To prevent userspace from circumventing such restrictions
+ +by running an enclave in a VM, KVM prevents access to privileged attributes by
+ +default.
+ +
+ +See Documentation/x86/sgx/2.Kernel-internals.rst for more details.
+ +
   8. Other capabilities.
   ======================
   
@@@ -6740,29 -6727,3 +6843,29 @@@ vcpu_info is set
   The KVM_XEN_HVM_CONFIG_RUNSTATE flag indicates that the runstate-related
   features KVM_XEN_VCPU_ATTR_TYPE_RUNSTATE_ADDR/_CURRENT/_DATA/_ADJUST are
   supported by the KVM_XEN_VCPU_SET_ATTR/KVM_XEN_VCPU_GET_ATTR ioctls.
+ +
+ +8.31 KVM_CAP_PPC_MULTITCE
+ +-------------------------
+ +
+ +:Capability: KVM_CAP_PPC_MULTITCE
+ +:Architectures: ppc
+ +:Type: vm
+ +
+ +This capability means the kernel is capable of handling hypercalls
+ +H_PUT_TCE_INDIRECT and H_STUFF_TCE without passing those into the user
+ +space. This significantly accelerates DMA operations for PPC KVM guests.
+ +User space should expect that its handlers for these hypercalls
+ +are not going to be called if user space previously registered LIOBN
+ +in KVM (via KVM_CREATE_SPAPR_TCE or similar calls).
+ +
+ +In order to enable H_PUT_TCE_INDIRECT and H_STUFF_TCE use in the guest,
+ +user space might have to advertise it for the guest. For example,
+ +IBM pSeries (sPAPR) guest starts using them if "hcall-multi-tce" is
+ +present in the "ibm,hypertas-functions" device-tree property.
+ +
+ +The hypercalls mentioned above may or may not be processed successfully
+ +in the kernel based fast path. If they can not be handled by the kernel,
+ +they will get passed on to user space. So user space still has to have
+ +an implementation for these despite the in kernel acceleration.
+ +
+ +This capability is always enabled.
diff --combined MAINTAINERS

index 0cb606a,c80ad73..0417ebf
--- 1/MAINTAINERS
--- 2/MAINTAINERS
+++ b/MAINTAINERS
@@@ -1181,7 -1181,7 +1181,7 @@@ M:      Joel Fernandes <joel@joelfernandes.o
   M:    Christian Brauner <christian@brauner.io>
   M:    Hridya Valsaraju <hridya@google.com>
   M:    Suren Baghdasaryan <surenb@google.com>
- L:    devel@driverdev.osuosl.org
+ L:    linux-kernel@vger.kernel.org
   S:    Supported
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
   F:    drivers/android/
@@@ -2489,7 -2489,7 +2489,7 @@@ N:      sc27x
   N:    sc2731
   
   ARM/STI ARCHITECTURE
- M:    Patrice Chotard <patrice.chotard@st.com>
+ M:    Patrice Chotard <patrice.chotard@foss.st.com>
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
   W:    http://www.stlinux.com
@@@ -2522,7 -2522,7 +2522,7 @@@ F:      include/linux/remoteproc/st_slim_rpr
   
   ARM/STM32 ARCHITECTURE
   M:    Maxime Coquelin <mcoquelin.stm32@gmail.com>
- M:    Alexandre Torgue <alexandre.torgue@st.com>
+ M:    Alexandre Torgue <alexandre.torgue@foss.st.com>
   L:    linux-stm32@st-md-mailman.stormreply.com (moderated for non-subscribers)
   L:    linux-arm-kernel@lists.infradead.org (moderated for non-subscribers)
   S:    Maintained
@@@ -3115,7 -3115,7 +3115,7 @@@ C:      irc://irc.oftc.net/bcach
   F:    drivers/md/bcache/
   
   BDISP ST MEDIA DRIVER
- M:    Fabien Dessenne <fabien.dessenne@st.com>
+ M:    Fabien Dessenne <fabien.dessenne@foss.st.com>
   L:    linux-media@vger.kernel.org
   S:    Supported
   W:    https://linuxtv.org
@@@ -3675,7 -3675,7 +3675,7 @@@ M:      bcm-kernel-feedback-list@broadcom.co
   L:    linux-pm@vger.kernel.org
   S:    Maintained
   T:    git git://github.com/broadcom/stblinux.git
- F:    drivers/soc/bcm/bcm-pmb.c
+ F:    drivers/soc/bcm/bcm63xx/bcm-pmb.c
   F:    include/dt-bindings/soc/bcm-pmb.h
   
   BROADCOM SPECIFIC AMBA DRIVER (BCMA)
@@@ -5080,7 -5080,7 +5080,7 @@@ S:      Maintaine
   F:    drivers/platform/x86/dell/dell-wmi.c
   
   DELTA ST MEDIA DRIVER
- M:    Hugues Fruchet <hugues.fruchet@st.com>
+ M:    Hugues Fruchet <hugues.fruchet@foss.st.com>
   L:    linux-media@vger.kernel.org
   S:    Supported
   W:    https://linuxtv.org
@@@ -6006,7 -6006,6 +6006,6 @@@ F:      drivers/gpu/drm/rockchip
   
   DRM DRIVERS FOR STI
   M:    Benjamin Gaignard <benjamin.gaignard@linaro.org>
- M:    Vincent Abriou <vincent.abriou@st.com>
   L:    dri-devel@lists.freedesktop.org
   S:    Maintained
   T:    git git://anongit.freedesktop.org/drm/drm-misc
@@@ -6014,10 -6013,9 +6013,9 @@@ F:     Documentation/devicetree/bindings/di
   F:    drivers/gpu/drm/sti
   
   DRM DRIVERS FOR STM
- M:    Yannick Fertre <yannick.fertre@st.com>
- M:    Philippe Cornu <philippe.cornu@st.com>
+ M:    Yannick Fertre <yannick.fertre@foss.st.com>
+ M:    Philippe Cornu <philippe.cornu@foss.st.com>
   M:    Benjamin Gaignard <benjamin.gaignard@linaro.org>
- M:    Vincent Abriou <vincent.abriou@st.com>
   L:    dri-devel@lists.freedesktop.org
   S:    Maintained
   T:    git git://anongit.freedesktop.org/drm/drm-misc
@@@ -7476,8 -7474,9 +7474,9 @@@ F:      include/uapi/asm-generic
   GENERIC PHY FRAMEWORK
   M:    Kishon Vijay Abraham I <kishon@ti.com>
   M:    Vinod Koul <vkoul@kernel.org>
- L:    linux-kernel@vger.kernel.org
+ L:    linux-phy@lists.infradead.org
   S:    Supported
+ Q:    https://patchwork.kernel.org/project/linux-phy/list/
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/phy/linux-phy.git
   F:    Documentation/devicetree/bindings/phy/
   F:    drivers/phy/
@@@ -8116,7 -8115,6 +8115,6 @@@ F:      drivers/crypto/hisilicon/sec2/sec_ma
   
   HISILICON STAGING DRIVERS FOR HIKEY 960/970
   M:    Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
- L:    devel@driverdev.osuosl.org
   S:    Maintained
   F:    drivers/staging/hikey9xx/
   
@@@ -8231,7 -8229,7 +8229,7 @@@ F:      include/linux/hugetlb.
   F:    mm/hugetlb.c
   
   HVA ST MEDIA DRIVER
- M:    Jean-Christophe Trotin <jean-christophe.trotin@st.com>
+ M:    Jean-Christophe Trotin <jean-christophe.trotin@foss.st.com>
   L:    linux-media@vger.kernel.org
   S:    Supported
   W:    https://linuxtv.org
@@@ -8521,6 -8519,7 +8519,7 @@@ IBM Power SRIOV Virtual NIC Device Driv
   M:    Dany Madden <drt@linux.ibm.com>
   M:    Lijun Pan <ljp@linux.ibm.com>
   M:    Sukadev Bhattiprolu <sukadev@linux.ibm.com>
+ R:    Thomas Falcon <tlfalcon@linux.ibm.com>
   L:    netdev@vger.kernel.org
   S:    Supported
   F:    drivers/net/ethernet/ibm/ibmvnic.*
@@@ -9274,7 -9273,6 +9273,7 @@@ Q:      https://patchwork.kernel.org/project
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip.git x86/sgx
   F:    Documentation/x86/sgx.rst
   F:    arch/x86/entry/vdso/vsgx.S
+ +F:    arch/x86/include/asm/sgx.h
   F:    arch/x86/include/uapi/asm/sgx.h
   F:    arch/x86/kernel/cpu/sgx/*
   F:    tools/testing/selftests/sgx/*
@@@ -10031,7 -10029,6 +10030,6 @@@ F:   scripts/leaking_addresses.p
   
   LED SUBSYSTEM
   M:    Pavel Machek <pavel@ucw.cz>
- R:    Dan Murphy <dmurphy@ti.com>
   L:    linux-leds@vger.kernel.org
   S:    Maintained
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/pavel/linux-leds.git
@@@ -10907,7 -10904,6 +10905,6 @@@ T:   git git://linuxtv.org/media_tree.gi
   F:    drivers/media/radio/radio-maxiradio*
   
   MCAN MMIO DEVICE DRIVER
- M:    Dan Murphy <dmurphy@ti.com>
   M:    Pankaj Sharma <pankj.sharma@samsung.com>
   L:    linux-can@vger.kernel.org
   S:    Maintained
@@@ -11168,7 -11164,7 +11165,7 @@@ T:   git git://linuxtv.org/media_tree.gi
   F:    drivers/media/dvb-frontends/stv6111*
   
   MEDIA DRIVERS FOR STM32 - DCMI
- M:    Hugues Fruchet <hugues.fruchet@st.com>
+ M:    Hugues Fruchet <hugues.fruchet@foss.st.com>
   L:    linux-media@vger.kernel.org
   S:    Supported
   T:    git git://linuxtv.org/media_tree.git
@@@ -12539,7 -12535,7 +12536,7 @@@ NETWORKING [MPTCP
   M:    Mat Martineau <mathew.j.martineau@linux.intel.com>
   M:    Matthieu Baerts <matthieu.baerts@tessares.net>
   L:    netdev@vger.kernel.org
- L:    mptcp@lists.01.org
+ L:    mptcp@lists.linux.dev
   S:    Maintained
   W:    https://github.com/multipath-tcp/mptcp_net-next/wiki
   B:    https://github.com/multipath-tcp/mptcp_net-next/issues
@@@ -14710,15 -14706,11 +14707,11 @@@ F:        drivers/net/ethernet/qlogic/qlcnic
   QLOGIC QLGE 10Gb ETHERNET DRIVER
   M:    Manish Chopra <manishc@marvell.com>
   M:    GR-Linux-NIC-Dev@marvell.com
- L:    netdev@vger.kernel.org
- S:    Supported
- F:    drivers/staging/qlge/
- 
- QLOGIC QLGE 10Gb ETHERNET DRIVER
   M:    Coiby Xu <coiby.xu@gmail.com>
   L:    netdev@vger.kernel.org
- S:    Maintained
+ S:    Supported
   F:    Documentation/networking/device_drivers/qlogic/qlge.rst
+ F:    drivers/staging/qlge/
   
   QM1D1B0004 MEDIA DRIVER
   M:    Akihiro Tsukada <tskd08@gmail.com>
@@@ -15636,8 -15628,8 +15629,8 @@@ F:   Documentation/s390/pci.rs
   
   S390 VFIO AP DRIVER
   M:    Tony Krowiak <akrowiak@linux.ibm.com>
- M:    Pierre Morel <pmorel@linux.ibm.com>
   M:    Halil Pasic <pasic@linux.ibm.com>
+ M:    Jason Herne <jjherne@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   S:    Supported
   W:    http://www.ibm.com/developerworks/linux/linux390/
@@@ -15649,6 -15641,7 +15642,7 @@@ F:   drivers/s390/crypto/vfio_ap_private.
   S390 VFIO-CCW DRIVER
   M:    Cornelia Huck <cohuck@redhat.com>
   M:    Eric Farman <farman@linux.ibm.com>
+ M:    Matthew Rosato <mjrosato@linux.ibm.com>
   R:    Halil Pasic <pasic@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   L:    kvm@vger.kernel.org
@@@ -15659,6 -15652,7 +15653,7 @@@ F:   include/uapi/linux/vfio_ccw.
   
   S390 VFIO-PCI DRIVER
   M:    Matthew Rosato <mjrosato@linux.ibm.com>
+ M:    Eric Farman <farman@linux.ibm.com>
   L:    linux-s390@vger.kernel.org
   L:    kvm@vger.kernel.org
   S:    Supported
@@@ -16888,8 -16882,10 +16883,10 @@@ F: tools/spi
   
   SPIDERNET NETWORK DRIVER for CELL
   M:    Ishizaki Kou <kou.ishizaki@toshiba.co.jp>
+ M:    Geoff Levand <geoff@infradead.org>
   L:    netdev@vger.kernel.org
- S:    Supported
+ L:    linuxppc-dev@lists.ozlabs.org
+ S:    Maintained
   F:    Documentation/networking/device_drivers/ethernet/toshiba/spider_net.rst
   F:    drivers/net/ethernet/toshiba/spider_net*
   
@@@ -16943,7 -16939,8 +16940,8 @@@ F:   Documentation/devicetree/bindings/me
   F:    drivers/media/i2c/st-mipid02.c
   
   ST STM32 I2C/SMBUS DRIVER
- M:    Pierre-Yves MORDRET <pierre-yves.mordret@st.com>
+ M:    Pierre-Yves MORDRET <pierre-yves.mordret@foss.st.com>
+ M:    Alain Volmat <alain.volmat@foss.st.com>
   L:    linux-i2c@vger.kernel.org
   S:    Maintained
   F:    drivers/i2c/busses/i2c-stm32*
@@@ -17041,7 -17038,7 +17039,7 @@@ F:   drivers/staging/vt665?
   
   STAGING SUBSYSTEM
   M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- L:    devel@driverdev.osuosl.org
+ L:    linux-staging@lists.linux.dev
   S:    Supported
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/staging.git
   F:    drivers/staging/
@@@ -17068,7 -17065,7 +17066,7 @@@ F:   kernel/jump_label.
   F:    kernel/static_call.c
   
   STI AUDIO (ASoC) DRIVERS
- M:    Arnaud Pouliquen <arnaud.pouliquen@st.com>
+ M:    Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
   L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
   S:    Maintained
   F:    Documentation/devicetree/bindings/sound/st,sti-asoc-card.txt
@@@ -17088,15 -17085,15 +17086,15 @@@ T:        git git://linuxtv.org/media_tree.gi
   F:    drivers/media/usb/stk1160/
   
   STM32 AUDIO (ASoC) DRIVERS
- M:    Olivier Moysan <olivier.moysan@st.com>
- M:    Arnaud Pouliquen <arnaud.pouliquen@st.com>
+ M:    Olivier Moysan <olivier.moysan@foss.st.com>
+ M:    Arnaud Pouliquen <arnaud.pouliquen@foss.st.com>
   L:    alsa-devel@alsa-project.org (moderated for non-subscribers)
   S:    Maintained
   F:    Documentation/devicetree/bindings/iio/adc/st,stm32-*.yaml
   F:    sound/soc/stm/
   
   STM32 TIMER/LPTIMER DRIVERS
- M:    Fabrice Gasnier <fabrice.gasnier@st.com>
+ M:    Fabrice Gasnier <fabrice.gasnier@foss.st.com>
   S:    Maintained
   F:    Documentation/ABI/testing/*timer-stm32
   F:    Documentation/devicetree/bindings/*/*stm32-*timer*
@@@ -17106,7 -17103,7 +17104,7 @@@ F:   include/linux/*/stm32-*tim
   
   STMMAC ETHERNET DRIVER
   M:    Giuseppe Cavallaro <peppe.cavallaro@st.com>
- M:    Alexandre Torgue <alexandre.torgue@st.com>
+ M:    Alexandre Torgue <alexandre.torgue@foss.st.com>
   M:    Jose Abreu <joabreu@synopsys.com>
   L:    netdev@vger.kernel.org
   S:    Supported
@@@ -17848,7 -17845,6 +17846,6 @@@ S:   Maintaine
   F:    drivers/thermal/ti-soc-thermal/
   
   TI BQ27XXX POWER SUPPLY DRIVER
- R:    Dan Murphy <dmurphy@ti.com>
   F:    drivers/power/supply/bq27xxx_battery.c
   F:    drivers/power/supply/bq27xxx_battery_i2c.c
   F:    include/linux/power/bq27xxx_battery.h
@@@ -17983,7 -17979,6 +17980,6 @@@ S:   Odd Fixe
   F:    sound/soc/codecs/tas571x*
   
   TI TCAN4X5X DEVICE DRIVER
- M:    Dan Murphy <dmurphy@ti.com>
   L:    linux-can@vger.kernel.org
   S:    Maintained
   F:    Documentation/devicetree/bindings/net/can/tcan4x5x.txt
@@@ -19136,7 -19131,7 +19132,7 @@@ VME SUBSYSTE
   M:    Martyn Welch <martyn@welchs.me.uk>
   M:    Manohar Vanga <manohar.vanga@gmail.com>
   M:    Greg Kroah-Hartman <gregkh@linuxfoundation.org>
- L:    devel@driverdev.osuosl.org
+ L:    linux-kernel@vger.kernel.org
   S:    Maintained
   T:    git git://git.kernel.org/pub/scm/linux/kernel/git/gregkh/char-misc.git
   F:    Documentation/driver-api/vme.rst
diff --combined arch/x86/include/asm/kvm_host.h

index 6e195f7,3768819..ad22d48
--- 1/arch/x86/include/asm/kvm_host.h
--- 2/arch/x86/include/asm/kvm_host.h
+++ b/arch/x86/include/asm/kvm_host.h
@@@ -221,22 -221,12 +221,22 @@@ enum x86_intercept_stage
   #define DR7_FIXED_1   0x00000400
   #define DR7_VOLATILE  0xffff2bff
   
+ +#define KVM_GUESTDBG_VALID_MASK \
+ +      (KVM_GUESTDBG_ENABLE | \
+ +      KVM_GUESTDBG_SINGLESTEP | \
+ +      KVM_GUESTDBG_USE_HW_BP | \
+ +      KVM_GUESTDBG_USE_SW_BP | \
+ +      KVM_GUESTDBG_INJECT_BP | \
+ +      KVM_GUESTDBG_INJECT_DB)
+ +
+ +
   #define PFERR_PRESENT_BIT 0
   #define PFERR_WRITE_BIT 1
   #define PFERR_USER_BIT 2
   #define PFERR_RSVD_BIT 3
   #define PFERR_FETCH_BIT 4
   #define PFERR_PK_BIT 5
+ +#define PFERR_SGX_BIT 15
   #define PFERR_GUEST_FINAL_BIT 32
   #define PFERR_GUEST_PAGE_BIT 33
   
@@@ -246,7 -236,6 +246,7 @@@
   #define PFERR_RSVD_MASK (1U << PFERR_RSVD_BIT)
   #define PFERR_FETCH_MASK (1U << PFERR_FETCH_BIT)
   #define PFERR_PK_MASK (1U << PFERR_PK_BIT)
+ +#define PFERR_SGX_MASK (1U << PFERR_SGX_BIT)
   #define PFERR_GUEST_FINAL_MASK (1ULL << PFERR_GUEST_FINAL_BIT)
   #define PFERR_GUEST_PAGE_MASK (1ULL << PFERR_GUEST_PAGE_BIT)
   
@@@ -895,12 -884,29 +895,29 @@@ struct kvm_hv_syndbg 
         u64 options;
   };
   
+ /* Current state of Hyper-V TSC page clocksource */
+ enum hv_tsc_page_status {
+       /* TSC page was not set up or disabled */
+       HV_TSC_PAGE_UNSET = 0,
+       /* TSC page MSR was written by the guest, update pending */
+       HV_TSC_PAGE_GUEST_CHANGED,
+       /* TSC page MSR was written by KVM userspace, update pending */
+       HV_TSC_PAGE_HOST_CHANGED,
+       /* TSC page was properly set up and is currently active  */
+       HV_TSC_PAGE_SET,
+       /* TSC page is currently being updated and therefore is inactive */
+       HV_TSC_PAGE_UPDATING,
+       /* TSC page was set up with an inaccessible GPA */
+       HV_TSC_PAGE_BROKEN,
+ };
+ 
   /* Hyper-V emulation context */
   struct kvm_hv {
         struct mutex hv_lock;
         u64 hv_guest_os_id;
         u64 hv_hypercall;
         u64 hv_tsc_page;
+       enum hv_tsc_page_status hv_tsc_page_status;
   
         /* Hyper-v based guest crash (NT kernel bugcheck) parameters */
         u64 hv_crash_param[HV_X64_MSR_CRASH_PARAMS];
@@@ -942,6 -948,12 +959,12 @@@ enum kvm_irqchip_mode 
         KVM_IRQCHIP_SPLIT,        /* created with KVM_CAP_SPLIT_IRQCHIP */
   };
   
+ struct kvm_x86_msr_filter {
+       u8 count;
+       bool default_allow:1;
+       struct msr_bitmap_range ranges[16];
+ };
+ 
   #define APICV_INHIBIT_REASON_DISABLE    0
   #define APICV_INHIBIT_REASON_HYPERV     1
   #define APICV_INHIBIT_REASON_NESTED     2
@@@ -1036,20 -1048,12 +1059,15 @@@ struct kvm_arch 
         bool guest_can_read_msr_platform_info;
         bool exception_payload_enabled;
   
+       bool bus_lock_detection_enabled;
+ 
         /* Deflect RDMSR and WRMSR to user space when they trigger a #GP */
         u32 user_space_msr_mask;
- 
-       struct {
-               u8 count;
-               bool default_allow:1;
-               struct msr_bitmap_range ranges[16];
-       } msr_filter;
- 
-       bool bus_lock_detection_enabled;
+       struct kvm_x86_msr_filter __rcu *msr_filter;
   
+ +      /* Guest can access the SGX PROVISIONKEY. */
+ +      bool sgx_provisioning_allowed;
+ +
         struct kvm_pmu_event_filter __rcu *pmu_event_filter;
         struct task_struct *nx_lpage_recovery_thread;
   
@@@ -1064,36 -1068,25 +1082,36 @@@
         bool tdp_mmu_enabled;
   
         /*
- -       * List of struct kvmp_mmu_pages being used as roots.
+ +       * List of struct kvm_mmu_pages being used as roots.
          * All struct kvm_mmu_pages in the list should have
          * tdp_mmu_page set.
- -       * All struct kvm_mmu_pages in the list should have a positive
- -       * root_count except when a thread holds the MMU lock and is removing
- -       * an entry from the list.
+ +       *
+ +       * For reads, this list is protected by:
+ +       *      the MMU lock in read mode + RCU or
+ +       *      the MMU lock in write mode
+ +       *
+ +       * For writes, this list is protected by:
+ +       *      the MMU lock in read mode + the tdp_mmu_pages_lock or
+ +       *      the MMU lock in write mode
+ +       *
+ +       * Roots will remain in the list until their tdp_mmu_root_count
+ +       * drops to zero, at which point the thread that decremented the
+ +       * count to zero should remove the root from the list and clean
+ +       * it up, freeing the root after an RCU grace period.
          */
         struct list_head tdp_mmu_roots;
   
         /*
          * List of struct kvmp_mmu_pages not being used as roots.
          * All struct kvm_mmu_pages in the list should have
- -       * tdp_mmu_page set and a root_count of 0.
+ +       * tdp_mmu_page set and a tdp_mmu_root_count of 0.
          */
         struct list_head tdp_mmu_pages;
   
         /*
          * Protects accesses to the following fields when the MMU lock
          * is held in read mode:
+ +       *  - tdp_mmu_roots (above)
          *  - tdp_mmu_pages (above)
          *  - the link field of struct kvm_mmu_pages used by the TDP MMU
          *  - lpage_disallowed_mmu_pages
@@@ -1150,9 -1143,6 +1168,9 @@@ struct kvm_vcpu_stat 
         u64 req_event;
         u64 halt_poll_success_ns;
         u64 halt_poll_fail_ns;
+ +      u64 nested_run;
+ +      u64 directed_yield_attempted;
+ +      u64 directed_yield_successful;
   };
   
   struct x86_instruction_info;
@@@ -1279,8 -1269,8 +1297,8 @@@ struct kvm_x86_ops 
         int (*set_identity_map_addr)(struct kvm *kvm, u64 ident_addr);
         u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio);
   
- -      void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, unsigned long pgd,
- -                           int pgd_level);
+ +      void (*load_mmu_pgd)(struct kvm_vcpu *vcpu, hpa_t root_hpa,
+ +                           int root_level);
   
         bool (*has_wbinvd_exit)(void);
   
@@@ -1349,7 -1339,6 +1367,7 @@@
         int (*mem_enc_op)(struct kvm *kvm, void __user *argp);
         int (*mem_enc_reg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
         int (*mem_enc_unreg_region)(struct kvm *kvm, struct kvm_enc_region *argp);
+ +      int (*vm_copy_enc_context_from)(struct kvm *kvm, unsigned int source_fd);
   
         int (*get_msr_feature)(struct kvm_msr_entry *entry);
   
@@@ -1368,7 -1357,6 +1386,7 @@@
   struct kvm_x86_nested_ops {
         int (*check_events)(struct kvm_vcpu *vcpu);
         bool (*hv_timer_pending)(struct kvm_vcpu *vcpu);
+ +      void (*triple_fault)(struct kvm_vcpu *vcpu);
         int (*get_state)(struct kvm_vcpu *vcpu,
                          struct kvm_nested_state __user *user_kvm_nested_state,
                          unsigned user_data_size);
@@@ -1440,6 -1428,9 +1458,6 @@@ void kvm_mmu_destroy(struct kvm_vcpu *v
   int kvm_mmu_create(struct kvm_vcpu *vcpu);
   void kvm_mmu_init_vm(struct kvm *kvm);
   void kvm_mmu_uninit_vm(struct kvm *kvm);
- -void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask,
- -              u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask,
- -              u64 acc_track_mask, u64 me_mask);
   
   void kvm_mmu_reset_context(struct kvm_vcpu *vcpu);
   void kvm_mmu_slot_remove_write_access(struct kvm *kvm,
@@@ -1449,6 -1440,8 +1467,6 @@@ void kvm_mmu_zap_collapsible_sptes(stru
                                    const struct kvm_memory_slot *memslot);
   void kvm_mmu_slot_leaf_clear_dirty(struct kvm *kvm,
                                    struct kvm_memory_slot *memslot);
- -void kvm_mmu_slot_largepage_remove_write_access(struct kvm *kvm,
- -                                      struct kvm_memory_slot *memslot);
   void kvm_mmu_zap_all(struct kvm *kvm);
   void kvm_mmu_invalidate_mmio_sptes(struct kvm *kvm, u64 gen);
   unsigned long kvm_mmu_calculate_default_mmu_pages(struct kvm *kvm);
@@@ -1545,11 -1538,6 +1563,11 @@@ int kvm_get_msr(struct kvm_vcpu *vcpu, 
   int kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data);
   int kvm_emulate_rdmsr(struct kvm_vcpu *vcpu);
   int kvm_emulate_wrmsr(struct kvm_vcpu *vcpu);
+ +int kvm_emulate_as_nop(struct kvm_vcpu *vcpu);
+ +int kvm_emulate_invd(struct kvm_vcpu *vcpu);
+ +int kvm_emulate_mwait(struct kvm_vcpu *vcpu);
+ +int kvm_handle_invalid_op(struct kvm_vcpu *vcpu);
+ +int kvm_emulate_monitor(struct kvm_vcpu *vcpu);
   
   int kvm_fast_pio(struct kvm_vcpu *vcpu, int size, unsigned short port, int in);
   int kvm_emulate_cpuid(struct kvm_vcpu *vcpu);
@@@ -1578,14 -1566,14 +1596,14 @@@ void kvm_get_dr(struct kvm_vcpu *vcpu, 
   unsigned long kvm_get_cr8(struct kvm_vcpu *vcpu);
   void kvm_lmsw(struct kvm_vcpu *vcpu, unsigned long msw);
   void kvm_get_cs_db_l_bits(struct kvm_vcpu *vcpu, int *db, int *l);
- -int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr);
+ +int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu);
   
   int kvm_get_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
   int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr);
   
   unsigned long kvm_get_rflags(struct kvm_vcpu *vcpu);
   void kvm_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags);
- -bool kvm_rdpmc(struct kvm_vcpu *vcpu);
+ +int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu);
   
   void kvm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr);
   void kvm_queue_exception_e(struct kvm_vcpu *vcpu, unsigned nr, u32 error_code);
@@@ -1626,6 -1614,9 +1644,6 @@@ void kvm_update_dr7(struct kvm_vcpu *vc
   
   int kvm_mmu_unprotect_page(struct kvm *kvm, gfn_t gfn);
   void __kvm_mmu_free_some_pages(struct kvm_vcpu *vcpu);
- -int kvm_mmu_load(struct kvm_vcpu *vcpu);
- -void kvm_mmu_unload(struct kvm_vcpu *vcpu);
- -void kvm_mmu_sync_roots(struct kvm_vcpu *vcpu);
   void kvm_mmu_free_roots(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
                         ulong roots_to_free);
   gpa_t translate_nested_gpa(struct kvm_vcpu *vcpu, gpa_t gpa, u32 access,
@@@ -1744,7 -1735,11 +1762,7 @@@ asmlinkage void kvm_spurious_fault(void
         _ASM_EXTABLE(666b, 667b)
   
   #define KVM_ARCH_WANT_MMU_NOTIFIER
- -int kvm_unmap_hva_range(struct kvm *kvm, unsigned long start, unsigned long end,
- -                      unsigned flags);
- -int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end);
- -int kvm_test_age_hva(struct kvm *kvm, unsigned long hva);
- -int kvm_set_spte_hva(struct kvm *kvm, unsigned long hva, pte_t pte);
+ +
   int kvm_cpu_has_injectable_intr(struct kvm_vcpu *v);
   int kvm_cpu_has_interrupt(struct kvm_vcpu *vcpu);
   int kvm_cpu_has_extint(struct kvm_vcpu *v);
diff --combined arch/x86/kernel/kvm.c

index 224a7a1,78bb0fa..bd01a61
--- 1/arch/x86/kernel/kvm.c
--- 2/arch/x86/kernel/kvm.c
+++ b/arch/x86/kernel/kvm.c
@@@ -451,10 -451,6 +451,10 @@@ static void __init sev_map_percpu_data(
         }
   }
   
+ +#ifdef CONFIG_SMP
+ +
+ +static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
+ +
   static bool pv_tlb_flush_supported(void)
   {
         return (kvm_para_has_feature(KVM_FEATURE_PV_TLB_FLUSH) &&
@@@ -462,6 -458,10 +462,6 @@@
                 kvm_para_has_feature(KVM_FEATURE_STEAL_TIME));
   }
   
- -static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
- -
- -#ifdef CONFIG_SMP
- -
   static bool pv_ipi_supported(void)
   {
         return kvm_para_has_feature(KVM_FEATURE_PV_SEND_IPI);
@@@ -574,49 -574,6 +574,49 @@@ static void kvm_smp_send_call_func_ipi(
         }
   }
   
+ +static void kvm_flush_tlb_others(const struct cpumask *cpumask,
+ +                      const struct flush_tlb_info *info)
+ +{
+ +      u8 state;
+ +      int cpu;
+ +      struct kvm_steal_time *src;
+ +      struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
+ +
+ +      cpumask_copy(flushmask, cpumask);
+ +      /*
+ +       * We have to call flush only on online vCPUs. And
+ +       * queue flush_on_enter for pre-empted vCPUs
+ +       */
+ +      for_each_cpu(cpu, flushmask) {
+ +              src = &per_cpu(steal_time, cpu);
+ +              state = READ_ONCE(src->preempted);
+ +              if ((state & KVM_VCPU_PREEMPTED)) {
+ +                      if (try_cmpxchg(&src->preempted, &state,
+ +                                      state | KVM_VCPU_FLUSH_TLB))
+ +                              __cpumask_clear_cpu(cpu, flushmask);
+ +              }
+ +      }
+ +
+ +      native_flush_tlb_others(flushmask, info);
+ +}
+ +
+ +static __init int kvm_alloc_cpumask(void)
+ +{
+ +      int cpu;
+ +
+ +      if (!kvm_para_available() || nopv)
+ +              return 0;
+ +
+ +      if (pv_tlb_flush_supported() || pv_ipi_supported())
+ +              for_each_possible_cpu(cpu) {
+ +                      zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
+ +                              GFP_KERNEL, cpu_to_node(cpu));
+ +              }
+ +
+ +      return 0;
+ +}
+ +arch_initcall(kvm_alloc_cpumask);
+ +
   static void __init kvm_smp_prepare_boot_cpu(void)
   {
         /*
@@@ -654,8 -611,33 +654,8 @@@ static int kvm_cpu_down_prepare(unsigne
         local_irq_enable();
         return 0;
   }
- -#endif
- -
- -static void kvm_flush_tlb_others(const struct cpumask *cpumask,
- -                      const struct flush_tlb_info *info)
- -{
- -      u8 state;
- -      int cpu;
- -      struct kvm_steal_time *src;
- -      struct cpumask *flushmask = this_cpu_cpumask_var_ptr(__pv_cpu_mask);
- -
- -      cpumask_copy(flushmask, cpumask);
- -      /*
- -       * We have to call flush only on online vCPUs. And
- -       * queue flush_on_enter for pre-empted vCPUs
- -       */
- -      for_each_cpu(cpu, flushmask) {
- -              src = &per_cpu(steal_time, cpu);
- -              state = READ_ONCE(src->preempted);
- -              if ((state & KVM_VCPU_PREEMPTED)) {
- -                      if (try_cmpxchg(&src->preempted, &state,
- -                                      state | KVM_VCPU_FLUSH_TLB))
- -                              __cpumask_clear_cpu(cpu, flushmask);
- -              }
- -      }
   
- -      native_flush_tlb_others(flushmask, info);
- -}
+ +#endif
   
   static void __init kvm_guest_init(void)
   {
@@@ -671,6 -653,12 +671,6 @@@
                 pv_ops.time.steal_clock = kvm_steal_clock;
         }
   
- -      if (pv_tlb_flush_supported()) {
- -              pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
- -              pv_ops.mmu.tlb_remove_table = tlb_remove_table;
- -              pr_info("KVM setup pv remote TLB flush\n");
- -      }
- -
         if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
                 apic_set_eoi_write(kvm_guest_apic_eoi_write);
   
@@@ -680,12 -668,6 +680,12 @@@
         }
   
   #ifdef CONFIG_SMP
+ +      if (pv_tlb_flush_supported()) {
+ +              pv_ops.mmu.flush_tlb_others = kvm_flush_tlb_others;
+ +              pv_ops.mmu.tlb_remove_table = tlb_remove_table;
+ +              pr_info("KVM setup pv remote TLB flush\n");
+ +      }
+ +
         smp_ops.smp_prepare_boot_cpu = kvm_smp_prepare_boot_cpu;
         if (pv_sched_yield_supported()) {
                 smp_ops.send_call_func_ipi = kvm_smp_send_call_func_ipi;
@@@ -752,7 -734,7 +752,7 @@@ static uint32_t __init kvm_detect(void
   
   static void __init kvm_apic_init(void)
   {
- -#if defined(CONFIG_SMP)
+ +#ifdef CONFIG_SMP
         if (pv_ipi_supported())
                 kvm_setup_pv_ipi();
   #endif
@@@ -812,6 -794,32 +812,6 @@@ static __init int activate_jump_labels(
   }
   arch_initcall(activate_jump_labels);
   
- -static __init int kvm_alloc_cpumask(void)
- -{
- -      int cpu;
- -      bool alloc = false;
- -
- -      if (!kvm_para_available() || nopv)
- -              return 0;
- -
- -      if (pv_tlb_flush_supported())
- -              alloc = true;
- -
- -#if defined(CONFIG_SMP)
- -      if (pv_ipi_supported())
- -              alloc = true;
- -#endif
- -
- -      if (alloc)
- -              for_each_possible_cpu(cpu) {
- -                      zalloc_cpumask_var_node(per_cpu_ptr(&__pv_cpu_mask, cpu),
- -                              GFP_KERNEL, cpu_to_node(cpu));
- -              }
- -
- -      return 0;
- -}
- -arch_initcall(kvm_alloc_cpumask);
- -
   #ifdef CONFIG_PARAVIRT_SPINLOCKS
   
   /* Kick a cpu by its apicid. Used to wake up a halted vcpu */
@@@ -828,28 -836,25 +828,25 @@@ static void kvm_kick_cpu(int cpu
   
   static void kvm_wait(u8 *ptr, u8 val)
   {
-       unsigned long flags;
- 
         if (in_nmi())
                 return;
   
-       local_irq_save(flags);
- 
-       if (READ_ONCE(*ptr) != val)
-               goto out;
- 
         /*
          * halt until it's our turn and kicked. Note that we do safe halt
          * for irq enabled case to avoid hang when lock info is overwritten
          * in irq spinlock slowpath and no spurious interrupt occur to save us.
          */
-       if (arch_irqs_disabled_flags(flags))
-               halt();
-       else
-               safe_halt();
+       if (irqs_disabled()) {
+               if (READ_ONCE(*ptr) == val)
+                       halt();
+       } else {
+               local_irq_disable();
   
- out:
-       local_irq_restore(flags);
+               if (READ_ONCE(*ptr) == val)
+                       safe_halt();
+ 
+               local_irq_enable();
+       }
   }
   
   #ifdef CONFIG_X86_32
diff --combined arch/x86/kvm/Makefile

index 87f514c,eafc4d6..c589db5
--- 1/arch/x86/kvm/Makefile
--- 2/arch/x86/kvm/Makefile
+++ b/arch/x86/kvm/Makefile
@@@ -1,6 -1,6 +1,6 @@@
   # SPDX-License-Identifier: GPL-2.0
   
- ccflags-y += -Iarch/x86/kvm
+ ccflags-y += -I $(srctree)/arch/x86/kvm
   ccflags-$(CONFIG_KVM_WERROR) += -Werror
   
   ifeq ($(CONFIG_FRAME_POINTER),y)
@@@ -23,8 -23,6 +23,8 @@@ kvm-$(CONFIG_KVM_XEN) += xen.
   
   kvm-intel-y           += vmx/vmx.o vmx/vmenter.o vmx/pmu_intel.o vmx/vmcs12.o \
                            vmx/evmcs.o vmx/nested.o vmx/posted_intr.o
+ +kvm-intel-$(CONFIG_X86_SGX_KVM)       += vmx/sgx.o
+ +
   kvm-amd-y             += svm/svm.o svm/vmenter.o svm/pmu.o svm/nested.o svm/avic.o svm/sev.o
   
   obj-$(CONFIG_KVM)     += kvm.o
diff --combined arch/x86/kvm/svm/nested.c

index 3d8824e,fb204ea..540d43b
--- 1/arch/x86/kvm/svm/nested.c
--- 2/arch/x86/kvm/svm/nested.c
+++ b/arch/x86/kvm/svm/nested.c
@@@ -29,8 -29,6 +29,8 @@@
   #include "lapic.h"
   #include "svm.h"
   
+ +#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK
+ +
   static void nested_svm_inject_npf_exit(struct kvm_vcpu *vcpu,
                                        struct x86_exception *fault)
   {
@@@ -94,12 -92,12 +94,12 @@@ static unsigned long nested_svm_get_tdp
   static void nested_svm_init_mmu_context(struct kvm_vcpu *vcpu)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
- -      struct vmcb *hsave = svm->nested.hsave;
   
         WARN_ON(mmu_is_nested(vcpu));
   
         vcpu->arch.mmu = &vcpu->arch.guest_mmu;
- -      kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, hsave->save.cr4, hsave->save.efer,
+ +      kvm_init_shadow_npt_mmu(vcpu, X86_CR0_PG, svm->vmcb01.ptr->save.cr4,
+ +                              svm->vmcb01.ptr->save.efer,
                                 svm->nested.ctl.nested_cr3);
         vcpu->arch.mmu->get_guest_pgd     = nested_svm_get_tdp_cr3;
         vcpu->arch.mmu->get_pdptr         = nested_svm_get_tdp_pdptr;
@@@ -125,7 -123,7 +125,7 @@@ void recalc_intercepts(struct vcpu_svm 
                 return;
   
         c = &svm->vmcb->control;
- -      h = &svm->nested.hsave->control;
+ +      h = &svm->vmcb01.ptr->control;
         g = &svm->nested.ctl;
   
         for (i = 0; i < MAX_INTERCEPT; i++)
@@@ -215,64 -213,44 +215,64 @@@ static bool nested_svm_vmrun_msrpm(stru
         return true;
   }
   
- -static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
+ +/*
+ + * Bits 11:0 of bitmap address are ignored by hardware
+ + */
+ +static bool nested_svm_check_bitmap_pa(struct kvm_vcpu *vcpu, u64 pa, u32 size)
   {
- -      struct vcpu_svm *svm = to_svm(vcpu);
+ +      u64 addr = PAGE_ALIGN(pa);
   
- -      if (WARN_ON(!is_guest_mode(vcpu)))
- -              return true;
- -
- -      if (!nested_svm_vmrun_msrpm(svm)) {
- -              vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
- -              vcpu->run->internal.suberror =
- -                      KVM_INTERNAL_ERROR_EMULATION;
- -              vcpu->run->internal.ndata = 0;
- -              return false;
- -      }
- -
- -      return true;
+ +      return kvm_vcpu_is_legal_gpa(vcpu, addr) &&
+ +          kvm_vcpu_is_legal_gpa(vcpu, addr + size - 1);
   }
   
- -static bool nested_vmcb_check_controls(struct vmcb_control_area *control)
+ +static bool nested_vmcb_check_controls(struct kvm_vcpu *vcpu,
+ +                                     struct vmcb_control_area *control)
   {
- -      if ((vmcb_is_intercept(control, INTERCEPT_VMRUN)) == 0)
+ +      if (CC(!vmcb_is_intercept(control, INTERCEPT_VMRUN)))
                 return false;
   
- -      if (control->asid == 0)
+ +      if (CC(control->asid == 0))
                 return false;
   
- -      if ((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) &&
- -          !npt_enabled)
+ +      if (CC((control->nested_ctl & SVM_NESTED_CTL_NP_ENABLE) && !npt_enabled))
+ +              return false;
+ +
+ +      if (CC(!nested_svm_check_bitmap_pa(vcpu, control->msrpm_base_pa,
+ +                                         MSRPM_SIZE)))
+ +              return false;
+ +      if (CC(!nested_svm_check_bitmap_pa(vcpu, control->iopm_base_pa,
+ +                                         IOPM_SIZE)))
                 return false;
   
         return true;
   }
   
- -static bool nested_vmcb_check_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+ +static bool nested_vmcb_check_cr3_cr4(struct kvm_vcpu *vcpu,
+ +                                    struct vmcb_save_area *save)
   {
- -      struct kvm_vcpu *vcpu = &svm->vcpu;
- -      bool vmcb12_lma;
+ +      /*
+ +       * These checks are also performed by KVM_SET_SREGS,
+ +       * except that EFER.LMA is not checked by SVM against
+ +       * CR0.PG && EFER.LME.
+ +       */
+ +      if ((save->efer & EFER_LME) && (save->cr0 & X86_CR0_PG)) {
+ +              if (CC(!(save->cr4 & X86_CR4_PAE)) ||
+ +                  CC(!(save->cr0 & X86_CR0_PE)) ||
+ +                  CC(kvm_vcpu_is_illegal_gpa(vcpu, save->cr3)))
+ +                      return false;
+ +      }
+ +
+ +      if (CC(!kvm_is_valid_cr4(vcpu, save->cr4)))
+ +              return false;
+ +
+ +      return true;
+ +}
   
+ +/* Common checks that apply to both L1 and L2 state.  */
+ +static bool nested_vmcb_valid_sregs(struct kvm_vcpu *vcpu,
+ +                                  struct vmcb_save_area *save)
+ +{
         /*
          * FIXME: these should be done after copying the fields,
          * to avoid TOC/TOU races.  For these save area checks
@@@ -280,27 -258,31 +280,27 @@@
          * kvm_set_cr4 handle failure; EFER_SVME is an exception
          * so it is force-set later in nested_prepare_vmcb_save.
          */
- -      if ((vmcb12->save.efer & EFER_SVME) == 0)
+ +      if (CC(!(save->efer & EFER_SVME)))
                 return false;
   
- -      if (((vmcb12->save.cr0 & X86_CR0_CD) == 0) && (vmcb12->save.cr0 & X86_CR0_NW))
+ +      if (CC((save->cr0 & X86_CR0_CD) == 0 && (save->cr0 & X86_CR0_NW)) ||
+ +          CC(save->cr0 & ~0xffffffffULL))
                 return false;
   
- -      if (!kvm_dr6_valid(vmcb12->save.dr6) || !kvm_dr7_valid(vmcb12->save.dr7))
+ +      if (CC(!kvm_dr6_valid(save->dr6)) || CC(!kvm_dr7_valid(save->dr7)))
                 return false;
   
- -      vmcb12_lma = (vmcb12->save.efer & EFER_LME) && (vmcb12->save.cr0 & X86_CR0_PG);
+ +      if (!nested_vmcb_check_cr3_cr4(vcpu, save))
+ +              return false;
   
- -      if (vmcb12_lma) {
- -              if (!(vmcb12->save.cr4 & X86_CR4_PAE) ||
- -                  !(vmcb12->save.cr0 & X86_CR0_PE) ||
- -                  kvm_vcpu_is_illegal_gpa(vcpu, vmcb12->save.cr3))
- -                      return false;
- -      }
- -      if (!kvm_is_valid_cr4(&svm->vcpu, vmcb12->save.cr4))
+ +      if (CC(!kvm_valid_efer(vcpu, save->efer)))
                 return false;
   
         return true;
   }
   
- -static void load_nested_vmcb_control(struct vcpu_svm *svm,
- -                                   struct vmcb_control_area *control)
+ +static void nested_load_control_from_vmcb12(struct vcpu_svm *svm,
+ +                                          struct vmcb_control_area *control)
   {
         copy_vmcb_control_area(&svm->nested.ctl, control);
   
@@@ -312,9 -294,9 +312,9 @@@
   
   /*
    * Synchronize fields that are written by the processor, so that
- - * they can be copied back into the nested_vmcb.
+ + * they can be copied back into the vmcb12.
    */
- -void sync_nested_vmcb_control(struct vcpu_svm *svm)
+ +void nested_sync_control_from_vmcb02(struct vcpu_svm *svm)
   {
         u32 mask;
         svm->nested.ctl.event_inj      = svm->vmcb->control.event_inj;
@@@ -342,8 -324,8 +342,8 @@@
    * Transfer any event that L0 or L1 wanted to inject into L2 to
    * EXIT_INT_INFO.
    */
- -static void nested_vmcb_save_pending_event(struct vcpu_svm *svm,
- -                                         struct vmcb *vmcb12)
+ +static void nested_save_pending_event_to_vmcb12(struct vcpu_svm *svm,
+ +                                              struct vmcb *vmcb12)
   {
         struct kvm_vcpu *vcpu = &svm->vcpu;
         u32 exit_int_info = 0;
@@@ -387,12 -369,12 +387,12 @@@ static inline bool nested_npt_enabled(s
   static int nested_svm_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
                                bool nested_npt)
   {
- -      if (kvm_vcpu_is_illegal_gpa(vcpu, cr3))
+ +      if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3)))
                 return -EINVAL;
   
         if (!nested_npt && is_pae_paging(vcpu) &&
             (cr3 != kvm_read_cr3(vcpu) || pdptrs_changed(vcpu))) {
- -              if (!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))
+ +              if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3)))
                         return -EINVAL;
         }
   
@@@ -411,43 -393,15 +411,42 @@@
         return 0;
   }
   
- -static void nested_prepare_vmcb_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+ +void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm)
   {
+ +      if (!svm->nested.vmcb02.ptr)
+ +              return;
+ +
+ +      /* FIXME: merge g_pat from vmcb01 and vmcb12.  */
+ +      svm->nested.vmcb02.ptr->save.g_pat = svm->vmcb01.ptr->save.g_pat;
+ +}
+ +
+ +static void nested_vmcb02_prepare_save(struct vcpu_svm *svm, struct vmcb *vmcb12)
+ +{
+ +      bool new_vmcb12 = false;
+ +
+ +      nested_vmcb02_compute_g_pat(svm);
+ +
         /* Load the nested guest state */
- 
- -      svm->vmcb->save.es = vmcb12->save.es;
- -      svm->vmcb->save.cs = vmcb12->save.cs;
- -      svm->vmcb->save.ss = vmcb12->save.ss;
- -      svm->vmcb->save.ds = vmcb12->save.ds;
- -      svm->vmcb->save.gdtr = vmcb12->save.gdtr;
- -      svm->vmcb->save.idtr = vmcb12->save.idtr;
+ +      if (svm->nested.vmcb12_gpa != svm->nested.last_vmcb12_gpa) {
+ +              new_vmcb12 = true;
+ +              svm->nested.last_vmcb12_gpa = svm->nested.vmcb12_gpa;
+ +      }
+ +
+ +      if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_SEG))) {
+ +              svm->vmcb->save.es = vmcb12->save.es;
+ +              svm->vmcb->save.cs = vmcb12->save.cs;
+ +              svm->vmcb->save.ss = vmcb12->save.ss;
+ +              svm->vmcb->save.ds = vmcb12->save.ds;
+ +              svm->vmcb->save.cpl = vmcb12->save.cpl;
+ +              vmcb_mark_dirty(svm->vmcb, VMCB_SEG);
+ +      }
+ +
+ +      if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DT))) {
+ +              svm->vmcb->save.gdtr = vmcb12->save.gdtr;
+ +              svm->vmcb->save.idtr = vmcb12->save.idtr;
+ +              vmcb_mark_dirty(svm->vmcb, VMCB_DT);
+ +      }
+ +
         kvm_set_rflags(&svm->vcpu, vmcb12->save.rflags | X86_EFLAGS_FIXED);
   
         /*
@@@ -459,9 -413,7 +458,9 @@@
   
         svm_set_cr0(&svm->vcpu, vmcb12->save.cr0);
         svm_set_cr4(&svm->vcpu, vmcb12->save.cr4);
- -      svm->vmcb->save.cr2 = svm->vcpu.arch.cr2 = vmcb12->save.cr2;
+ +
+ +      svm->vcpu.arch.cr2 = vmcb12->save.cr2;
+ +
         kvm_rax_write(&svm->vcpu, vmcb12->save.rax);
         kvm_rsp_write(&svm->vcpu, vmcb12->save.rsp);
         kvm_rip_write(&svm->vcpu, vmcb12->save.rip);
@@@ -470,41 -422,15 +469,41 @@@
         svm->vmcb->save.rax = vmcb12->save.rax;
         svm->vmcb->save.rsp = vmcb12->save.rsp;
         svm->vmcb->save.rip = vmcb12->save.rip;
- -      svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
- -      svm->vcpu.arch.dr6  = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
- -      svm->vmcb->save.cpl = vmcb12->save.cpl;
+ +
+ +      /* These bits will be set properly on the first execution when new_vmcb12 is true */
+ +      if (unlikely(new_vmcb12 || vmcb_is_dirty(vmcb12, VMCB_DR))) {
+ +              svm->vmcb->save.dr7 = vmcb12->save.dr7 | DR7_FIXED_1;
+ +              svm->vcpu.arch.dr6  = vmcb12->save.dr6 | DR6_ACTIVE_LOW;
+ +              vmcb_mark_dirty(svm->vmcb, VMCB_DR);
+ +      }
   }
   
- -static void nested_prepare_vmcb_control(struct vcpu_svm *svm)
+ +static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
   {
         const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
   
+ +      /*
+ +       * Filled at exit: exit_code, exit_code_hi, exit_info_1, exit_info_2,
+ +       * exit_int_info, exit_int_info_err, next_rip, insn_len, insn_bytes.
+ +       */
+ +
+ +      /*
+ +       * Also covers avic_vapic_bar, avic_backing_page, avic_logical_id,
+ +       * avic_physical_id.
+ +       */
+ +      WARN_ON(svm->vmcb01.ptr->control.int_ctl & AVIC_ENABLE_MASK);
+ +
+ +      /* Copied from vmcb01.  msrpm_base can be overwritten later.  */
+ +      svm->vmcb->control.nested_ctl = svm->vmcb01.ptr->control.nested_ctl;
+ +      svm->vmcb->control.iopm_base_pa = svm->vmcb01.ptr->control.iopm_base_pa;
+ +      svm->vmcb->control.msrpm_base_pa = svm->vmcb01.ptr->control.msrpm_base_pa;
+ +
+ +      /* Done at vmrun: asid.  */
+ +
+ +      /* Also overwritten later if necessary.  */
+ +      svm->vmcb->control.tlb_ctl = TLB_CONTROL_DO_NOTHING;
+ +
+ +      /* nested_cr3.  */
         if (nested_npt_enabled(svm))
                 nested_svm_init_mmu_context(&svm->vcpu);
   
@@@ -513,7 -439,7 +512,7 @@@
   
         svm->vmcb->control.int_ctl             =
                 (svm->nested.ctl.int_ctl & ~mask) |
- -              (svm->nested.hsave->control.int_ctl & mask);
+ +              (svm->vmcb01.ptr->control.int_ctl & mask);
   
         svm->vmcb->control.virt_ext            = svm->nested.ctl.virt_ext;
         svm->vmcb->control.int_vector          = svm->nested.ctl.int_vector;
@@@ -528,28 -454,17 +527,28 @@@
         enter_guest_mode(&svm->vcpu);
   
         /*
- -       * Merge guest and host intercepts - must be called  with vcpu in
- -       * guest-mode to take affect here
+ +       * Merge guest and host intercepts - must be called with vcpu in
+ +       * guest-mode to take effect.
          */
         recalc_intercepts(svm);
+ +}
   
- -      vmcb_mark_all_dirty(svm->vmcb);
+ +static void nested_svm_copy_common_state(struct vmcb *from_vmcb, struct vmcb *to_vmcb)
+ +{
+ +      /*
+ +       * Some VMCB state is shared between L1 and L2 and thus has to be
+ +       * moved at the time of nested vmrun and vmexit.
+ +       *
+ +       * VMLOAD/VMSAVE state would also belong in this category, but KVM
+ +       * always performs VMLOAD and VMSAVE from the VMCB01.
+ +       */
+ +      to_vmcb->save.spec_ctrl = from_vmcb->save.spec_ctrl;
   }
   
- -int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb12_gpa,
+ +int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb12_gpa,
                          struct vmcb *vmcb12)
   {
+ +      struct vcpu_svm *svm = to_svm(vcpu);
         int ret;
   
         trace_kvm_nested_vmrun(svm->vmcb->save.rip, vmcb12_gpa,
@@@ -567,14 -482,8 +566,14 @@@
   
   
         svm->nested.vmcb12_gpa = vmcb12_gpa;
- -      nested_prepare_vmcb_control(svm);
- -      nested_prepare_vmcb_save(svm, vmcb12);
+ +
+ +      WARN_ON(svm->vmcb == svm->nested.vmcb02.ptr);
+ +
+ +      nested_svm_copy_common_state(svm->vmcb01.ptr, svm->nested.vmcb02.ptr);
+ +
+ +      svm_switch_vmcb(svm, &svm->nested.vmcb02);
+ +      nested_vmcb02_prepare_control(svm);
+ +      nested_vmcb02_prepare_save(svm, vmcb12);
   
         ret = nested_svm_load_cr3(&svm->vcpu, vmcb12->save.cr3,
                                   nested_npt_enabled(svm));
@@@ -582,48 -491,47 +581,48 @@@
                 return ret;
   
         if (!npt_enabled)
- -              svm->vcpu.arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
+ +              vcpu->arch.mmu->inject_page_fault = svm_inject_page_fault_nested;
   
         svm_set_gif(svm, true);
   
         return 0;
   }
   
- -int nested_svm_vmrun(struct vcpu_svm *svm)
+ +int nested_svm_vmrun(struct kvm_vcpu *vcpu)
   {
+ +      struct vcpu_svm *svm = to_svm(vcpu);
         int ret;
         struct vmcb *vmcb12;
- -      struct vmcb *hsave = svm->nested.hsave;
- -      struct vmcb *vmcb = svm->vmcb;
         struct kvm_host_map map;
         u64 vmcb12_gpa;
   
- -      if (is_smm(&svm->vcpu)) {
- -              kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ +      ++vcpu->stat.nested_run;
+ +
+ +      if (is_smm(vcpu)) {
+ +              kvm_queue_exception(vcpu, UD_VECTOR);
                 return 1;
         }
   
         vmcb12_gpa = svm->vmcb->save.rax;
- -      ret = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(vmcb12_gpa), &map);
+ +      ret = kvm_vcpu_map(vcpu, gpa_to_gfn(vmcb12_gpa), &map);
         if (ret == -EINVAL) {
- -              kvm_inject_gp(&svm->vcpu, 0);
+ +              kvm_inject_gp(vcpu, 0);
                 return 1;
         } else if (ret) {
- -              return kvm_skip_emulated_instruction(&svm->vcpu);
+ +              return kvm_skip_emulated_instruction(vcpu);
         }
   
- -      ret = kvm_skip_emulated_instruction(&svm->vcpu);
+ +      ret = kvm_skip_emulated_instruction(vcpu);
   
         vmcb12 = map.hva;
   
         if (WARN_ON_ONCE(!svm->nested.initialized))
                 return -EINVAL;
   
- -      load_nested_vmcb_control(svm, &vmcb12->control);
+ +      nested_load_control_from_vmcb12(svm, &vmcb12->control);
   
- -      if (!nested_vmcb_check_save(svm, vmcb12) ||
- -          !nested_vmcb_check_controls(&svm->nested.ctl)) {
+ +      if (!nested_vmcb_valid_sregs(vcpu, &vmcb12->save) ||
+ +          !nested_vmcb_check_controls(vcpu, &svm->nested.ctl)) {
                 vmcb12->control.exit_code    = SVM_EXIT_ERR;
                 vmcb12->control.exit_code_hi = 0;
                 vmcb12->control.exit_info_1  = 0;
@@@ -633,25 -541,36 +632,25 @@@
   
   
         /* Clear internal status */
- -      kvm_clear_exception_queue(&svm->vcpu);
- -      kvm_clear_interrupt_queue(&svm->vcpu);
+ +      kvm_clear_exception_queue(vcpu);
+ +      kvm_clear_interrupt_queue(vcpu);
   
         /*
- -       * Save the old vmcb, so we don't need to pick what we save, but can
- -       * restore everything when a VMEXIT occurs
+ +       * Since vmcb01 is not in use, we can use it to store some of the L1
+ +       * state.
          */
- -      hsave->save.es     = vmcb->save.es;
- -      hsave->save.cs     = vmcb->save.cs;
- -      hsave->save.ss     = vmcb->save.ss;
- -      hsave->save.ds     = vmcb->save.ds;
- -      hsave->save.gdtr   = vmcb->save.gdtr;
- -      hsave->save.idtr   = vmcb->save.idtr;
- -      hsave->save.efer   = svm->vcpu.arch.efer;
- -      hsave->save.cr0    = kvm_read_cr0(&svm->vcpu);
- -      hsave->save.cr4    = svm->vcpu.arch.cr4;
- -      hsave->save.rflags = kvm_get_rflags(&svm->vcpu);
- -      hsave->save.rip    = kvm_rip_read(&svm->vcpu);
- -      hsave->save.rsp    = vmcb->save.rsp;
- -      hsave->save.rax    = vmcb->save.rax;
- -      if (npt_enabled)
- -              hsave->save.cr3    = vmcb->save.cr3;
- -      else
- -              hsave->save.cr3    = kvm_read_cr3(&svm->vcpu);
- -
- -      copy_vmcb_control_area(&hsave->control, &vmcb->control);
+ +      svm->vmcb01.ptr->save.efer   = vcpu->arch.efer;
+ +      svm->vmcb01.ptr->save.cr0    = kvm_read_cr0(vcpu);
+ +      svm->vmcb01.ptr->save.cr4    = vcpu->arch.cr4;
+ +      svm->vmcb01.ptr->save.rflags = kvm_get_rflags(vcpu);
+ +      svm->vmcb01.ptr->save.rip    = kvm_rip_read(vcpu);
+ +
+ +      if (!npt_enabled)
+ +              svm->vmcb01.ptr->save.cr3 = kvm_read_cr3(vcpu);
   
         svm->nested.nested_run_pending = 1;
   
- -      if (enter_svm_guest_mode(svm, vmcb12_gpa, vmcb12))
+ +      if (enter_svm_guest_mode(vcpu, vmcb12_gpa, vmcb12))
                 goto out_exit_err;
   
         if (nested_svm_vmrun_msrpm(svm))
@@@ -668,7 -587,7 +667,7 @@@ out_exit_err
         nested_svm_vmexit(svm);
   
   out:
- -      kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ +      kvm_vcpu_unmap(vcpu, &map, true);
   
         return ret;
   }
@@@ -691,30 -610,27 +690,30 @@@ void nested_svm_vmloadsave(struct vmcb 
   
   int nested_svm_vmexit(struct vcpu_svm *svm)
   {
- -      int rc;
+ +      struct kvm_vcpu *vcpu = &svm->vcpu;
         struct vmcb *vmcb12;
- -      struct vmcb *hsave = svm->nested.hsave;
         struct vmcb *vmcb = svm->vmcb;
         struct kvm_host_map map;
+ +      int rc;
   
- -      rc = kvm_vcpu_map(&svm->vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
+ +      /* Triple faults in L2 should never escape. */
+ +      WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
+ +
+ +      rc = kvm_vcpu_map(vcpu, gpa_to_gfn(svm->nested.vmcb12_gpa), &map);
         if (rc) {
                 if (rc == -EINVAL)
- -                      kvm_inject_gp(&svm->vcpu, 0);
+ +                      kvm_inject_gp(vcpu, 0);
                 return 1;
         }
   
         vmcb12 = map.hva;
   
         /* Exit Guest-Mode */
- -      leave_guest_mode(&svm->vcpu);
+ +      leave_guest_mode(vcpu);
         svm->nested.vmcb12_gpa = 0;
         WARN_ON_ONCE(svm->nested.nested_run_pending);
   
- -      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
+ +      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
   
         /* in case we halted in L2 */
         svm->vcpu.arch.mp_state = KVM_MP_STATE_RUNNABLE;
@@@ -728,14 -644,14 +727,14 @@@
         vmcb12->save.gdtr   = vmcb->save.gdtr;
         vmcb12->save.idtr   = vmcb->save.idtr;
         vmcb12->save.efer   = svm->vcpu.arch.efer;
- -      vmcb12->save.cr0    = kvm_read_cr0(&svm->vcpu);
- -      vmcb12->save.cr3    = kvm_read_cr3(&svm->vcpu);
+ +      vmcb12->save.cr0    = kvm_read_cr0(vcpu);
+ +      vmcb12->save.cr3    = kvm_read_cr3(vcpu);
         vmcb12->save.cr2    = vmcb->save.cr2;
         vmcb12->save.cr4    = svm->vcpu.arch.cr4;
- -      vmcb12->save.rflags = kvm_get_rflags(&svm->vcpu);
- -      vmcb12->save.rip    = kvm_rip_read(&svm->vcpu);
- -      vmcb12->save.rsp    = kvm_rsp_read(&svm->vcpu);
- -      vmcb12->save.rax    = kvm_rax_read(&svm->vcpu);
+ +      vmcb12->save.rflags = kvm_get_rflags(vcpu);
+ +      vmcb12->save.rip    = kvm_rip_read(vcpu);
+ +      vmcb12->save.rsp    = kvm_rsp_read(vcpu);
+ +      vmcb12->save.rax    = kvm_rax_read(vcpu);
         vmcb12->save.dr7    = vmcb->save.dr7;
         vmcb12->save.dr6    = svm->vcpu.arch.dr6;
         vmcb12->save.cpl    = vmcb->save.cpl;
@@@ -747,7 -663,7 +746,7 @@@
         vmcb12->control.exit_info_2       = vmcb->control.exit_info_2;
   
         if (vmcb12->control.exit_code != SVM_EXIT_ERR)
- -              nested_vmcb_save_pending_event(svm, vmcb12);
+ +              nested_save_pending_event_to_vmcb12(svm, vmcb12);
   
         if (svm->nrips_enabled)
                 vmcb12->control.next_rip  = vmcb->control.next_rip;
@@@ -762,39 -678,37 +761,39 @@@
         vmcb12->control.pause_filter_thresh =
                 svm->vmcb->control.pause_filter_thresh;
   
- -      /* Restore the original control entries */
- -      copy_vmcb_control_area(&vmcb->control, &hsave->control);
+ +      nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
+ +
+ +      svm_switch_vmcb(svm, &svm->vmcb01);
+ +      WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
   
- -      /* On vmexit the  GIF is set to false */
+ +      /*
+ +       * On vmexit the GIF is set to false and
+ +       * no event can be injected in L1.
+ +       */
         svm_set_gif(svm, false);
+ +      svm->vmcb->control.exit_int_info = 0;
   
- -      svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset =
- -              svm->vcpu.arch.l1_tsc_offset;
+ +      svm->vcpu.arch.tsc_offset = svm->vcpu.arch.l1_tsc_offset;
+ +      if (svm->vmcb->control.tsc_offset != svm->vcpu.arch.tsc_offset) {
+ +              svm->vmcb->control.tsc_offset = svm->vcpu.arch.tsc_offset;
+ +              vmcb_mark_dirty(svm->vmcb, VMCB_INTERCEPTS);
+ +      }
   
         svm->nested.ctl.nested_cr3 = 0;
   
- -      /* Restore selected save entries */
- -      svm->vmcb->save.es = hsave->save.es;
- -      svm->vmcb->save.cs = hsave->save.cs;
- -      svm->vmcb->save.ss = hsave->save.ss;
- -      svm->vmcb->save.ds = hsave->save.ds;
- -      svm->vmcb->save.gdtr = hsave->save.gdtr;
- -      svm->vmcb->save.idtr = hsave->save.idtr;
- -      kvm_set_rflags(&svm->vcpu, hsave->save.rflags);
- -      kvm_set_rflags(&svm->vcpu, hsave->save.rflags | X86_EFLAGS_FIXED);
- -      svm_set_efer(&svm->vcpu, hsave->save.efer);
- -      svm_set_cr0(&svm->vcpu, hsave->save.cr0 | X86_CR0_PE);
- -      svm_set_cr4(&svm->vcpu, hsave->save.cr4);
- -      kvm_rax_write(&svm->vcpu, hsave->save.rax);
- -      kvm_rsp_write(&svm->vcpu, hsave->save.rsp);
- -      kvm_rip_write(&svm->vcpu, hsave->save.rip);
- -      svm->vmcb->save.dr7 = DR7_FIXED_1;
- -      svm->vmcb->save.cpl = 0;
- -      svm->vmcb->control.exit_int_info = 0;
+ +      /*
+ +       * Restore processor state that had been saved in vmcb01
+ +       */
+ +      kvm_set_rflags(vcpu, svm->vmcb->save.rflags);
+ +      svm_set_efer(vcpu, svm->vmcb->save.efer);
+ +      svm_set_cr0(vcpu, svm->vmcb->save.cr0 | X86_CR0_PE);
+ +      svm_set_cr4(vcpu, svm->vmcb->save.cr4);
+ +      kvm_rax_write(vcpu, svm->vmcb->save.rax);
+ +      kvm_rsp_write(vcpu, svm->vmcb->save.rsp);
+ +      kvm_rip_write(vcpu, svm->vmcb->save.rip);
   
- -      vmcb_mark_all_dirty(svm->vmcb);
+ +      svm->vcpu.arch.dr7 = DR7_FIXED_1;
+ +      kvm_update_dr7(&svm->vcpu);
   
         trace_kvm_nested_vmexit_inject(vmcb12->control.exit_code,
                                        vmcb12->control.exit_info_1,
@@@ -803,62 -717,50 +802,62 @@@
                                        vmcb12->control.exit_int_info_err,
                                        KVM_ISA_SVM);
   
- -      kvm_vcpu_unmap(&svm->vcpu, &map, true);
+ +      kvm_vcpu_unmap(vcpu, &map, true);
   
- -      nested_svm_uninit_mmu_context(&svm->vcpu);
+ +      nested_svm_uninit_mmu_context(vcpu);
   
- -      rc = nested_svm_load_cr3(&svm->vcpu, hsave->save.cr3, false);
+ +      rc = nested_svm_load_cr3(vcpu, svm->vmcb->save.cr3, false);
         if (rc)
                 return 1;
   
- -      if (npt_enabled)
- -              svm->vmcb->save.cr3 = hsave->save.cr3;
- -
         /*
          * Drop what we picked up for L2 via svm_complete_interrupts() so it
          * doesn't end up in L1.
          */
         svm->vcpu.arch.nmi_injected = false;
- -      kvm_clear_exception_queue(&svm->vcpu);
- -      kvm_clear_interrupt_queue(&svm->vcpu);
+ +      kvm_clear_exception_queue(vcpu);
+ +      kvm_clear_interrupt_queue(vcpu);
+ +
+ +      /*
+ +       * If we are here following the completion of a VMRUN that
+ +       * is being single-stepped, queue the pending #DB intercept
+ +       * right now so that it can be accounted for before we execute
+ +       * L1's next instruction.
+ +       */
+ +      if (unlikely(svm->vmcb->save.rflags & X86_EFLAGS_TF))
+ +              kvm_queue_exception(&(svm->vcpu), DB_VECTOR);
   
         return 0;
   }
   
+ +static void nested_svm_triple_fault(struct kvm_vcpu *vcpu)
+ +{
+ +      nested_svm_simple_vmexit(to_svm(vcpu), SVM_EXIT_SHUTDOWN);
+ +}
+ +
   int svm_allocate_nested(struct vcpu_svm *svm)
   {
- -      struct page *hsave_page;
+ +      struct page *vmcb02_page;
   
         if (svm->nested.initialized)
                 return 0;
   
- -      hsave_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
- -      if (!hsave_page)
+ +      vmcb02_page = alloc_page(GFP_KERNEL_ACCOUNT | __GFP_ZERO);
+ +      if (!vmcb02_page)
                 return -ENOMEM;
- -      svm->nested.hsave = page_address(hsave_page);
+ +      svm->nested.vmcb02.ptr = page_address(vmcb02_page);
+ +      svm->nested.vmcb02.pa = __sme_set(page_to_pfn(vmcb02_page) << PAGE_SHIFT);
   
         svm->nested.msrpm = svm_vcpu_alloc_msrpm();
         if (!svm->nested.msrpm)
- -              goto err_free_hsave;
+ +              goto err_free_vmcb02;
         svm_vcpu_init_msrpm(&svm->vcpu, svm->nested.msrpm);
   
         svm->nested.initialized = true;
         return 0;
   
- -err_free_hsave:
- -      __free_page(hsave_page);
+ +err_free_vmcb02:
+ +      __free_page(vmcb02_page);
         return -ENOMEM;
   }
   
@@@ -870,8 -772,8 +869,8 @@@ void svm_free_nested(struct vcpu_svm *s
         svm_vcpu_free_msrpm(svm->nested.msrpm);
         svm->nested.msrpm = NULL;
   
- -      __free_page(virt_to_page(svm->nested.hsave));
- -      svm->nested.hsave = NULL;
+ +      __free_page(virt_to_page(svm->nested.vmcb02.ptr));
+ +      svm->nested.vmcb02.ptr = NULL;
   
         svm->nested.initialized = false;
   }
@@@ -881,19 -783,18 +880,19 @@@
    */
   void svm_leave_nested(struct vcpu_svm *svm)
   {
- -      if (is_guest_mode(&svm->vcpu)) {
- -              struct vmcb *hsave = svm->nested.hsave;
- -              struct vmcb *vmcb = svm->vmcb;
+ +      struct kvm_vcpu *vcpu = &svm->vcpu;
   
+ +      if (is_guest_mode(vcpu)) {
                 svm->nested.nested_run_pending = 0;
- -              leave_guest_mode(&svm->vcpu);
- -              copy_vmcb_control_area(&vmcb->control, &hsave->control);
- -              nested_svm_uninit_mmu_context(&svm->vcpu);
+ +              leave_guest_mode(vcpu);
+ +
+ +              svm_switch_vmcb(svm, &svm->nested.vmcb02);
+ +
+ +              nested_svm_uninit_mmu_context(vcpu);
                 vmcb_mark_all_dirty(svm->vmcb);
         }
   
- -      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, &svm->vcpu);
+ +      kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
   }
   
   static int nested_svm_exit_handled_msr(struct vcpu_svm *svm)
@@@ -1002,15 -903,16 +1001,15 @@@ int nested_svm_exit_handled(struct vcpu
         return vmexit;
   }
   
- -int nested_svm_check_permissions(struct vcpu_svm *svm)
+ +int nested_svm_check_permissions(struct kvm_vcpu *vcpu)
   {
- -      if (!(svm->vcpu.arch.efer & EFER_SVME) ||
- -          !is_paging(&svm->vcpu)) {
- -              kvm_queue_exception(&svm->vcpu, UD_VECTOR);
+ +      if (!(vcpu->arch.efer & EFER_SVME) || !is_paging(vcpu)) {
+ +              kvm_queue_exception(vcpu, UD_VECTOR);
                 return 1;
         }
   
- -      if (svm->vmcb->save.cpl) {
- -              kvm_inject_gp(&svm->vcpu, 0);
+ +      if (to_svm(vcpu)->vmcb->save.cpl) {
+ +              kvm_inject_gp(vcpu, 0);
                 return 1;
         }
   
@@@ -1058,11 -960,50 +1057,11 @@@ static void nested_svm_inject_exception
         nested_svm_vmexit(svm);
   }
   
- -static void nested_svm_smi(struct vcpu_svm *svm)
- -{
- -      svm->vmcb->control.exit_code = SVM_EXIT_SMI;
- -      svm->vmcb->control.exit_info_1 = 0;
- -      svm->vmcb->control.exit_info_2 = 0;
- -
- -      nested_svm_vmexit(svm);
- -}
- -
- -static void nested_svm_nmi(struct vcpu_svm *svm)
- -{
- -      svm->vmcb->control.exit_code = SVM_EXIT_NMI;
- -      svm->vmcb->control.exit_info_1 = 0;
- -      svm->vmcb->control.exit_info_2 = 0;
- -
- -      nested_svm_vmexit(svm);
- -}
- -
- -static void nested_svm_intr(struct vcpu_svm *svm)
- -{
- -      trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
- -
- -      svm->vmcb->control.exit_code   = SVM_EXIT_INTR;
- -      svm->vmcb->control.exit_info_1 = 0;
- -      svm->vmcb->control.exit_info_2 = 0;
- -
- -      nested_svm_vmexit(svm);
- -}
- -
   static inline bool nested_exit_on_init(struct vcpu_svm *svm)
   {
         return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_INIT);
   }
   
- -static void nested_svm_init(struct vcpu_svm *svm)
- -{
- -      svm->vmcb->control.exit_code   = SVM_EXIT_INIT;
- -      svm->vmcb->control.exit_info_1 = 0;
- -      svm->vmcb->control.exit_info_2 = 0;
- -
- -      nested_svm_vmexit(svm);
- -}
- -
- -
   static int svm_check_nested_events(struct kvm_vcpu *vcpu)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
@@@ -1076,18 -1017,12 +1075,18 @@@
                         return -EBUSY;
                 if (!nested_exit_on_init(svm))
                         return 0;
- -              nested_svm_init(svm);
+ +              nested_svm_simple_vmexit(svm, SVM_EXIT_INIT);
                 return 0;
         }
   
         if (vcpu->arch.exception.pending) {
- -              if (block_nested_events)
+ +              /*
+ +               * Only a pending nested run can block a pending exception.
+ +               * Otherwise an injected NMI/interrupt should either be
+ +               * lost or delivered to the nested hypervisor in the EXITINTINFO
+ +               * vmcb field, while delivering the pending exception.
+ +               */
+ +              if (svm->nested.nested_run_pending)
                           return -EBUSY;
                 if (!nested_exit_on_exception(svm))
                         return 0;
@@@ -1100,7 -1035,7 +1099,7 @@@
                         return -EBUSY;
                 if (!nested_exit_on_smi(svm))
                         return 0;
- -              nested_svm_smi(svm);
+ +              nested_svm_simple_vmexit(svm, SVM_EXIT_SMI);
                 return 0;
         }
   
@@@ -1109,7 -1044,7 +1108,7 @@@
                         return -EBUSY;
                 if (!nested_exit_on_nmi(svm))
                         return 0;
- -              nested_svm_nmi(svm);
+ +              nested_svm_simple_vmexit(svm, SVM_EXIT_NMI);
                 return 0;
         }
   
@@@ -1118,8 -1053,7 +1117,8 @@@
                         return -EBUSY;
                 if (!nested_exit_on_intr(svm))
                         return 0;
- -              nested_svm_intr(svm);
+ +              trace_kvm_nested_intr_vmexit(svm->vmcb->save.rip);
+ +              nested_svm_simple_vmexit(svm, SVM_EXIT_INTR);
                 return 0;
         }
   
@@@ -1138,8 -1072,8 +1137,8 @@@ int nested_svm_exit_special(struct vcpu
         case SVM_EXIT_EXCP_BASE ... SVM_EXIT_EXCP_BASE + 0x1f: {
                 u32 excp_bits = 1 << (exit_code - SVM_EXIT_EXCP_BASE);
   
- -              if (get_host_vmcb(svm)->control.intercepts[INTERCEPT_EXCEPTION] &
- -                              excp_bits)
+ +              if (svm->vmcb01.ptr->control.intercepts[INTERCEPT_EXCEPTION] &
+ +                  excp_bits)
                         return NESTED_EXIT_HOST;
                 else if (exit_code == SVM_EXIT_EXCP_BASE + PF_VECTOR &&
                          svm->vcpu.arch.apf.host_apf_flags)
@@@ -1203,9 -1137,10 +1202,9 @@@ static int svm_get_nested_state(struct 
         if (copy_to_user(&user_vmcb->control, &svm->nested.ctl,
                          sizeof(user_vmcb->control)))
                 return -EFAULT;
- -      if (copy_to_user(&user_vmcb->save, &svm->nested.hsave->save,
+ +      if (copy_to_user(&user_vmcb->save, &svm->vmcb01.ptr->save,
                          sizeof(user_vmcb->save)))
                 return -EFAULT;
- -
   out:
         return kvm_state.size;
   }
@@@ -1215,6 -1150,7 +1214,6 @@@ static int svm_set_nested_state(struct 
                                 struct kvm_nested_state *kvm_state)
   {
         struct vcpu_svm *svm = to_svm(vcpu);
- -      struct vmcb *hsave = svm->nested.hsave;
         struct vmcb __user *user_vmcb = (struct vmcb __user *)
                 &user_kvm_nested_state->data.svm[0];
         struct vmcb_control_area *ctl;
@@@ -1259,8 -1195,8 +1258,8 @@@
                 return -EINVAL;
   
         ret  = -ENOMEM;
- -      ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL);
- -      save = kzalloc(sizeof(*save), GFP_KERNEL);
+ +      ctl  = kzalloc(sizeof(*ctl),  GFP_KERNEL_ACCOUNT);
+ +      save = kzalloc(sizeof(*save), GFP_KERNEL_ACCOUNT);
         if (!ctl || !save)
                 goto out_free;
   
@@@ -1271,12 -1207,12 +1270,12 @@@
                 goto out_free;
   
         ret = -EINVAL;
- -      if (!nested_vmcb_check_controls(ctl))
+ +      if (!nested_vmcb_check_controls(vcpu, ctl))
                 goto out_free;
   
         /*
          * Processor state contains L2 state.  Check that it is
- -       * valid for guest mode (see nested_vmcb_checks).
+ +       * valid for guest mode (see nested_vmcb_check_save).
          */
         cr0 = kvm_read_cr0(vcpu);
           if (((cr0 & X86_CR0_CD) == 0) && (cr0 & X86_CR0_NW))
@@@ -1285,48 -1221,29 +1284,48 @@@
         /*
          * Validate host state saved from before VMRUN (see
          * nested_svm_check_permissions).
- -       * TODO: validate reserved bits for all saved state.
          */
- -      if (!(save->cr0 & X86_CR0_PG))
- -              goto out_free;
- -      if (!(save->efer & EFER_SVME))
+ +      if (!(save->cr0 & X86_CR0_PG) ||
+ +          !(save->cr0 & X86_CR0_PE) ||
+ +          (save->rflags & X86_EFLAGS_VM) ||
+ +          !nested_vmcb_valid_sregs(vcpu, save))
                 goto out_free;
   
         /*
- -       * All checks done, we can enter guest mode.  L1 control fields
- -       * come from the nested save state.  Guest state is already
- -       * in the registers, the save area of the nested state instead
- -       * contains saved L1 state.
+ +       * All checks done, we can enter guest mode. Userspace provides
+ +       * vmcb12.control, which will be combined with L1 and stored into
+ +       * vmcb02, and the L1 save state which we store in vmcb01.
+ +       * L2 registers, if needed, are moved from the current VMCB to VMCB02.
          */
   
         svm->nested.nested_run_pending =
                 !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
   
- -      copy_vmcb_control_area(&hsave->control, &svm->vmcb->control);
- -      hsave->save = *save;
- -
         svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
- -      load_nested_vmcb_control(svm, ctl);
- -      nested_prepare_vmcb_control(svm);
+ +      if (svm->current_vmcb == &svm->vmcb01)
+ +              svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+ +
+ +      svm->vmcb01.ptr->save.es = save->es;
+ +      svm->vmcb01.ptr->save.cs = save->cs;
+ +      svm->vmcb01.ptr->save.ss = save->ss;
+ +      svm->vmcb01.ptr->save.ds = save->ds;
+ +      svm->vmcb01.ptr->save.gdtr = save->gdtr;
+ +      svm->vmcb01.ptr->save.idtr = save->idtr;
+ +      svm->vmcb01.ptr->save.rflags = save->rflags | X86_EFLAGS_FIXED;
+ +      svm->vmcb01.ptr->save.efer = save->efer;
+ +      svm->vmcb01.ptr->save.cr0 = save->cr0;
+ +      svm->vmcb01.ptr->save.cr3 = save->cr3;
+ +      svm->vmcb01.ptr->save.cr4 = save->cr4;
+ +      svm->vmcb01.ptr->save.rax = save->rax;
+ +      svm->vmcb01.ptr->save.rsp = save->rsp;
+ +      svm->vmcb01.ptr->save.rip = save->rip;
+ +      svm->vmcb01.ptr->save.cpl = 0;
+ +
+ +      nested_load_control_from_vmcb12(svm, ctl);
+ +
+ +      svm_switch_vmcb(svm, &svm->nested.vmcb02);
+ +
+ +      nested_vmcb02_prepare_control(svm);
   
         kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
         ret = 0;
@@@ -1337,31 -1254,8 +1336,31 @@@ out_free
         return ret;
   }
   
+ +static bool svm_get_nested_state_pages(struct kvm_vcpu *vcpu)
+ +{
+ +      struct vcpu_svm *svm = to_svm(vcpu);
+ +
+ +      if (WARN_ON(!is_guest_mode(vcpu)))
+ +              return true;
+ +
+ +      if (nested_svm_load_cr3(&svm->vcpu, vcpu->arch.cr3,
+ +                              nested_npt_enabled(svm)))
+ +              return false;
+ +
+ +      if (!nested_svm_vmrun_msrpm(svm)) {
+ +              vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+ +              vcpu->run->internal.suberror =
+ +                      KVM_INTERNAL_ERROR_EMULATION;
+ +              vcpu->run->internal.ndata = 0;
+ +              return false;
+ +      }
+ +
+ +      return true;
+ +}
+ +
   struct kvm_x86_nested_ops svm_nested_ops = {
         .check_events = svm_check_nested_events,
+ +      .triple_fault = nested_svm_triple_fault,
         .get_nested_state_pages = svm_get_nested_state_pages,
         .get_state = svm_get_nested_state,
         .set_state = svm_set_nested_state,
diff --combined arch/x86/kvm/svm/sev.c

index b4e471b,214eefb..2632852
--- 1/arch/x86/kvm/svm/sev.c
--- 2/arch/x86/kvm/svm/sev.c
+++ b/arch/x86/kvm/svm/sev.c
@@@ -14,6 -14,7 +14,7 @@@
   #include <linux/psp-sev.h>
   #include <linux/pagemap.h>
   #include <linux/swap.h>
+ #include <linux/misc_cgroup.h>
   #include <linux/processor.h>
   #include <linux/trace_events.h>
   #include <asm/fpu/internal.h>
@@@ -28,13 -29,27 +29,28 @@@
   
   #define __ex(x) __kvm_handle_fault_on_reboot(x)
   
+ #ifndef CONFIG_KVM_AMD_SEV
+ /*
+  * When this config is not defined, SEV feature is not supported and APIs in
+  * this file are not used but this file still gets compiled into the KVM AMD
+  * module.
+  *
+  * We will not have MISC_CG_RES_SEV and MISC_CG_RES_SEV_ES entries in the enum
+  * misc_res_type {} defined in linux/misc_cgroup.h.
+  *
+  * Below macros allow compilation to succeed.
+  */
+ #define MISC_CG_RES_SEV MISC_CG_RES_TYPES
+ #define MISC_CG_RES_SEV_ES MISC_CG_RES_TYPES
+ #endif
+ 
   static u8 sev_enc_bit;
   static int sev_flush_asids(void);
   static DECLARE_RWSEM(sev_deactivate_lock);
   static DEFINE_MUTEX(sev_bitmap_lock);
   unsigned int max_sev_asid;
   static unsigned int min_sev_asid;
+ +static unsigned long sev_me_mask;
   static unsigned long *sev_asid_bitmap;
   static unsigned long *sev_reclaim_asid_bitmap;
   
@@@ -67,11 -82,6 +83,11 @@@ static int sev_flush_asids(void
         return ret;
   }
   
+ +static inline bool is_mirroring_enc_context(struct kvm *kvm)
+ +{
+ +      return !!to_kvm_svm(kvm)->sev_info.enc_context_owner;
+ +}
+ +
   /* Must be called with the sev_bitmap_lock held */
   static bool __sev_recycle_asids(int min_asid, int max_asid)
   {
@@@ -93,10 -103,21 +109,21 @@@
         return true;
   }
   
- static int sev_asid_new(bool es_active)
+ static int sev_asid_new(struct kvm_sev_info *sev)
   {
-       int pos, min_asid, max_asid;
+       int pos, min_asid, max_asid, ret;
         bool retry = true;
+       enum misc_res_type type;
+ 
+       type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+       WARN_ON(sev->misc_cg);
+       sev->misc_cg = get_current_misc_cg();
+       ret = misc_cg_try_charge(type, sev->misc_cg, 1);
+       if (ret) {
+               put_misc_cg(sev->misc_cg);
+               sev->misc_cg = NULL;
+               return ret;
+       }
   
         mutex_lock(&sev_bitmap_lock);
   
@@@ -104,8 -125,8 +131,8 @@@
          * SEV-enabled guests must use asid from min_sev_asid to max_sev_asid.
          * SEV-ES-enabled guest can use from 1 to min_sev_asid - 1.
          */
-       min_asid = es_active ? 0 : min_sev_asid - 1;
-       max_asid = es_active ? min_sev_asid - 1 : max_sev_asid;
+       min_asid = sev->es_active ? 0 : min_sev_asid - 1;
+       max_asid = sev->es_active ? min_sev_asid - 1 : max_sev_asid;
   again:
         pos = find_next_zero_bit(sev_asid_bitmap, max_sev_asid, min_asid);
         if (pos >= max_asid) {
@@@ -114,7 -135,8 +141,8 @@@
                         goto again;
                 }
                 mutex_unlock(&sev_bitmap_lock);
-               return -EBUSY;
+               ret = -EBUSY;
+               goto e_uncharge;
         }
   
         __set_bit(pos, sev_asid_bitmap);
@@@ -122,6 -144,11 +150,11 @@@
         mutex_unlock(&sev_bitmap_lock);
   
         return pos + 1;
+ e_uncharge:
+       misc_cg_uncharge(type, sev->misc_cg, 1);
+       put_misc_cg(sev->misc_cg);
+       sev->misc_cg = NULL;
+       return ret;
   }
   
   static int sev_get_asid(struct kvm *kvm)
@@@ -131,14 -158,15 +164,15 @@@
         return sev->asid;
   }
   
- static void sev_asid_free(int asid)
+ static void sev_asid_free(struct kvm_sev_info *sev)
   {
         struct svm_cpu_data *sd;
         int cpu, pos;
+       enum misc_res_type type;
   
         mutex_lock(&sev_bitmap_lock);
   
-       pos = asid - 1;
+       pos = sev->asid - 1;
         __set_bit(pos, sev_reclaim_asid_bitmap);
   
         for_each_possible_cpu(cpu) {
@@@ -147,71 -175,100 +181,80 @@@
         }
   
         mutex_unlock(&sev_bitmap_lock);
+ 
+       type = sev->es_active ? MISC_CG_RES_SEV_ES : MISC_CG_RES_SEV;
+       misc_cg_uncharge(type, sev->misc_cg, 1);
+       put_misc_cg(sev->misc_cg);
+       sev->misc_cg = NULL;
   }
   
   static void sev_unbind_asid(struct kvm *kvm, unsigned int handle)
   {
- -      struct sev_data_decommission *decommission;
- -      struct sev_data_deactivate *data;
+ +      struct sev_data_decommission decommission;
+ +      struct sev_data_deactivate deactivate;
   
         if (!handle)
                 return;
   
- -      data = kzalloc(sizeof(*data), GFP_KERNEL);
- -      if (!data)
- -              return;
- -
- -      /* deactivate handle */
- -      data->handle = handle;
+ +      deactivate.handle = handle;
   
         /* Guard DEACTIVATE against WBINVD/DF_FLUSH used in ASID recycling */
         down_read(&sev_deactivate_lock);
- -      sev_guest_deactivate(data, NULL);
+ +      sev_guest_deactivate(&deactivate, NULL);
         up_read(&sev_deactivate_lock);
   
- -      kfree(data);
- -
- -      decommission = kzalloc(sizeof(*decommission), GFP_KERNEL);
- -      if (!decommission)
- -              return;
- -
         /* decommission handle */
- -      decommission->handle = handle;
- -      sev_guest_decommission(decommission, NULL);
- -
- -      kfree(decommission);
+ +      decommission.handle = handle;
+ +      sev_guest_decommission(&decommission, NULL);
   }
   
   static int sev_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ +      bool es_active = argp->id == KVM_SEV_ES_INIT;
         int asid, ret;
   
+ +      if (kvm->created_vcpus)
+ +              return -EINVAL;
+ +
         ret = -EBUSY;
         if (unlikely(sev->active))
                 return ret;
   
-       asid = sev_asid_new(es_active);
++      sev->es_active = es_active;
+       asid = sev_asid_new(sev);
         if (asid < 0)
--              return ret;
++              goto e_no_asid;
+       sev->asid = asid;
   
         ret = sev_platform_init(&argp->error);
         if (ret)
                 goto e_free;
   
         sev->active = true;
-       sev->es_active = es_active;
+ +      sev->asid = asid;
         INIT_LIST_HEAD(&sev->regions_list);
   
         return 0;
   
   e_free:
-       sev_asid_free(asid);
+       sev_asid_free(sev);
+       sev->asid = 0;
++e_no_asid:
++      sev->es_active = false;
         return ret;
   }
   
- -static int sev_es_guest_init(struct kvm *kvm, struct kvm_sev_cmd *argp)
- -{
- -      if (!sev_es)
- -              return -ENOTTY;
- -
- -      to_kvm_svm(kvm)->sev_info.es_active = true;
- -
- -      return sev_guest_init(kvm, argp);
- -}
- -
   static int sev_bind_asid(struct kvm *kvm, unsigned int handle, int *error)
   {
- -      struct sev_data_activate *data;
+ +      struct sev_data_activate activate;
         int asid = sev_get_asid(kvm);
         int ret;
   
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              return -ENOMEM;
- -
         /* activate ASID on the given handle */
- -      data->handle = handle;
- -      data->asid   = asid;
- -      ret = sev_guest_activate(data, error);
- -      kfree(data);
+ +      activate.handle = handle;
+ +      activate.asid   = asid;
+ +      ret = sev_guest_activate(&activate, error);
   
         return ret;
   }
@@@ -241,7 -298,7 +284,7 @@@ static int sev_issue_cmd(struct kvm *kv
   static int sev_launch_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- -      struct sev_data_launch_start *start;
+ +      struct sev_data_launch_start start;
         struct kvm_sev_launch_start params;
         void *dh_blob, *session_blob;
         int *error = &argp->error;
@@@ -253,16 -310,20 +296,16 @@@
         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
                 return -EFAULT;
   
- -      start = kzalloc(sizeof(*start), GFP_KERNEL_ACCOUNT);
- -      if (!start)
- -              return -ENOMEM;
+ +      memset(&start, 0, sizeof(start));
   
         dh_blob = NULL;
         if (params.dh_uaddr) {
                 dh_blob = psp_copy_user_blob(params.dh_uaddr, params.dh_len);
- -              if (IS_ERR(dh_blob)) {
- -                      ret = PTR_ERR(dh_blob);
- -                      goto e_free;
- -              }
+ +              if (IS_ERR(dh_blob))
+ +                      return PTR_ERR(dh_blob);
   
- -              start->dh_cert_address = __sme_set(__pa(dh_blob));
- -              start->dh_cert_len = params.dh_len;
+ +              start.dh_cert_address = __sme_set(__pa(dh_blob));
+ +              start.dh_cert_len = params.dh_len;
         }
   
         session_blob = NULL;
@@@ -273,38 -334,40 +316,38 @@@
                         goto e_free_dh;
                 }
   
- -              start->session_address = __sme_set(__pa(session_blob));
- -              start->session_len = params.session_len;
+ +              start.session_address = __sme_set(__pa(session_blob));
+ +              start.session_len = params.session_len;
         }
   
- -      start->handle = params.handle;
- -      start->policy = params.policy;
+ +      start.handle = params.handle;
+ +      start.policy = params.policy;
   
         /* create memory encryption context */
- -      ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, start, error);
+ +      ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_LAUNCH_START, &start, error);
         if (ret)
                 goto e_free_session;
   
         /* Bind ASID to this guest */
- -      ret = sev_bind_asid(kvm, start->handle, error);
+ +      ret = sev_bind_asid(kvm, start.handle, error);
         if (ret)
                 goto e_free_session;
   
         /* return handle to userspace */
- -      params.handle = start->handle;
+ +      params.handle = start.handle;
         if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params))) {
- -              sev_unbind_asid(kvm, start->handle);
+ +              sev_unbind_asid(kvm, start.handle);
                 ret = -EFAULT;
                 goto e_free_session;
         }
   
- -      sev->handle = start->handle;
+ +      sev->handle = start.handle;
         sev->fd = argp->sev_fd;
   
   e_free_session:
         kfree(session_blob);
   e_free_dh:
         kfree(dh_blob);
- -e_free:
- -      kfree(start);
         return ret;
   }
   
@@@ -423,7 -486,7 +466,7 @@@ static int sev_launch_update_data(struc
         unsigned long vaddr, vaddr_end, next_vaddr, npages, pages, size, i;
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
         struct kvm_sev_launch_update_data params;
- -      struct sev_data_launch_update_data *data;
+ +      struct sev_data_launch_update_data data;
         struct page **inpages;
         int ret;
   
@@@ -433,14 -496,20 +476,14 @@@
         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
                 return -EFAULT;
   
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              return -ENOMEM;
- -
         vaddr = params.uaddr;
         size = params.len;
         vaddr_end = vaddr + size;
   
         /* Lock the user memory. */
         inpages = sev_pin_memory(kvm, vaddr, size, &npages, 1);
- -      if (IS_ERR(inpages)) {
- -              ret = PTR_ERR(inpages);
- -              goto e_free;
- -      }
+ +      if (IS_ERR(inpages))
+ +              return PTR_ERR(inpages);
   
         /*
          * Flush (on non-coherent CPUs) before LAUNCH_UPDATE encrypts pages in
@@@ -448,9 -517,6 +491,9 @@@
          */
         sev_clflush_pages(inpages, npages);
   
+ +      data.reserved = 0;
+ +      data.handle = sev->handle;
+ +
         for (i = 0; vaddr < vaddr_end; vaddr = next_vaddr, i += pages) {
                 int offset, len;
   
@@@ -465,9 -531,10 +508,9 @@@
   
                 len = min_t(size_t, ((pages * PAGE_SIZE) - offset), size);
   
- -              data->handle = sev->handle;
- -              data->len = len;
- -              data->address = __sme_page_pa(inpages[i]) + offset;
- -              ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, data, &argp->error);
+ +              data.len = len;
+ +              data.address = __sme_page_pa(inpages[i]) + offset;
+ +              ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_DATA, &data, &argp->error);
                 if (ret)
                         goto e_unpin;
   
@@@ -483,6 -550,8 +526,6 @@@ e_unpin
         }
         /* unlock the user pages */
         sev_unpin_memory(kvm, inpages, npages);
- -e_free:
- -      kfree(data);
         return ret;
   }
   
@@@ -534,22 -603,23 +577,22 @@@ static int sev_es_sync_vmsa(struct vcpu
   static int sev_launch_update_vmsa(struct kvm *kvm, struct kvm_sev_cmd *argp)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- -      struct sev_data_launch_update_vmsa *vmsa;
+ +      struct sev_data_launch_update_vmsa vmsa;
+ +      struct kvm_vcpu *vcpu;
         int i, ret;
   
         if (!sev_es_guest(kvm))
                 return -ENOTTY;
   
- -      vmsa = kzalloc(sizeof(*vmsa), GFP_KERNEL);
- -      if (!vmsa)
- -              return -ENOMEM;
+ +      vmsa.reserved = 0;
   
- -      for (i = 0; i < kvm->created_vcpus; i++) {
- -              struct vcpu_svm *svm = to_svm(kvm->vcpus[i]);
+ +      kvm_for_each_vcpu(i, vcpu, kvm) {
+ +              struct vcpu_svm *svm = to_svm(vcpu);
   
                 /* Perform some pre-encryption checks against the VMSA */
                 ret = sev_es_sync_vmsa(svm);
                 if (ret)
- -                      goto e_free;
+ +                      return ret;
   
                 /*
                  * The LAUNCH_UPDATE_VMSA command will perform in-place
@@@ -559,25 -629,27 +602,25 @@@
                  */
                 clflush_cache_range(svm->vmsa, PAGE_SIZE);
   
- -              vmsa->handle = sev->handle;
- -              vmsa->address = __sme_pa(svm->vmsa);
- -              vmsa->len = PAGE_SIZE;
- -              ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, vmsa,
+ +              vmsa.handle = sev->handle;
+ +              vmsa.address = __sme_pa(svm->vmsa);
+ +              vmsa.len = PAGE_SIZE;
+ +              ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_VMSA, &vmsa,
                                     &argp->error);
                 if (ret)
- -                      goto e_free;
+ +                      return ret;
   
                 svm->vcpu.arch.guest_state_protected = true;
         }
   
- -e_free:
- -      kfree(vmsa);
- -      return ret;
+ +      return 0;
   }
   
   static int sev_launch_measure(struct kvm *kvm, struct kvm_sev_cmd *argp)
   {
         void __user *measure = (void __user *)(uintptr_t)argp->data;
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- -      struct sev_data_launch_measure *data;
+ +      struct sev_data_launch_measure data;
         struct kvm_sev_launch_measure params;
         void __user *p = NULL;
         void *blob = NULL;
@@@ -589,7 -661,9 +632,7 @@@
         if (copy_from_user(&params, measure, sizeof(params)))
                 return -EFAULT;
   
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              return -ENOMEM;
+ +      memset(&data, 0, sizeof(data));
   
         /* User wants to query the blob length */
         if (!params.len)
@@@ -597,20 -671,23 +640,20 @@@
   
         p = (void __user *)(uintptr_t)params.uaddr;
         if (p) {
- -              if (params.len > SEV_FW_BLOB_MAX_SIZE) {
- -                      ret = -EINVAL;
- -                      goto e_free;
- -              }
+ +              if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ +                      return -EINVAL;
   
- -              ret = -ENOMEM;
- -              blob = kmalloc(params.len, GFP_KERNEL);
+ +              blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
                 if (!blob)
- -                      goto e_free;
+ +                      return -ENOMEM;
   
- -              data->address = __psp_pa(blob);
- -              data->len = params.len;
+ +              data.address = __psp_pa(blob);
+ +              data.len = params.len;
         }
   
   cmd:
- -      data->handle = sev->handle;
- -      ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, data, &argp->error);
+ +      data.handle = sev->handle;
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_MEASURE, &data, &argp->error);
   
         /*
          * If we query the session length, FW responded with expected data.
@@@ -627,50 -704,63 +670,50 @@@
         }
   
   done:
- -      params.len = data->len;
+ +      params.len = data.len;
         if (copy_to_user(measure, &params, sizeof(params)))
                 ret = -EFAULT;
   e_free_blob:
         kfree(blob);
- -e_free:
- -      kfree(data);
         return ret;
   }
   
   static int sev_launch_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- -      struct sev_data_launch_finish *data;
- -      int ret;
+ +      struct sev_data_launch_finish data;
   
         if (!sev_guest(kvm))
                 return -ENOTTY;
   
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              return -ENOMEM;
- -
- -      data->handle = sev->handle;
- -      ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, data, &argp->error);
- -
- -      kfree(data);
- -      return ret;
+ +      data.handle = sev->handle;
+ +      return sev_issue_cmd(kvm, SEV_CMD_LAUNCH_FINISH, &data, &argp->error);
   }
   
   static int sev_guest_status(struct kvm *kvm, struct kvm_sev_cmd *argp)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
         struct kvm_sev_guest_status params;
- -      struct sev_data_guest_status *data;
+ +      struct sev_data_guest_status data;
         int ret;
   
         if (!sev_guest(kvm))
                 return -ENOTTY;
   
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              return -ENOMEM;
+ +      memset(&data, 0, sizeof(data));
   
- -      data->handle = sev->handle;
- -      ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, data, &argp->error);
+ +      data.handle = sev->handle;
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_GUEST_STATUS, &data, &argp->error);
         if (ret)
- -              goto e_free;
+ +              return ret;
   
- -      params.policy = data->policy;
- -      params.state = data->state;
- -      params.handle = data->handle;
+ +      params.policy = data.policy;
+ +      params.state = data.state;
+ +      params.handle = data.handle;
   
         if (copy_to_user((void __user *)(uintptr_t)argp->data, &params, sizeof(params)))
                 ret = -EFAULT;
- -e_free:
- -      kfree(data);
+ +
         return ret;
   }
   
@@@ -679,17 -769,23 +722,17 @@@ static int __sev_issue_dbg_cmd(struct k
                                int *error, bool enc)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- -      struct sev_data_dbg *data;
- -      int ret;
- -
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              return -ENOMEM;
+ +      struct sev_data_dbg data;
   
- -      data->handle = sev->handle;
- -      data->dst_addr = dst;
- -      data->src_addr = src;
- -      data->len = size;
+ +      data.reserved = 0;
+ +      data.handle = sev->handle;
+ +      data.dst_addr = dst;
+ +      data.src_addr = src;
+ +      data.len = size;
   
- -      ret = sev_issue_cmd(kvm,
- -                          enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
- -                          data, error);
- -      kfree(data);
- -      return ret;
+ +      return sev_issue_cmd(kvm,
+ +                           enc ? SEV_CMD_DBG_ENCRYPT : SEV_CMD_DBG_DECRYPT,
+ +                           &data, error);
   }
   
   static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
@@@ -909,7 -1005,7 +952,7 @@@ err
   static int sev_launch_secret(struct kvm *kvm, struct kvm_sev_cmd *argp)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- -      struct sev_data_launch_secret *data;
+ +      struct sev_data_launch_secret data;
         struct kvm_sev_launch_secret params;
         struct page **pages;
         void *blob, *hdr;
@@@ -941,36 -1037,41 +984,36 @@@
                 goto e_unpin_memory;
         }
   
- -      ret = -ENOMEM;
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              goto e_unpin_memory;
+ +      memset(&data, 0, sizeof(data));
   
         offset = params.guest_uaddr & (PAGE_SIZE - 1);
- -      data->guest_address = __sme_page_pa(pages[0]) + offset;
- -      data->guest_len = params.guest_len;
+ +      data.guest_address = __sme_page_pa(pages[0]) + offset;
+ +      data.guest_len = params.guest_len;
   
         blob = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
         if (IS_ERR(blob)) {
                 ret = PTR_ERR(blob);
- -              goto e_free;
+ +              goto e_unpin_memory;
         }
   
- -      data->trans_address = __psp_pa(blob);
- -      data->trans_len = params.trans_len;
+ +      data.trans_address = __psp_pa(blob);
+ +      data.trans_len = params.trans_len;
   
         hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
         if (IS_ERR(hdr)) {
                 ret = PTR_ERR(hdr);
                 goto e_free_blob;
         }
- -      data->hdr_address = __psp_pa(hdr);
- -      data->hdr_len = params.hdr_len;
+ +      data.hdr_address = __psp_pa(hdr);
+ +      data.hdr_len = params.hdr_len;
   
- -      data->handle = sev->handle;
- -      ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, data, &argp->error);
+ +      data.handle = sev->handle;
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_LAUNCH_UPDATE_SECRET, &data, &argp->error);
   
         kfree(hdr);
   
   e_free_blob:
         kfree(blob);
- -e_free:
- -      kfree(data);
   e_unpin_memory:
         /* content of memory is updated, mark pages dirty */
         for (i = 0; i < n; i++) {
@@@ -985,7 -1086,7 +1028,7 @@@ static int sev_get_attestation_report(s
   {
         void __user *report = (void __user *)(uintptr_t)argp->data;
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
- -      struct sev_data_attestation_report *data;
+ +      struct sev_data_attestation_report data;
         struct kvm_sev_attestation_report params;
         void __user *p;
         void *blob = NULL;
@@@ -997,7 -1098,9 +1040,7 @@@
         if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data, sizeof(params)))
                 return -EFAULT;
   
- -      data = kzalloc(sizeof(*data), GFP_KERNEL_ACCOUNT);
- -      if (!data)
- -              return -ENOMEM;
+ +      memset(&data, 0, sizeof(data));
   
         /* User wants to query the blob length */
         if (!params.len)
@@@ -1005,20 -1108,23 +1048,20 @@@
   
         p = (void __user *)(uintptr_t)params.uaddr;
         if (p) {
- -              if (params.len > SEV_FW_BLOB_MAX_SIZE) {
- -                      ret = -EINVAL;
- -                      goto e_free;
- -              }
+ +              if (params.len > SEV_FW_BLOB_MAX_SIZE)
+ +                      return -EINVAL;
   
- -              ret = -ENOMEM;
- -              blob = kmalloc(params.len, GFP_KERNEL);
+ +              blob = kmalloc(params.len, GFP_KERNEL_ACCOUNT);
                 if (!blob)
- -                      goto e_free;
+ +                      return -ENOMEM;
   
- -              data->address = __psp_pa(blob);
- -              data->len = params.len;
- -              memcpy(data->mnonce, params.mnonce, sizeof(params.mnonce));
+ +              data.address = __psp_pa(blob);
+ +              data.len = params.len;
+ +              memcpy(data.mnonce, params.mnonce, sizeof(params.mnonce));
         }
   cmd:
- -      data->handle = sev->handle;
- -      ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, data, &argp->error);
+ +      data.handle = sev->handle;
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_ATTESTATION_REPORT, &data, &argp->error);
         /*
          * If we query the session length, FW responded with expected data.
          */
@@@ -1034,411 -1140,16 +1077,411 @@@
         }
   
   done:
- -      params.len = data->len;
+ +      params.len = data.len;
         if (copy_to_user(report, &params, sizeof(params)))
                 ret = -EFAULT;
   e_free_blob:
         kfree(blob);
- -e_free:
- -      kfree(data);
         return ret;
   }
   
+ +/*
+ + * Userspace wants to query session length: issue SEND_START carrying only the
+ + * guest handle and report back the session_len found in the command buffer
+ + * afterwards.
+ + *
+ + * Returns a negative value from sev_issue_cmd() on failure, -EFAULT if the
+ + * parameters cannot be copied back to userspace, otherwise the value returned
+ + * by sev_issue_cmd().
+ + */
+ +static int
+ +__sev_send_start_query_session_length(struct kvm *kvm, struct kvm_sev_cmd *argp,
+ +                                    struct kvm_sev_send_start *params)
+ +{
+ +      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ +      struct sev_data_send_start data;
+ +      int ret;
+ +
+ +      /*
+ +       * The command buffer lives on the stack.  Clear it so that every field
+ +       * other than the handle is zero instead of stale stack contents, both
+ +       * for the firmware and for the session_len read back below.
+ +       */
+ +      memset(&data, 0, sizeof(data));
+ +      data.handle = sev->handle;
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
+ +      /*
+ +       * NOTE(review): sev_launch_measure() copies the length back even when
+ +       * the query command reports an error -- confirm whether bailing out
+ +       * here is intended.
+ +       */
+ +      if (ret < 0)
+ +              return ret;
+ +
+ +      params->session_len = data.session_len;
+ +      if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
+ +                              sizeof(struct kvm_sev_send_start)))
+ +              ret = -EFAULT;
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * KVM_SEV_SEND_START: begin sending a guest.
+ + *
+ + * A zero params.session_len is a length query and is delegated to
+ + * __sev_send_start_query_session_length().  Otherwise the PDH, platform and
+ + * AMD certificate blobs are copied in from userspace, SEND_START is issued and
+ + * the session blob is copied out to params.session_uaddr on success.  The
+ + * parameters (with policy and session_len taken from the command buffer) are
+ + * written back to userspace whether or not the command succeeded.
+ + *
+ + * Returns -ENOTTY if the VM is not an SEV guest, -EFAULT on a failed user
+ + * copy, -EINVAL on bad parameters, -ENOMEM on allocation failure, otherwise
+ + * the result of sev_issue_cmd().
+ + */
+ +static int sev_send_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ +{
+ +      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ +      struct sev_data_send_start data;
+ +      struct kvm_sev_send_start params;
+ +      void *amd_certs, *session_data;
+ +      void *pdh_cert, *plat_certs;
+ +      int ret;
+ +
+ +      if (!sev_guest(kvm))
+ +              return -ENOTTY;
+ +
+ +      if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ +                              sizeof(struct kvm_sev_send_start)))
+ +              return -EFAULT;
+ +
+ +      /* if session_len is zero, userspace wants to query the session length */
+ +      if (!params.session_len)
+ +              return __sev_send_start_query_session_length(kvm, argp,
+ +                              &params);
+ +
+ +      /* some sanity checks */
+ +      if (!params.pdh_cert_uaddr || !params.pdh_cert_len ||
+ +          !params.session_uaddr || params.session_len > SEV_FW_BLOB_MAX_SIZE)
+ +              return -EINVAL;
+ +
+ +      /*
+ +       * Allocate the memory to hold the session data blob.  It is zeroed
+ +       * because the whole buffer is later copied to userspace: any bytes the
+ +       * firmware does not write must not expose old heap contents.
+ +       */
+ +      session_data = kzalloc(params.session_len, GFP_KERNEL_ACCOUNT);
+ +      if (!session_data)
+ +              return -ENOMEM;
+ +
+ +      /* copy the certificate blobs from userspace */
+ +      pdh_cert = psp_copy_user_blob(params.pdh_cert_uaddr,
+ +                              params.pdh_cert_len);
+ +      if (IS_ERR(pdh_cert)) {
+ +              ret = PTR_ERR(pdh_cert);
+ +              goto e_free_session;
+ +      }
+ +
+ +      plat_certs = psp_copy_user_blob(params.plat_certs_uaddr,
+ +                              params.plat_certs_len);
+ +      if (IS_ERR(plat_certs)) {
+ +              ret = PTR_ERR(plat_certs);
+ +              goto e_free_pdh;
+ +      }
+ +
+ +      amd_certs = psp_copy_user_blob(params.amd_certs_uaddr,
+ +                              params.amd_certs_len);
+ +      if (IS_ERR(amd_certs)) {
+ +              ret = PTR_ERR(amd_certs);
+ +              goto e_free_plat_cert;
+ +      }
+ +
+ +      /* populate the FW SEND_START fields with system physical addresses */
+ +      memset(&data, 0, sizeof(data));
+ +      data.pdh_cert_address = __psp_pa(pdh_cert);
+ +      data.pdh_cert_len = params.pdh_cert_len;
+ +      data.plat_certs_address = __psp_pa(plat_certs);
+ +      data.plat_certs_len = params.plat_certs_len;
+ +      data.amd_certs_address = __psp_pa(amd_certs);
+ +      data.amd_certs_len = params.amd_certs_len;
+ +      data.session_address = __psp_pa(session_data);
+ +      data.session_len = params.session_len;
+ +      data.handle = sev->handle;
+ +
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_SEND_START, &data, &argp->error);
+ +
+ +      /* the session blob is only handed to userspace if the command succeeded */
+ +      if (!ret && copy_to_user((void __user *)(uintptr_t)params.session_uaddr,
+ +                      session_data, params.session_len)) {
+ +              ret = -EFAULT;
+ +              goto e_free_amd_cert;
+ +      }
+ +
+ +      /* report policy and session length from the command buffer */
+ +      params.policy = data.policy;
+ +      params.session_len = data.session_len;
+ +      if (copy_to_user((void __user *)(uintptr_t)argp->data, &params,
+ +                              sizeof(struct kvm_sev_send_start)))
+ +              ret = -EFAULT;
+ +
+ +e_free_amd_cert:
+ +      kfree(amd_certs);
+ +e_free_plat_cert:
+ +      kfree(plat_certs);
+ +e_free_pdh:
+ +      kfree(pdh_cert);
+ +e_free_session:
+ +      kfree(session_data);
+ +      return ret;
+ +}
+ +
+ +/*
+ + * Userspace wants to query either header or trans length: issue
+ + * SEND_UPDATE_DATA carrying only the guest handle and report back the hdr_len
+ + * and trans_len found in the command buffer afterwards.
+ + *
+ + * Returns a negative value from sev_issue_cmd() on failure, -EFAULT if the
+ + * parameters cannot be copied back to userspace, otherwise the value returned
+ + * by sev_issue_cmd().
+ + */
+ +static int
+ +__sev_send_update_data_query_lengths(struct kvm *kvm, struct kvm_sev_cmd *argp,
+ +                                   struct kvm_sev_send_update_data *params)
+ +{
+ +      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ +      struct sev_data_send_update_data data;
+ +      int ret;
+ +
+ +      /*
+ +       * The command buffer lives on the stack.  Clear it so that every field
+ +       * other than the handle is zero instead of stale stack contents, both
+ +       * for the firmware and for the lengths read back below.
+ +       */
+ +      memset(&data, 0, sizeof(data));
+ +      data.handle = sev->handle;
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
+ +      if (ret < 0)
+ +              return ret;
+ +
+ +      params->hdr_len = data.hdr_len;
+ +      params->trans_len = data.trans_len;
+ +
+ +      if (copy_to_user((void __user *)(uintptr_t)argp->data, params,
+ +                       sizeof(struct kvm_sev_send_update_data)))
+ +              ret = -EFAULT;
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * KVM_SEV_SEND_UPDATE_DATA: have the firmware produce a transport packet for
+ + * one guest region.
+ + *
+ + * A zero trans_len or hdr_len is a length query and is delegated to
+ + * __sev_send_update_data_query_lengths().  Otherwise the guest region (which
+ + * must not cross a page boundary) is pinned, SEND_UPDATE_DATA is issued and
+ + * the transport buffer and packet header are copied out to userspace.
+ + *
+ + * Returns -ENOTTY if the VM is not an SEV guest, -EFAULT on a failed user
+ + * copy, -EINVAL on bad parameters, -ENOMEM on allocation failure, the error
+ + * from sev_pin_memory() if pinning fails, otherwise the result of
+ + * sev_issue_cmd().
+ + */
+ +static int sev_send_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ +{
+ +      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ +      struct sev_data_send_update_data data;
+ +      struct kvm_sev_send_update_data params;
+ +      void *hdr, *trans_data;
+ +      struct page **guest_page;
+ +      unsigned long n;
+ +      int ret, offset;
+ +
+ +      if (!sev_guest(kvm))
+ +              return -ENOTTY;
+ +
+ +      if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ +                      sizeof(struct kvm_sev_send_update_data)))
+ +              return -EFAULT;
+ +
+ +      /* userspace wants to query either header or trans length */
+ +      if (!params.trans_len || !params.hdr_len)
+ +              return __sev_send_update_data_query_lengths(kvm, argp, &params);
+ +
+ +      if (!params.trans_uaddr || !params.guest_uaddr ||
+ +          !params.guest_len || !params.hdr_uaddr)
+ +              return -EINVAL;
+ +
+ +      /* Check if we are crossing the page boundary */
+ +      offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ +      if ((params.guest_len + offset > PAGE_SIZE))
+ +              return -EINVAL;
+ +
+ +      /*
+ +       * Pin guest memory.  sev_pin_memory() reports failure through an
+ +       * ERR_PTR-encoded pointer, so a plain NULL test would miss it.
+ +       */
+ +      guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
+ +                                  PAGE_SIZE, &n, 0);
+ +      if (IS_ERR(guest_page))
+ +              return PTR_ERR(guest_page);
+ +
+ +      /*
+ +       * Allocate memory for header and transport buffer.  Both are zeroed
+ +       * because they are copied to userspace in full: any bytes the firmware
+ +       * does not write must not expose old heap contents.
+ +       */
+ +      ret = -ENOMEM;
+ +      hdr = kzalloc(params.hdr_len, GFP_KERNEL_ACCOUNT);
+ +      if (!hdr)
+ +              goto e_unpin;
+ +
+ +      trans_data = kzalloc(params.trans_len, GFP_KERNEL_ACCOUNT);
+ +      if (!trans_data)
+ +              goto e_free_hdr;
+ +
+ +      memset(&data, 0, sizeof(data));
+ +      data.hdr_address = __psp_pa(hdr);
+ +      data.hdr_len = params.hdr_len;
+ +      data.trans_address = __psp_pa(trans_data);
+ +      data.trans_len = params.trans_len;
+ +
+ +      /* The SEND_UPDATE_DATA command requires C-bit to be always set. */
+ +      data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
+ +      data.guest_address |= sev_me_mask;
+ +      data.guest_len = params.guest_len;
+ +      data.handle = sev->handle;
+ +
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_SEND_UPDATE_DATA, &data, &argp->error);
+ +
+ +      if (ret)
+ +              goto e_free_trans_data;
+ +
+ +      /* copy transport buffer to user space */
+ +      if (copy_to_user((void __user *)(uintptr_t)params.trans_uaddr,
+ +                       trans_data, params.trans_len)) {
+ +              ret = -EFAULT;
+ +              goto e_free_trans_data;
+ +      }
+ +
+ +      /*
+ +       * Copy packet header to userspace.  copy_to_user() returns the number
+ +       * of bytes left uncopied, which must be turned into an errno rather
+ +       * than returned as-is.
+ +       */
+ +      if (copy_to_user((void __user *)(uintptr_t)params.hdr_uaddr, hdr,
+ +                       params.hdr_len))
+ +              ret = -EFAULT;
+ +
+ +e_free_trans_data:
+ +      kfree(trans_data);
+ +e_free_hdr:
+ +      kfree(hdr);
+ +e_unpin:
+ +      sev_unpin_memory(kvm, guest_page, n);
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * KVM_SEV_SEND_FINISH: issue SEND_FINISH for this guest's firmware handle.
+ + * Returns -ENOTTY if the VM is not an SEV guest, otherwise the result of
+ + * sev_issue_cmd().
+ + */
+ +static int sev_send_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ +{
+ +      struct sev_data_send_finish cmd;
+ +      int *fw_error = &argp->error;
+ +
+ +      if (!sev_guest(kvm))
+ +              return -ENOTTY;
+ +
+ +      /* The handle is the only field this function fills in. */
+ +      cmd.handle = to_kvm_svm(kvm)->sev_info.handle;
+ +
+ +      return sev_issue_cmd(kvm, SEV_CMD_SEND_FINISH, &cmd, fw_error);
+ +}
+ +
+ +/*
+ + * KVM_SEV_SEND_CANCEL: issue SEND_CANCEL for this guest's firmware handle.
+ + * Returns -ENOTTY if the VM is not an SEV guest, otherwise the result of
+ + * sev_issue_cmd().
+ + */
+ +static int sev_send_cancel(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ +{
+ +      struct sev_data_send_cancel cmd;
+ +      int *fw_error = &argp->error;
+ +
+ +      if (!sev_guest(kvm))
+ +              return -ENOTTY;
+ +
+ +      /* The handle is the only field this function fills in. */
+ +      cmd.handle = to_kvm_svm(kvm)->sev_info.handle;
+ +
+ +      return sev_issue_cmd(kvm, SEV_CMD_SEND_CANCEL, &cmd, fw_error);
+ +}
+ +
+ +/*
+ + * KVM_SEV_RECEIVE_START: copy the PDH and session blobs in from userspace,
+ + * issue RECEIVE_START on argp->sev_fd, bind an ASID for the resulting handle
+ + * and hand that handle back to userspace.  On success the handle and the SEV
+ + * fd are recorded in the VM's sev_info.
+ + *
+ + * Returns -ENOTTY if the VM is not an SEV guest, -EFAULT on a failed user
+ + * copy, -EINVAL if a blob address or length is zero, otherwise the error from
+ + * psp_copy_user_blob(), __sev_issue_cmd() or sev_bind_asid(), or 0.
+ + */
+ +static int sev_receive_start(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ +{
+ +      void __user *uparams = (void __user *)(uintptr_t)argp->data;
+ +      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ +      struct kvm_sev_receive_start params;
+ +      struct sev_data_receive_start start;
+ +      void *session_blob;
+ +      void *pdh_blob;
+ +      int ret;
+ +
+ +      if (!sev_guest(kvm))
+ +              return -ENOTTY;
+ +
+ +      /* Fetch the ioctl parameters. */
+ +      if (copy_from_user(&params, uparams, sizeof(params)))
+ +              return -EFAULT;
+ +
+ +      /* Both blobs are mandatory: reject a zero address or a zero length. */
+ +      if (!(params.pdh_uaddr && params.pdh_len &&
+ +            params.session_uaddr && params.session_len))
+ +              return -EINVAL;
+ +
+ +      pdh_blob = psp_copy_user_blob(params.pdh_uaddr, params.pdh_len);
+ +      if (IS_ERR(pdh_blob))
+ +              return PTR_ERR(pdh_blob);
+ +
+ +      session_blob = psp_copy_user_blob(params.session_uaddr,
+ +                                        params.session_len);
+ +      if (IS_ERR(session_blob)) {
+ +              ret = PTR_ERR(session_blob);
+ +              goto out_free_pdh;
+ +      }
+ +
+ +      /* Build the firmware command from a fully cleared buffer. */
+ +      memset(&start, 0, sizeof(start));
+ +      start.session_len = params.session_len;
+ +      start.session_address = __psp_pa(session_blob);
+ +      start.pdh_cert_len = params.pdh_len;
+ +      start.pdh_cert_address = __psp_pa(pdh_blob);
+ +      start.policy = params.policy;
+ +      start.handle = params.handle;
+ +
+ +      /* create memory encryption context */
+ +      ret = __sev_issue_cmd(argp->sev_fd, SEV_CMD_RECEIVE_START, &start,
+ +                            &argp->error);
+ +      if (ret)
+ +              goto out_free_session;
+ +
+ +      /*
+ +       * Bind ASID to this guest.
+ +       * NOTE(review): on failure nothing here tears down the context that
+ +       * RECEIVE_START just created -- confirm it is released elsewhere.
+ +       */
+ +      ret = sev_bind_asid(kvm, start.handle, &argp->error);
+ +      if (ret)
+ +              goto out_free_session;
+ +
+ +      /* Report the handle to userspace; undo the binding if that fails. */
+ +      params.handle = start.handle;
+ +      if (copy_to_user(uparams, &params, sizeof(params))) {
+ +              sev_unbind_asid(kvm, start.handle);
+ +              ret = -EFAULT;
+ +              goto out_free_session;
+ +      }
+ +
+ +      sev->fd = argp->sev_fd;
+ +      sev->handle = start.handle;
+ +
+ +out_free_session:
+ +      kfree(session_blob);
+ +out_free_pdh:
+ +      kfree(pdh_blob);
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * KVM_SEV_RECEIVE_UPDATE_DATA: feed one transport packet (header plus
+ + * transport buffer, both copied in from userspace) to the firmware, targeting
+ + * a guest region that must not cross a page boundary.
+ + *
+ + * Returns -EINVAL if the VM is not an SEV guest or on bad parameters, -EFAULT
+ + * on a failed user copy, the error from psp_copy_user_blob() or
+ + * sev_pin_memory() if those fail, otherwise the result of sev_issue_cmd().
+ + */
+ +static int sev_receive_update_data(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ +{
+ +      struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
+ +      struct kvm_sev_receive_update_data params;
+ +      struct sev_data_receive_update_data data;
+ +      void *hdr = NULL, *trans = NULL;
+ +      struct page **guest_page;
+ +      unsigned long n;
+ +      int ret, offset;
+ +
+ +      /*
+ +       * NOTE(review): the sibling commands return -ENOTTY here; -EINVAL is
+ +       * kept because it is visible to userspace -- confirm which is intended.
+ +       */
+ +      if (!sev_guest(kvm))
+ +              return -EINVAL;
+ +
+ +      if (copy_from_user(&params, (void __user *)(uintptr_t)argp->data,
+ +                      sizeof(struct kvm_sev_receive_update_data)))
+ +              return -EFAULT;
+ +
+ +      if (!params.hdr_uaddr || !params.hdr_len ||
+ +          !params.guest_uaddr || !params.guest_len ||
+ +          !params.trans_uaddr || !params.trans_len)
+ +              return -EINVAL;
+ +
+ +      /* Check if we are crossing the page boundary */
+ +      offset = params.guest_uaddr & (PAGE_SIZE - 1);
+ +      if ((params.guest_len + offset > PAGE_SIZE))
+ +              return -EINVAL;
+ +
+ +      hdr = psp_copy_user_blob(params.hdr_uaddr, params.hdr_len);
+ +      if (IS_ERR(hdr))
+ +              return PTR_ERR(hdr);
+ +
+ +      trans = psp_copy_user_blob(params.trans_uaddr, params.trans_len);
+ +      if (IS_ERR(trans)) {
+ +              ret = PTR_ERR(trans);
+ +              goto e_free_hdr;
+ +      }
+ +
+ +      memset(&data, 0, sizeof(data));
+ +      data.hdr_address = __psp_pa(hdr);
+ +      data.hdr_len = params.hdr_len;
+ +      data.trans_address = __psp_pa(trans);
+ +      data.trans_len = params.trans_len;
+ +
+ +      /*
+ +       * Pin guest memory.  sev_pin_memory() reports failure through an
+ +       * ERR_PTR-encoded pointer, so a plain NULL test would miss it and
+ +       * guest_page[0] below would dereference an error value.
+ +       */
+ +      guest_page = sev_pin_memory(kvm, params.guest_uaddr & PAGE_MASK,
+ +                                  PAGE_SIZE, &n, 0);
+ +      if (IS_ERR(guest_page)) {
+ +              ret = PTR_ERR(guest_page);
+ +              goto e_free_trans;
+ +      }
+ +
+ +      /* The RECEIVE_UPDATE_DATA command requires C-bit to be always set. */
+ +      data.guest_address = (page_to_pfn(guest_page[0]) << PAGE_SHIFT) + offset;
+ +      data.guest_address |= sev_me_mask;
+ +      data.guest_len = params.guest_len;
+ +      data.handle = sev->handle;
+ +
+ +      ret = sev_issue_cmd(kvm, SEV_CMD_RECEIVE_UPDATE_DATA, &data,
+ +                              &argp->error);
+ +
+ +      sev_unpin_memory(kvm, guest_page, n);
+ +
+ +e_free_trans:
+ +      kfree(trans);
+ +e_free_hdr:
+ +      kfree(hdr);
+ +
+ +      return ret;
+ +}
+ +
+ +/*
+ + * KVM_SEV_RECEIVE_FINISH: issue RECEIVE_FINISH for this guest's firmware
+ + * handle.  Returns -ENOTTY if the VM is not an SEV guest, otherwise the result
+ + * of sev_issue_cmd().
+ + */
+ +static int sev_receive_finish(struct kvm *kvm, struct kvm_sev_cmd *argp)
+ +{
+ +      struct sev_data_receive_finish cmd;
+ +      int *fw_error = &argp->error;
+ +
+ +      if (!sev_guest(kvm))
+ +              return -ENOTTY;
+ +
+ +      /* The handle is the only field this function fills in. */
+ +      cmd.handle = to_kvm_svm(kvm)->sev_info.handle;
+ +
+ +      return sev_issue_cmd(kvm, SEV_CMD_RECEIVE_FINISH, &cmd, fw_error);
+ +}
+ +
   int svm_mem_enc_op(struct kvm *kvm, void __user *argp)
   {
         struct kvm_sev_cmd sev_cmd;
@@@ -1455,22 -1166,13 +1498,22 @@@
   
         mutex_lock(&kvm->lock);
   
+ +      /* enc_context_owner handles all memory enc operations */
+ +      if (is_mirroring_enc_context(kvm)) {
+ +              r = -EINVAL;
+ +              goto out;
+ +      }
+ +
         switch (sev_cmd.id) {
+ +      case KVM_SEV_ES_INIT:
+ +              if (!sev_es) {
+ +                      r = -ENOTTY;
+ +                      goto out;
+ +              }
+ +              fallthrough;
         case KVM_SEV_INIT:
                 r = sev_guest_init(kvm, &sev_cmd);
                 break;
- -      case KVM_SEV_ES_INIT:
- -              r = sev_es_guest_init(kvm, &sev_cmd);
- -              break;
         case KVM_SEV_LAUNCH_START:
                 r = sev_launch_start(kvm, &sev_cmd);
                 break;
@@@ -1501,27 -1203,6 +1544,27 @@@
         case KVM_SEV_GET_ATTESTATION_REPORT:
                 r = sev_get_attestation_report(kvm, &sev_cmd);
                 break;
+ +      case KVM_SEV_SEND_START:
+ +              r = sev_send_start(kvm, &sev_cmd);
+ +              break;
+ +      case KVM_SEV_SEND_UPDATE_DATA:
+ +              r = sev_send_update_data(kvm, &sev_cmd);
+ +              break;
+ +      case KVM_SEV_SEND_FINISH:
+ +              r = sev_send_finish(kvm, &sev_cmd);
+ +              break;
+ +      case KVM_SEV_SEND_CANCEL:
+ +              r = sev_send_cancel(kvm, &sev_cmd);
+ +              break;
+ +      case KVM_SEV_RECEIVE_START:
+ +              r = sev_receive_start(kvm, &sev_cmd);
+ +              break;
+ +      case KVM_SEV_RECEIVE_UPDATE_DATA:
+ +              r = sev_receive_update_data(kvm, &sev_cmd);
+ +              break;
+ +      case KVM_SEV_RECEIVE_FINISH:
+ +              r = sev_receive_finish(kvm, &sev_cmd);
+ +              break;
         default:
                 r = -EINVAL;
                 goto out;
@@@ -1545,10 -1226,6 +1588,10 @@@ int svm_register_enc_region(struct kvm 
         if (!sev_guest(kvm))
                 return -ENOTTY;
   
+ +      /* If kvm is mirroring encryption context it isn't responsible for it */
+ +      if (is_mirroring_enc_context(kvm))
+ +              return -EINVAL;
+ +
         if (range->addr > ULONG_MAX || range->size > ULONG_MAX)
                 return -EINVAL;
   
@@@ -1615,10 -1292,6 +1658,10 @@@ int svm_unregister_enc_region(struct kv
         struct enc_region *region;
         int ret;
   
+ +      /* If kvm is mirroring encryption context it isn't responsible for it */
+ +      if (is_mirroring_enc_context(kvm))
+ +              return -EINVAL;
+ +
         mutex_lock(&kvm->lock);
   
         if (!sev_guest(kvm)) {
@@@ -1649,71 -1322,6 +1692,71 @@@ failed
         return ret;
   }
   
+ +/*
+ + * Make @kvm a mirror of the SEV VM referred to by @source_fd: @kvm reuses
+ + * the source VM's ASID and records the source as its enc_context_owner,
+ + * holding a reference on it so the ASID cannot go away underneath the mirror.
+ + *
+ + * Returns 0 on success, -EBADF if @source_fd is not a KVM VM file, or
+ + * -EINVAL if the source is not an SEV guest, is itself a mirror, is @kvm
+ + * itself, or if @kvm is already an SEV guest.
+ + */
+ +int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd)
+ +{
+ +      struct file *source_kvm_file;
+ +      struct kvm *source_kvm;
+ +      struct kvm_sev_info *mirror_sev;
+ +      unsigned int asid;
+ +      int ret;
+ +
+ +      source_kvm_file = fget(source_fd);
+ +      if (!file_is_kvm(source_kvm_file)) {
+ +              ret = -EBADF;
+ +              goto e_source_put;
+ +      }
+ +
+ +      source_kvm = source_kvm_file->private_data;
+ +      mutex_lock(&source_kvm->lock);
+ +
+ +      if (!sev_guest(source_kvm)) {
+ +              ret = -EINVAL;
+ +              goto e_source_unlock;
+ +      }
+ +
+ +      /* Mirrors of mirrors should work, but let's not get silly */
+ +      if (is_mirroring_enc_context(source_kvm) || source_kvm == kvm) {
+ +              ret = -EINVAL;
+ +              goto e_source_unlock;
+ +      }
+ +
+ +      asid = to_kvm_svm(source_kvm)->sev_info.asid;
+ +
+ +      /*
+ +       * The mirror kvm holds an enc_context_owner ref so its asid can't
+ +       * disappear until we're done with it
+ +       */
+ +      kvm_get_kvm(source_kvm);
+ +
+ +      /* The VM reference taken above replaces the file reference. */
+ +      fput(source_kvm_file);
+ +      mutex_unlock(&source_kvm->lock);
+ +      mutex_lock(&kvm->lock);
+ +
+ +      if (sev_guest(kvm)) {
+ +              ret = -EINVAL;
+ +              goto e_mirror_unlock;
+ +      }
+ +
+ +      /* Set enc_context_owner and copy its encryption context over */
+ +      mirror_sev = &to_kvm_svm(kvm)->sev_info;
+ +      mirror_sev->enc_context_owner = source_kvm;
+ +      mirror_sev->asid = asid;
+ +      mirror_sev->active = true;
+ +
+ +      mutex_unlock(&kvm->lock);
+ +      return 0;
+ +
+ +e_mirror_unlock:
+ +      mutex_unlock(&kvm->lock);
+ +      /* Drop the reference taken for the mirror that was never set up. */
+ +      kvm_put_kvm(source_kvm);
+ +      return ret;
+ +e_source_unlock:
+ +      mutex_unlock(&source_kvm->lock);
+ +e_source_put:
+ +      /*
+ +       * fget() returns NULL for an invalid descriptor, and that case also
+ +       * lands here; fput() must not be handed a NULL file.
+ +       */
+ +      if (source_kvm_file)
+ +              fput(source_kvm_file);
+ +      return ret;
+ +}
+ +
   void sev_vm_destroy(struct kvm *kvm)
   {
         struct kvm_sev_info *sev = &to_kvm_svm(kvm)->sev_info;
@@@ -1723,12 -1331,6 +1766,12 @@@
         if (!sev_guest(kvm))
                 return;
   
+ +      /* If this is a mirror_kvm release the enc_context_owner and skip sev cleanup */
+ +      if (is_mirroring_enc_context(kvm)) {
+ +              kvm_put_kvm(sev->enc_context_owner);
+ +              return;
+ +      }
+ +
         mutex_lock(&kvm->lock);
   
         /*
@@@ -1753,12 -1355,12 +1796,12 @@@
         mutex_unlock(&kvm->lock);
   
         sev_unbind_asid(kvm, sev->handle);
-       sev_asid_free(sev->asid);
+       sev_asid_free(sev);
   }
   
   void __init sev_hardware_setup(void)
   {
-       unsigned int eax, ebx, ecx, edx;
+       unsigned int eax, ebx, ecx, edx, sev_asid_count, sev_es_asid_count;
         bool sev_es_supported = false;
         bool sev_supported = false;
   
@@@ -1780,7 -1382,6 +1823,7 @@@
   
         /* Minimum ASID value that should be used for SEV guest */
         min_sev_asid = edx;
+ +      sev_me_mask = 1UL << (ebx & 0x3f);
   
         /* Initialize SEV ASID bitmaps */
         sev_asid_bitmap = bitmap_zalloc(max_sev_asid, GFP_KERNEL);
@@@ -1791,7 -1392,11 +1834,11 @@@
         if (!sev_reclaim_asid_bitmap)
                 goto out;
   
-       pr_info("SEV supported: %u ASIDs\n", max_sev_asid - min_sev_asid + 1);
+       sev_asid_count = max_sev_asid - min_sev_asid + 1;
+       if (misc_cg_set_capacity(MISC_CG_RES_SEV, sev_asid_count))
+               goto out;
+ 
+       pr_info("SEV supported: %u ASIDs\n", sev_asid_count);
         sev_supported = true;
   
         /* SEV-ES support requested? */
@@@ -1806,7 -1411,11 +1853,11 @@@
         if (min_sev_asid == 1)
                 goto out;
   
-       pr_info("SEV-ES supported: %u ASIDs\n", min_sev_asid - 1);
+       sev_es_asid_count = min_sev_asid - 1;
+       if (misc_cg_set_capacity(MISC_CG_RES_SEV_ES, sev_es_asid_count))
+               goto out;
+ 
+       pr_info("SEV-ES supported: %u ASIDs\n", sev_es_asid_count);
         sev_es_supported = true;
   
   out:
@@@ -1821,6 -1430,8 +1872,8 @@@ void sev_hardware_teardown(void
   
         bitmap_free(sev_asid_bitmap);
         bitmap_free(sev_reclaim_asid_bitmap);
+       misc_cg_set_capacity(MISC_CG_RES_SEV, 0);
+       misc_cg_set_capacity(MISC_CG_RES_SEV_ES, 0);
   
         sev_flush_asids();
   }
@@@ -2214,7 -1825,7 +2267,7 @@@ static bool setup_vmgexit_scratch(struc
                                len, GHCB_SCRATCH_AREA_LIMIT);
                         return false;
                 }
- -              scratch_va = kzalloc(len, GFP_KERNEL);
+ +              scratch_va = kzalloc(len, GFP_KERNEL_ACCOUNT);
                 if (!scratch_va)
                         return false;
   
@@@ -2288,7 -1899,7 +2341,7 @@@ static int sev_handle_vmgexit_msr_proto
                 vcpu->arch.regs[VCPU_REGS_RAX] = cpuid_fn;
                 vcpu->arch.regs[VCPU_REGS_RCX] = 0;
   
- -              ret = svm_invoke_exit_handler(svm, SVM_EXIT_CPUID);
+ +              ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_CPUID);
                 if (!ret) {
                         ret = -EINVAL;
                         break;
@@@ -2338,9 -1949,8 +2391,9 @@@
         return ret;
   }
   
- -int sev_handle_vmgexit(struct vcpu_svm *svm)
+ +int sev_handle_vmgexit(struct kvm_vcpu *vcpu)
   {
+ +      struct vcpu_svm *svm = to_svm(vcpu);
         struct vmcb_control_area *control = &svm->vmcb->control;
         u64 ghcb_gpa, exit_code;
         struct ghcb *ghcb;
@@@ -2352,13 -1962,13 +2405,13 @@@
                 return sev_handle_vmgexit_msr_protocol(svm);
   
         if (!ghcb_gpa) {
- -              vcpu_unimpl(&svm->vcpu, "vmgexit: GHCB gpa is not set\n");
+ +              vcpu_unimpl(vcpu, "vmgexit: GHCB gpa is not set\n");
                 return -EINVAL;
         }
   
- -      if (kvm_vcpu_map(&svm->vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
+ +      if (kvm_vcpu_map(vcpu, ghcb_gpa >> PAGE_SHIFT, &svm->ghcb_map)) {
                 /* Unable to map GHCB from guest */
- -              vcpu_unimpl(&svm->vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
+ +              vcpu_unimpl(vcpu, "vmgexit: error mapping GHCB [%#llx] from guest\n",
                             ghcb_gpa);
                 return -EINVAL;
         }
@@@ -2366,7 -1976,7 +2419,7 @@@
         svm->ghcb = svm->ghcb_map.hva;
         ghcb = svm->ghcb_map.hva;
   
- -      trace_kvm_vmgexit_enter(svm->vcpu.vcpu_id, ghcb);
+ +      trace_kvm_vmgexit_enter(vcpu->vcpu_id, ghcb);
   
         exit_code = ghcb_get_sw_exit_code(ghcb);
   
@@@ -2384,7 -1994,7 +2437,7 @@@
                 if (!setup_vmgexit_scratch(svm, true, control->exit_info_2))
                         break;
   
- -              ret = kvm_sev_es_mmio_read(&svm->vcpu,
+ +              ret = kvm_sev_es_mmio_read(vcpu,
                                            control->exit_info_1,
                                            control->exit_info_2,
                                            svm->ghcb_sa);
@@@ -2393,19 -2003,19 +2446,19 @@@
                 if (!setup_vmgexit_scratch(svm, false, control->exit_info_2))
                         break;
   
- -              ret = kvm_sev_es_mmio_write(&svm->vcpu,
+ +              ret = kvm_sev_es_mmio_write(vcpu,
                                             control->exit_info_1,
                                             control->exit_info_2,
                                             svm->ghcb_sa);
                 break;
         case SVM_VMGEXIT_NMI_COMPLETE:
- -              ret = svm_invoke_exit_handler(svm, SVM_EXIT_IRET);
+ +              ret = svm_invoke_exit_handler(vcpu, SVM_EXIT_IRET);
                 break;
         case SVM_VMGEXIT_AP_HLT_LOOP:
- -              ret = kvm_emulate_ap_reset_hold(&svm->vcpu);
+ +              ret = kvm_emulate_ap_reset_hold(vcpu);
                 break;
         case SVM_VMGEXIT_AP_JUMP_TABLE: {
- -              struct kvm_sev_info *sev = &to_kvm_svm(svm->vcpu.kvm)->sev_info;
+ +              struct kvm_sev_info *sev = &to_kvm_svm(vcpu->kvm)->sev_info;
   
                 switch (control->exit_info_1) {
                 case 0:
@@@ -2430,12 -2040,12 +2483,12 @@@
                 break;
         }
         case SVM_VMGEXIT_UNSUPPORTED_EVENT:
- -              vcpu_unimpl(&svm->vcpu,
+ +              vcpu_unimpl(vcpu,
                             "vmgexit: unsupported event - exit_info_1=%#llx, exit_info_2=%#llx\n",
                             control->exit_info_1, control->exit_info_2);
                 break;
         default:
- -              ret = svm_invoke_exit_handler(svm, exit_code);
+ +              ret = svm_invoke_exit_handler(vcpu, exit_code);
         }
   
         return ret;
@@@ -2544,8 -2154,5 +2597,8 @@@ void sev_vcpu_deliver_sipi_vector(struc
          * the guest will set the CS and RIP. Set SW_EXIT_INFO_2 to a
          * non-zero value.
          */
+ +      if (!svm->ghcb)
+ +              return;
+ +
         ghcb_set_sw_exit_info_2(svm->ghcb, 1);
   }
diff --combined arch/x86/kvm/svm/svm.h

index 454da1c,9806aae..d620619
--- 1/arch/x86/kvm/svm/svm.h
--- 2/arch/x86/kvm/svm/svm.h
+++ b/arch/x86/kvm/svm/svm.h
@@@ -28,10 -28,7 +28,10 @@@ static const u32 host_save_user_msrs[] 
   };
   #define NR_HOST_SAVE_USER_MSRS ARRAY_SIZE(host_save_user_msrs)
   
- -#define MAX_DIRECT_ACCESS_MSRS        18
+ +/* Parenthesized so the sizes expand correctly inside larger expressions. */
+ +#define       IOPM_SIZE (PAGE_SIZE * 3)
+ +#define       MSRPM_SIZE (PAGE_SIZE * 2)
+ +
+ +#define MAX_DIRECT_ACCESS_MSRS        20
   #define MSRPM_OFFSETS 16
   extern u32 msrpm_offsets[MSRPM_OFFSETS] __read_mostly;
   extern bool npt_enabled;
@@@ -68,7 -65,7 +68,8 @@@ struct kvm_sev_info 
         unsigned long pages_locked; /* Number of pages locked */
         struct list_head regions_list;  /* List of registered regions */
         u64 ap_jump_table;      /* SEV-ES AP Jump Table address */
+ +      struct kvm *enc_context_owner; /* Owner of copied encryption context */
+       struct misc_cg *misc_cg; /* For misc cgroup accounting */
   };
   
   struct kvm_svm {
@@@ -85,19 -82,11 +86,19 @@@
   
   struct kvm_vcpu;
   
+ +/*
+ + * Bookkeeping kept for one VMCB. Instances are embedded as vmcb01 in
+ + * struct vcpu_svm and as vmcb02 in struct svm_nested_state.
+ + */
+ +struct kvm_vmcb_info {
+ +      /* The VMCB this entry describes. */
+ +      struct vmcb *ptr;
+ +      /* NOTE(review): presumably the physical address of *ptr - confirm. */
+ +      unsigned long pa;
+ +      /* NOTE(review): looks like the CPU this VMCB was last used on - confirm. */
+ +      int cpu;
+ +      /* NOTE(review): appears to replace the former per-vCPU asid_generation - confirm. */
+ +      uint64_t asid_generation;
+ +};
+ +
   struct svm_nested_state {
- -      struct vmcb *hsave;
+ +      struct kvm_vmcb_info vmcb02;
         u64 hsave_msr;
         u64 vm_cr_msr;
         u64 vmcb12_gpa;
+ +      u64 last_vmcb12_gpa;
   
         /* These are the merged vectors */
         u32 *msrpm;
@@@ -114,14 -103,13 +115,14 @@@
   
   struct vcpu_svm {
         struct kvm_vcpu vcpu;
+ +      /* vmcb always points at current_vmcb->ptr, it's purely a shorthand. */
         struct vmcb *vmcb;
- -      unsigned long vmcb_pa;
+ +      struct kvm_vmcb_info vmcb01;
+ +      struct kvm_vmcb_info *current_vmcb;
         struct svm_cpu_data *svm_data;
         u32 asid;
- -      uint64_t asid_generation;
- -      uint64_t sysenter_esp;
- -      uint64_t sysenter_eip;
+ +      u32 sysenter_esp_hi;
+ +      u32 sysenter_eip_hi;
         uint64_t tsc_aux;
   
         u64 msr_decfg;
@@@ -252,14 -240,17 +253,14 @@@ static inline void vmcb_mark_dirty(stru
         vmcb->control.clean &= ~(1 << bit);
   }
   
- -static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
+ +static inline bool vmcb_is_dirty(struct vmcb *vmcb, int bit)
   {
- -      return container_of(vcpu, struct vcpu_svm, vcpu);
+ +        return !test_bit(bit, (unsigned long *)&vmcb->control.clean);
   }
   
- -static inline struct vmcb *get_host_vmcb(struct vcpu_svm *svm)
+ +static inline struct vcpu_svm *to_svm(struct kvm_vcpu *vcpu)
   {
- -      if (is_guest_mode(&svm->vcpu))
- -              return svm->nested.hsave;
- -      else
- -              return svm->vmcb;
+ +      return container_of(vcpu, struct vcpu_svm, vcpu);
   }
   
   static inline void vmcb_set_intercept(struct vmcb_control_area *control, u32 bit)
@@@ -282,7 -273,7 +283,7 @@@ static inline bool vmcb_is_intercept(st
   
   static inline void set_dr_intercepts(struct vcpu_svm *svm)
   {
- -      struct vmcb *vmcb = get_host_vmcb(svm);
+ +      struct vmcb *vmcb = svm->vmcb01.ptr;
   
         if (!sev_es_guest(svm->vcpu.kvm)) {
                 vmcb_set_intercept(&vmcb->control, INTERCEPT_DR0_READ);
@@@ -309,7 -300,7 +310,7 @@@
   
   static inline void clr_dr_intercepts(struct vcpu_svm *svm)
   {
- -      struct vmcb *vmcb = get_host_vmcb(svm);
+ +      struct vmcb *vmcb = svm->vmcb01.ptr;
   
         vmcb->control.intercepts[INTERCEPT_DR] = 0;
   
@@@ -324,7 -315,7 +325,7 @@@
   
   static inline void set_exception_intercept(struct vcpu_svm *svm, u32 bit)
   {
- -      struct vmcb *vmcb = get_host_vmcb(svm);
+ +      struct vmcb *vmcb = svm->vmcb01.ptr;
   
         WARN_ON_ONCE(bit >= 32);
         vmcb_set_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
@@@ -334,7 -325,7 +335,7 @@@
   
   static inline void clr_exception_intercept(struct vcpu_svm *svm, u32 bit)
   {
- -      struct vmcb *vmcb = get_host_vmcb(svm);
+ +      struct vmcb *vmcb = svm->vmcb01.ptr;
   
         WARN_ON_ONCE(bit >= 32);
         vmcb_clr_intercept(&vmcb->control, INTERCEPT_EXCEPTION_OFFSET + bit);
@@@ -344,7 -335,7 +345,7 @@@
   
   static inline void svm_set_intercept(struct vcpu_svm *svm, int bit)
   {
- -      struct vmcb *vmcb = get_host_vmcb(svm);
+ +      struct vmcb *vmcb = svm->vmcb01.ptr;
   
         vmcb_set_intercept(&vmcb->control, bit);
   
@@@ -353,7 -344,7 +354,7 @@@
   
   static inline void svm_clr_intercept(struct vcpu_svm *svm, int bit)
   {
- -      struct vmcb *vmcb = get_host_vmcb(svm);
+ +      struct vmcb *vmcb = svm->vmcb01.ptr;
   
         vmcb_clr_intercept(&vmcb->control, bit);
   
@@@ -415,7 -406,7 +416,7 @@@ bool svm_smi_blocked(struct kvm_vcpu *v
   bool svm_nmi_blocked(struct kvm_vcpu *vcpu);
   bool svm_interrupt_blocked(struct kvm_vcpu *vcpu);
   void svm_set_gif(struct vcpu_svm *svm, bool value);
- -int svm_invoke_exit_handler(struct vcpu_svm *svm, u64 exit_code);
+ +int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code);
   void set_msr_interception(struct kvm_vcpu *vcpu, u32 *msrpm, u32 msr,
                           int read, int write);
   
@@@ -447,30 -438,20 +448,30 @@@ static inline bool nested_exit_on_nmi(s
         return vmcb_is_intercept(&svm->nested.ctl, INTERCEPT_NMI);
   }
   
- -int enter_svm_guest_mode(struct vcpu_svm *svm, u64 vmcb_gpa,
- -                       struct vmcb *nested_vmcb);
+ +int enter_svm_guest_mode(struct kvm_vcpu *vcpu, u64 vmcb_gpa, struct vmcb *vmcb12);
   void svm_leave_nested(struct vcpu_svm *svm);
   void svm_free_nested(struct vcpu_svm *svm);
   int svm_allocate_nested(struct vcpu_svm *svm);
- -int nested_svm_vmrun(struct vcpu_svm *svm);
+ +int nested_svm_vmrun(struct kvm_vcpu *vcpu);
   void nested_svm_vmloadsave(struct vmcb *from_vmcb, struct vmcb *to_vmcb);
   int nested_svm_vmexit(struct vcpu_svm *svm);
+ +
+ +/*
+ + * Perform a nested VM exit that carries no additional information:
+ + * store @exit_code in the VMCB that svm->vmcb points at, zero both
+ + * exit_info fields, and return whatever nested_svm_vmexit() returns.
+ + */
+ +static inline int nested_svm_simple_vmexit(struct vcpu_svm *svm, u32 exit_code)
+ +{
+ +      struct vmcb_control_area *ctl = &svm->vmcb->control;
+ +
+ +      ctl->exit_code = exit_code;
+ +      ctl->exit_info_1 = 0;
+ +      ctl->exit_info_2 = 0;
+ +
+ +      return nested_svm_vmexit(svm);
+ +}
+ +
   int nested_svm_exit_handled(struct vcpu_svm *svm);
- -int nested_svm_check_permissions(struct vcpu_svm *svm);
+ +int nested_svm_check_permissions(struct kvm_vcpu *vcpu);
   int nested_svm_check_exception(struct vcpu_svm *svm, unsigned nr,
                                bool has_error_code, u32 error_code);
   int nested_svm_exit_special(struct vcpu_svm *svm);
- -void sync_nested_vmcb_control(struct vcpu_svm *svm);
+ +void nested_sync_control_from_vmcb02(struct vcpu_svm *svm);
+ +void nested_vmcb02_compute_g_pat(struct vcpu_svm *svm);
+ +void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb);
   
   extern struct kvm_x86_nested_ops svm_nested_ops;
   
@@@ -511,8 -492,8 +512,8 @@@ void avic_vm_destroy(struct kvm *kvm)
   int avic_vm_init(struct kvm *kvm);
   void avic_init_vmcb(struct vcpu_svm *svm);
   void svm_toggle_avic_for_irq_window(struct kvm_vcpu *vcpu, bool activate);
- -int avic_incomplete_ipi_interception(struct vcpu_svm *svm);
- -int avic_unaccelerated_access_interception(struct vcpu_svm *svm);
+ +int avic_incomplete_ipi_interception(struct kvm_vcpu *vcpu);
+ +int avic_unaccelerated_access_interception(struct kvm_vcpu *vcpu);
   int avic_init_vcpu(struct vcpu_svm *svm);
   void avic_vcpu_load(struct kvm_vcpu *vcpu, int cpu);
   void avic_vcpu_put(struct kvm_vcpu *vcpu);
@@@ -581,12 -562,11 +582,12 @@@ int svm_register_enc_region(struct kvm 
                             struct kvm_enc_region *range);
   int svm_unregister_enc_region(struct kvm *kvm,
                               struct kvm_enc_region *range);
+ +int svm_vm_copy_asid_from(struct kvm *kvm, unsigned int source_fd);
   void pre_sev_run(struct vcpu_svm *svm, int cpu);
   void __init sev_hardware_setup(void);
   void sev_hardware_teardown(void);
   void sev_free_vcpu(struct kvm_vcpu *vcpu);
- -int sev_handle_vmgexit(struct vcpu_svm *svm);
+ +int sev_handle_vmgexit(struct kvm_vcpu *vcpu);
   int sev_es_string_io(struct vcpu_svm *svm, int size, unsigned int port, int in);
   void sev_es_init_vmcb(struct vcpu_svm *svm);
   void sev_es_create_vcpu(struct vcpu_svm *svm);
diff --combined arch/x86/kvm/x86.c

index c9ba6f2,eca6362..f0d0b6e
--- 1/arch/x86/kvm/x86.c
--- 2/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@@ -75,7 -75,6 +75,7 @@@
   #include <asm/tlbflush.h>
   #include <asm/intel_pt.h>
   #include <asm/emulate_prefix.h>
+ +#include <asm/sgx.h>
   #include <clocksource/hyperv_timer.h>
   
   #define CREATE_TRACE_POINTS
@@@ -246,9 -245,6 +246,9 @@@ struct kvm_stats_debugfs_item debugfs_e
         VCPU_STAT("l1d_flush", l1d_flush),
         VCPU_STAT("halt_poll_success_ns", halt_poll_success_ns),
         VCPU_STAT("halt_poll_fail_ns", halt_poll_fail_ns),
+ +      VCPU_STAT("nested_run", nested_run),
+ +      VCPU_STAT("directed_yield_attempted", directed_yield_attempted),
+ +      VCPU_STAT("directed_yield_successful", directed_yield_successful),
         VM_STAT("mmu_shadow_zapped", mmu_shadow_zapped),
         VM_STAT("mmu_pte_write", mmu_pte_write),
         VM_STAT("mmu_pde_zapped", mmu_pde_zapped),
@@@ -275,8 -271,7 +275,7 @@@ static struct kmem_cache *x86_emulator_
    * When called, it means the previous get/set msr reached an invalid msr.
    * Return true if we want to ignore/silent this failed msr access.
    */
- static bool kvm_msr_ignored_check(struct kvm_vcpu *vcpu, u32 msr,
-                                 u64 data, bool write)
+ static bool kvm_msr_ignored_check(u32 msr, u64 data, bool write)
   {
         const char *op = write ? "wrmsr" : "rdmsr";
   
@@@ -548,6 -543,8 +547,6 @@@ static void kvm_multiple_exception(stru
   
         if (!vcpu->arch.exception.pending && !vcpu->arch.exception.injected) {
         queue:
- -              if (has_error && !is_protmode(vcpu))
- -                      has_error = false;
                 if (reinject) {
                         /*
                          * On vmentry, vcpu->arch.exception.pending is only
@@@ -986,17 -983,14 +985,17 @@@ static int __kvm_set_xcr(struct kvm_vcp
         return 0;
   }
   
- -int kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
+ +int kvm_emulate_xsetbv(struct kvm_vcpu *vcpu)
   {
- -      if (static_call(kvm_x86_get_cpl)(vcpu) == 0)
- -              return __kvm_set_xcr(vcpu, index, xcr);
+ +      if (static_call(kvm_x86_get_cpl)(vcpu) != 0 ||
+ +          __kvm_set_xcr(vcpu, kvm_rcx_read(vcpu), kvm_read_edx_eax(vcpu))) {
+ +              kvm_inject_gp(vcpu, 0);
+ +              return 1;
+ +      }
   
- -      return 1;
+ +      return kvm_skip_emulated_instruction(vcpu);
   }
- -EXPORT_SYMBOL_GPL(kvm_set_xcr);
+ +EXPORT_SYMBOL_GPL(kvm_emulate_xsetbv);
   
   bool kvm_is_valid_cr4(struct kvm_vcpu *vcpu, unsigned long cr4)
   {
@@@ -1197,21 -1191,20 +1196,21 @@@ void kvm_get_dr(struct kvm_vcpu *vcpu, 
   }
   EXPORT_SYMBOL_GPL(kvm_get_dr);
   
- -bool kvm_rdpmc(struct kvm_vcpu *vcpu)
+ +int kvm_emulate_rdpmc(struct kvm_vcpu *vcpu)
   {
         u32 ecx = kvm_rcx_read(vcpu);
         u64 data;
- -      int err;
   
- -      err = kvm_pmu_rdpmc(vcpu, ecx, &data);
- -      if (err)
- -              return err;
+ +      if (kvm_pmu_rdpmc(vcpu, ecx, &data)) {
+ +              kvm_inject_gp(vcpu, 0);
+ +              return 1;
+ +      }
+ +
         kvm_rax_write(vcpu, (u32)data);
         kvm_rdx_write(vcpu, data >> 32);
- -      return err;
+ +      return kvm_skip_emulated_instruction(vcpu);
   }
- -EXPORT_SYMBOL_GPL(kvm_rdpmc);
+ +EXPORT_SYMBOL_GPL(kvm_emulate_rdpmc);
   
   /*
    * List of msr numbers which we expose to userspace through KVM_GET_MSRS
@@@ -1451,7 -1444,7 +1450,7 @@@ static int do_get_msr_feature(struct kv
         if (r == KVM_MSR_RET_INVALID) {
                 /* Unconditionally clear the output for simplicity */
                 *data = 0;
-               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+               if (kvm_msr_ignored_check(index, 0, false))
                         r = 0;
         }
   
@@@ -1532,35 -1525,44 +1531,44 @@@ EXPORT_SYMBOL_GPL(kvm_enable_efer_bits)
   
   bool kvm_msr_allowed(struct kvm_vcpu *vcpu, u32 index, u32 type)
   {
+       struct kvm_x86_msr_filter *msr_filter;
+       struct msr_bitmap_range *ranges;
         struct kvm *kvm = vcpu->kvm;
-       struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
-       u32 count = kvm->arch.msr_filter.count;
-       u32 i;
-       bool r = kvm->arch.msr_filter.default_allow;
+       bool allowed;
         int idx;
+       u32 i;
   
-       /* MSR filtering not set up or x2APIC enabled, allow everything */
-       if (!count || (index >= 0x800 && index <= 0x8ff))
+       /* x2APIC MSRs do not support filtering. */
+       if (index >= 0x800 && index <= 0x8ff)
                 return true;
   
-       /* Prevent collision with set_msr_filter */
         idx = srcu_read_lock(&kvm->srcu);
   
-       for (i = 0; i < count; i++) {
+       msr_filter = srcu_dereference(kvm->arch.msr_filter, &kvm->srcu);
+       if (!msr_filter) {
+               allowed = true;
+               goto out;
+       }
+ 
+       allowed = msr_filter->default_allow;
+       ranges = msr_filter->ranges;
+ 
+       for (i = 0; i < msr_filter->count; i++) {
                 u32 start = ranges[i].base;
                 u32 end = start + ranges[i].nmsrs;
                 u32 flags = ranges[i].flags;
                 unsigned long *bitmap = ranges[i].bitmap;
   
                 if ((index >= start) && (index < end) && (flags & type)) {
-                       r = !!test_bit(index - start, bitmap);
+                       allowed = !!test_bit(index - start, bitmap);
                         break;
                 }
         }
   
+ out:
         srcu_read_unlock(&kvm->srcu, idx);
   
-       return r;
+       return allowed;
   }
   EXPORT_SYMBOL_GPL(kvm_msr_allowed);
   
@@@ -1617,7 -1619,7 +1625,7 @@@ static int kvm_set_msr_ignored_check(st
         int ret = __kvm_set_msr(vcpu, index, data, host_initiated);
   
         if (ret == KVM_MSR_RET_INVALID)
-               if (kvm_msr_ignored_check(vcpu, index, data, true))
+               if (kvm_msr_ignored_check(index, data, true))
                         ret = 0;
   
         return ret;
@@@ -1655,7 -1657,7 +1663,7 @@@ static int kvm_get_msr_ignored_check(st
         if (ret == KVM_MSR_RET_INVALID) {
                 /* Unconditionally clear *data for simplicity */
                 *data = 0;
-               if (kvm_msr_ignored_check(vcpu, index, 0, false))
+               if (kvm_msr_ignored_check(index, 0, false))
                         ret = 0;
         }
   
@@@ -1789,40 -1791,6 +1797,40 @@@ int kvm_emulate_wrmsr(struct kvm_vcpu *
   }
   EXPORT_SYMBOL_GPL(kvm_emulate_wrmsr);
   
+ +/*
+ + * Emulate the intercepted instruction as a no-op: the only action is to
+ + * skip past it. Returns the result of kvm_skip_emulated_instruction().
+ + */
+ +int kvm_emulate_as_nop(struct kvm_vcpu *vcpu)
+ +{
+ +      return kvm_skip_emulated_instruction(vcpu);
+ +}
+ +EXPORT_SYMBOL_GPL(kvm_emulate_as_nop);
+ +
+ +/* INVD handler: delegates to kvm_emulate_as_nop() and returns its result. */
+ +int kvm_emulate_invd(struct kvm_vcpu *vcpu)
+ +{
+ +      /* Treat an INVD instruction as a NOP and just skip it. */
+ +      return kvm_emulate_as_nop(vcpu);
+ +}
+ +EXPORT_SYMBOL_GPL(kvm_emulate_invd);
+ +
+ +/*
+ + * MWAIT handler: log a one-time warning that the instruction is not
+ + * really emulated, then skip it via kvm_emulate_as_nop().
+ + */
+ +int kvm_emulate_mwait(struct kvm_vcpu *vcpu)
+ +{
+ +      pr_warn_once("kvm: MWAIT instruction emulated as NOP!\n");
+ +      return kvm_emulate_as_nop(vcpu);
+ +}
+ +EXPORT_SYMBOL_GPL(kvm_emulate_mwait);
+ +
+ +/* Queue a UD_VECTOR exception on @vcpu; always returns 1. */
+ +int kvm_handle_invalid_op(struct kvm_vcpu *vcpu)
+ +{
+ +      kvm_queue_exception(vcpu, UD_VECTOR);
+ +      return 1;
+ +}
+ +EXPORT_SYMBOL_GPL(kvm_handle_invalid_op);
+ +
+ +/*
+ + * MONITOR handler: log a one-time warning that the instruction is not
+ + * really emulated, then skip it via kvm_emulate_as_nop().
+ + */
+ +int kvm_emulate_monitor(struct kvm_vcpu *vcpu)
+ +{
+ +      pr_warn_once("kvm: MONITOR instruction emulated as NOP!\n");
+ +      return kvm_emulate_as_nop(vcpu);
+ +}
+ +EXPORT_SYMBOL_GPL(kvm_emulate_monitor);
+ +
   static inline bool kvm_vcpu_exit_request(struct kvm_vcpu *vcpu)
   {
         xfer_to_guest_mode_prepare();
@@@ -2360,7 -2328,7 +2368,7 @@@ static void kvm_synchronize_tsc(struct 
         kvm_vcpu_write_tsc_offset(vcpu, offset);
         raw_spin_unlock_irqrestore(&kvm->arch.tsc_write_lock, flags);
   
-       spin_lock(&kvm->arch.pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&kvm->arch.pvclock_gtod_sync_lock, flags);
         if (!matched) {
                 kvm->arch.nr_vcpus_matched_tsc = 0;
         } else if (!already_matched) {
@@@ -2368,7 -2336,7 +2376,7 @@@
         }
   
         kvm_track_tsc_matching(vcpu);
-       spin_unlock(&kvm->arch.pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&kvm->arch.pvclock_gtod_sync_lock, flags);
   }
   
   static inline void adjust_tsc_offset_guest(struct kvm_vcpu *vcpu,
@@@ -2590,11 -2558,16 +2598,16 @@@ static void kvm_gen_update_masterclock(
         int i;
         struct kvm_vcpu *vcpu;
         struct kvm_arch *ka = &kvm->arch;
+       unsigned long flags;
+ 
+       kvm_hv_invalidate_tsc_page(kvm);
   
-       spin_lock(&ka->pvclock_gtod_sync_lock);
         kvm_make_mclock_inprogress_request(kvm);
+ 
         /* no guest entries from this point */
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         pvclock_update_vm_gtod_copy(kvm);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
   
         kvm_for_each_vcpu(i, vcpu, kvm)
                 kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
@@@ -2602,8 -2575,6 +2615,6 @@@
         /* guest entries allowed */
         kvm_for_each_vcpu(i, vcpu, kvm)
                 kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
- 
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
   #endif
   }
   
@@@ -2611,17 -2582,18 +2622,18 @@@ u64 get_kvmclock_ns(struct kvm *kvm
   {
         struct kvm_arch *ka = &kvm->arch;
         struct pvclock_vcpu_time_info hv_clock;
+       unsigned long flags;
         u64 ret;
   
-       spin_lock(&ka->pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         if (!ka->use_master_clock) {
-               spin_unlock(&ka->pvclock_gtod_sync_lock);
+               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
                 return get_kvmclock_base_ns() + ka->kvmclock_offset;
         }
   
         hv_clock.tsc_timestamp = ka->master_cycle_now;
         hv_clock.system_time = ka->master_kernel_ns + ka->kvmclock_offset;
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
   
         /* both __this_cpu_read() and rdtsc() should be on the same cpu */
         get_cpu();
@@@ -2715,13 -2687,13 +2727,13 @@@ static int kvm_guest_time_update(struc
          * If the host uses TSC clock, then passthrough TSC as stable
          * to the guest.
          */
-       spin_lock(&ka->pvclock_gtod_sync_lock);
+       spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
         use_master_clock = ka->use_master_clock;
         if (use_master_clock) {
                 host_tsc = ka->master_cycle_now;
                 kernel_ns = ka->master_kernel_ns;
         }
-       spin_unlock(&ka->pvclock_gtod_sync_lock);
+       spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
   
         /* Keep irq disabled to prevent changes to the clock */
         local_irq_save(flags);
@@@ -3410,12 -3382,6 +3422,12 @@@ int kvm_get_msr_common(struct kvm_vcpu 
                 msr_info->data = 0;
                 break;
         case MSR_F15H_PERF_CTL0 ... MSR_F15H_PERF_CTR5:
+ +              if (kvm_pmu_is_valid_msr(vcpu, msr_info->index))
+ +                      return kvm_pmu_get_msr(vcpu, msr_info);
+ +              if (!msr_info->host_initiated)
+ +                      return 1;
+ +              msr_info->data = 0;
+ +              break;
         case MSR_K7_EVNTSEL0 ... MSR_K7_EVNTSEL3:
         case MSR_K7_PERFCTR0 ... MSR_K7_PERFCTR3:
         case MSR_P6_PERFCTR0 ... MSR_P6_PERFCTR1:
@@@ -3805,14 -3771,8 +3817,14 @@@ int kvm_vm_ioctl_check_extension(struc
         case KVM_CAP_X86_USER_SPACE_MSR:
         case KVM_CAP_X86_MSR_FILTER:
         case KVM_CAP_ENFORCE_PV_FEATURE_CPUID:
+ +#ifdef CONFIG_X86_SGX_KVM
+ +      case KVM_CAP_SGX_ATTRIBUTE:
+ +#endif
+ +      case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
                 r = 1;
                 break;
+ +      case KVM_CAP_SET_GUEST_DEBUG2:
+ +              return KVM_GUESTDBG_VALID_MASK;
   #ifdef CONFIG_KVM_XEN
         case KVM_CAP_XEN_HVM:
                 r = KVM_XEN_HVM_CONFIG_HYPERCALL_MSR |
@@@ -4715,6 -4675,7 +4727,6 @@@ static int kvm_vcpu_ioctl_enable_cap(st
                         kvm_update_pv_runtime(vcpu);
   
                 return 0;
- -
         default:
                 return -EINVAL;
         }
@@@ -5396,28 -5357,6 +5408,28 @@@ split_irqchip_unlock
                         kvm->arch.bus_lock_detection_enabled = true;
                 r = 0;
                 break;
+ +#ifdef CONFIG_X86_SGX_KVM
+ +      case KVM_CAP_SGX_ATTRIBUTE: {
+ +              unsigned long allowed_attributes = 0;
+ +
+ +              r = sgx_set_attribute(&allowed_attributes, cap->args[0]);
+ +              if (r)
+ +                      break;
+ +
+ +              /* KVM only supports the PROVISIONKEY privileged attribute. */
+ +              if ((allowed_attributes & SGX_ATTR_PROVISIONKEY) &&
+ +                  !(allowed_attributes & ~SGX_ATTR_PROVISIONKEY))
+ +                      kvm->arch.sgx_provisioning_allowed = true;
+ +              else
+ +                      r = -EINVAL;
+ +              break;
+ +      }
+ +#endif
+ +      case KVM_CAP_VM_COPY_ENC_CONTEXT_FROM:
+ +              r = -EINVAL;
+ +              if (kvm_x86_ops.vm_copy_enc_context_from)
+ +                      r = kvm_x86_ops.vm_copy_enc_context_from(kvm, cap->args[0]);
+ +              return r;
         default:
                 r = -EINVAL;
                 break;
@@@ -5425,25 -5364,34 +5437,34 @@@
         return r;
   }
   
- static void kvm_clear_msr_filter(struct kvm *kvm)
+ /*
+  * Allocate a zeroed MSR filter and record @default_allow in it.
+  * Returns the new filter, or NULL if the allocation fails.
+  */
+ static struct kvm_x86_msr_filter *kvm_alloc_msr_filter(bool default_allow)
+ {
+       struct kvm_x86_msr_filter *filter = kzalloc(sizeof(*filter),
+                                                   GFP_KERNEL_ACCOUNT);
+ 
+       if (filter)
+               filter->default_allow = default_allow;
+ 
+       return filter;
+ }
+ 
+ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
   {
         u32 i;
-       u32 count = kvm->arch.msr_filter.count;
-       struct msr_bitmap_range ranges[16];
   
-       mutex_lock(&kvm->lock);
-       kvm->arch.msr_filter.count = 0;
-       memcpy(ranges, kvm->arch.msr_filter.ranges, count * sizeof(ranges[0]));
-       mutex_unlock(&kvm->lock);
-       synchronize_srcu(&kvm->srcu);
+       if (!msr_filter)
+               return;
   
-       for (i = 0; i < count; i++)
-               kfree(ranges[i].bitmap);
+       for (i = 0; i < msr_filter->count; i++)
+               kfree(msr_filter->ranges[i].bitmap);
+ 
+       kfree(msr_filter);
   }
   
- static int kvm_add_msr_filter(struct kvm *kvm, struct kvm_msr_filter_range *user_range)
+ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
+                             struct kvm_msr_filter_range *user_range)
   {
-       struct msr_bitmap_range *ranges = kvm->arch.msr_filter.ranges;
         struct msr_bitmap_range range;
         unsigned long *bitmap = NULL;
         size_t bitmap_size;
@@@ -5477,11 -5425,9 +5498,9 @@@
                 goto err;
         }
   
-       /* Everything ok, add this range identifier to our global pool */
-       ranges[kvm->arch.msr_filter.count] = range;
-       /* Make sure we filled the array before we tell anyone to walk it */
-       smp_wmb();
-       kvm->arch.msr_filter.count++;
+       /* Everything ok, add this range identifier. */
+       msr_filter->ranges[msr_filter->count] = range;
+       msr_filter->count++;
   
         return 0;
   err:
@@@ -5492,10 -5438,11 +5511,11 @@@
   static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
   {
         struct kvm_msr_filter __user *user_msr_filter = argp;
+       struct kvm_x86_msr_filter *new_filter, *old_filter;
         struct kvm_msr_filter filter;
         bool default_allow;
-       int r = 0;
         bool empty = true;
+       int r = 0;
         u32 i;
   
         if (copy_from_user(&filter, user_msr_filter, sizeof(filter)))
@@@ -5508,25 -5455,32 +5528,32 @@@
         if (empty && !default_allow)
                 return -EINVAL;
   
-       kvm_clear_msr_filter(kvm);
- 
-       kvm->arch.msr_filter.default_allow = default_allow;
+       new_filter = kvm_alloc_msr_filter(default_allow);
+       if (!new_filter)
+               return -ENOMEM;
   
-       /*
-        * Protect from concurrent calls to this function that could trigger
-        * a TOCTOU violation on kvm->arch.msr_filter.count.
-        */
-       mutex_lock(&kvm->lock);
         for (i = 0; i < ARRAY_SIZE(filter.ranges); i++) {
-               r = kvm_add_msr_filter(kvm, &filter.ranges[i]);
-               if (r)
-                       break;
+               r = kvm_add_msr_filter(new_filter, &filter.ranges[i]);
+               if (r) {
+                       kvm_free_msr_filter(new_filter);
+                       return r;
+               }
         }
   
+       mutex_lock(&kvm->lock);
+ 
+       /* The per-VM filter is protected by kvm->lock... */
+       old_filter = srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1);
+ 
+       rcu_assign_pointer(kvm->arch.msr_filter, new_filter);
+       synchronize_srcu(&kvm->srcu);
+ 
+       kvm_free_msr_filter(old_filter);
+ 
         kvm_make_all_cpus_request(kvm, KVM_REQ_MSR_FILTER_CHANGED);
         mutex_unlock(&kvm->lock);
   
-       return r;
+       return 0;
   }
   
   long kvm_arch_vm_ioctl(struct file *filp,
@@@ -5773,6 -5727,7 +5800,7 @@@ set_pit2_out
         }
   #endif
         case KVM_SET_CLOCK: {
+               struct kvm_arch *ka = &kvm->arch;
                 struct kvm_clock_data user_ns;
                 u64 now_ns;
   
@@@ -5791,8 -5746,22 +5819,22 @@@
                  * pvclock_update_vm_gtod_copy().
                  */
                 kvm_gen_update_masterclock(kvm);
-               now_ns = get_kvmclock_ns(kvm);
-               kvm->arch.kvmclock_offset += user_ns.clock - now_ns;
+ 
+               /*
+                * This pairs with kvm_guest_time_update(): when masterclock is
+                * in use, we use master_kernel_ns + kvmclock_offset to set
+                * unsigned 'system_time' so if we use get_kvmclock_ns() (which
+                * is slightly ahead) here we risk going negative on unsigned
+                * 'system_time' when 'user_ns.clock' is very small.
+                */
+               spin_lock_irq(&ka->pvclock_gtod_sync_lock);
+               if (kvm->arch.use_master_clock)
+                       now_ns = ka->master_kernel_ns;
+               else
+                       now_ns = get_kvmclock_base_ns();
+               ka->kvmclock_offset = user_ns.clock - now_ns;
+               spin_unlock_irq(&ka->pvclock_gtod_sync_lock);
+ 
                 kvm_make_all_cpus_request(kvm, KVM_REQ_CLOCK_UPDATE);
                 break;
         }
@@@ -6032,7 -6001,6 +6074,7 @@@ gpa_t kvm_mmu_gva_to_gpa_read(struct kv
         u32 access = (static_call(kvm_x86_get_cpl)(vcpu) == 3) ? PFERR_USER_MASK : 0;
         return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
   }
+ +EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_read);
   
    gpa_t kvm_mmu_gva_to_gpa_fetch(struct kvm_vcpu *vcpu, gva_t gva,
                                 struct x86_exception *exception)
@@@ -6049,7 -6017,6 +6091,7 @@@ gpa_t kvm_mmu_gva_to_gpa_write(struct k
         access |= PFERR_WRITE_MASK;
         return vcpu->arch.walk_mmu->gva_to_gpa(vcpu, gva, access, exception);
   }
+ +EXPORT_SYMBOL_GPL(kvm_mmu_gva_to_gpa_write);
   
   /* uses this to access any guest's mapped memory without checking CPL */
   gpa_t kvm_mmu_gva_to_gpa_system(struct kvm_vcpu *vcpu, gva_t gva,
@@@ -6678,7 -6645,7 +6720,7 @@@ static int kvm_emulate_wbinvd_noskip(st
                 int cpu = get_cpu();
   
                 cpumask_set_cpu(cpu, vcpu->arch.wbinvd_dirty_mask);
-               smp_call_function_many(vcpu->arch.wbinvd_dirty_mask,
+               on_each_cpu_mask(vcpu->arch.wbinvd_dirty_mask,
                                 wbinvd_ipi, NULL, 1);
                 put_cpu();
                 cpumask_clear(vcpu->arch.wbinvd_dirty_mask);
@@@ -7773,6 -7740,7 +7815,7 @@@ static void kvm_hyperv_tsc_notifier(voi
         struct kvm *kvm;
         struct kvm_vcpu *vcpu;
         int cpu;
+       unsigned long flags;
   
         mutex_lock(&kvm_lock);
         list_for_each_entry(kvm, &vm_list, vm_list)
@@@ -7788,17 -7756,15 +7831,15 @@@
         list_for_each_entry(kvm, &vm_list, vm_list) {
                 struct kvm_arch *ka = &kvm->arch;
   
-               spin_lock(&ka->pvclock_gtod_sync_lock);
- 
+               spin_lock_irqsave(&ka->pvclock_gtod_sync_lock, flags);
                 pvclock_update_vm_gtod_copy(kvm);
+               spin_unlock_irqrestore(&ka->pvclock_gtod_sync_lock, flags);
   
                 kvm_for_each_vcpu(cpu, vcpu, kvm)
                         kvm_make_request(KVM_REQ_CLOCK_UPDATE, vcpu);
   
                 kvm_for_each_vcpu(cpu, vcpu, kvm)
                         kvm_clear_request(KVM_REQ_MCLOCK_INPROGRESS, vcpu);
- 
-               spin_unlock(&ka->pvclock_gtod_sync_lock);
         }
         mutex_unlock(&kvm_lock);
   }
@@@ -8079,6 -8045,9 +8120,6 @@@ int kvm_arch_init(void *opaque
         if (r)
                 goto out_free_percpu;
   
- -      kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK,
- -                      PT_DIRTY_MASK, PT64_NX_MASK, 0,
- -                      PT_PRESENT_MASK, 0, sme_me_mask);
         kvm_timer_init();
   
         perf_register_guest_info_callbacks(&kvm_guest_cbs);
@@@ -8238,35 -8207,21 +8279,35 @@@ void kvm_apicv_init(struct kvm *kvm, bo
   }
   EXPORT_SYMBOL_GPL(kvm_apicv_init);
   
- -static void kvm_sched_yield(struct kvm *kvm, unsigned long dest_id)
+ +static void kvm_sched_yield(struct kvm_vcpu *vcpu, unsigned long dest_id)
   {
         struct kvm_vcpu *target = NULL;
         struct kvm_apic_map *map;
   
+ +      vcpu->stat.directed_yield_attempted++;
+ +
         rcu_read_lock();
- -      map = rcu_dereference(kvm->arch.apic_map);
+ +      map = rcu_dereference(vcpu->kvm->arch.apic_map);
   
         if (likely(map) && dest_id <= map->max_apic_id && map->phys_map[dest_id])
                 target = map->phys_map[dest_id]->vcpu;
   
         rcu_read_unlock();
   
- -      if (target && READ_ONCE(target->ready))
- -              kvm_vcpu_yield_to(target);
+ +      if (!target || !READ_ONCE(target->ready))
+ +              goto no_yield;
+ +
+ +      /* Ignore requests to yield to self */
+ +      if (vcpu == target)
+ +              goto no_yield;
+ +
+ +      if (kvm_vcpu_yield_to(target) <= 0)
+ +              goto no_yield;
+ +
+ +      vcpu->stat.directed_yield_successful++;
+ +
+ +no_yield:
+ +      return;
   }
   
   int kvm_emulate_hypercall(struct kvm_vcpu *vcpu)
@@@ -8313,7 -8268,7 +8354,7 @@@
                         break;
   
                 kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1);
- -              kvm_sched_yield(vcpu->kvm, a1);
+ +              kvm_sched_yield(vcpu, a1);
                 ret = 0;
                 break;
   #ifdef CONFIG_X86_64
@@@ -8331,7 -8286,7 +8372,7 @@@
                 if (!guest_pv_has(vcpu, KVM_FEATURE_PV_SCHED_YIELD))
                         break;
   
- -              kvm_sched_yield(vcpu->kvm, a0);
+ +              kvm_sched_yield(vcpu, a0);
                 ret = 0;
                 break;
         default:
@@@ -8414,27 -8369,6 +8455,27 @@@ static void update_cr8_intercept(struc
         static_call(kvm_x86_update_cr8_intercept)(vcpu, tpr, max_irr);
   }
   
+ +
+ +int kvm_check_nested_events(struct kvm_vcpu *vcpu)
+ +{
+ +      if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
+ +              return -EIO;
+ +
+ +      if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
+ +              kvm_x86_ops.nested_ops->triple_fault(vcpu);
+ +              return 1;
+ +      }
+ +
+ +      return kvm_x86_ops.nested_ops->check_events(vcpu);
+ +}
+ +
+ +static void kvm_inject_exception(struct kvm_vcpu *vcpu)
+ +{
+ +      if (vcpu->arch.exception.error_code && !is_protmode(vcpu))
+ +              vcpu->arch.exception.error_code = false;
+ +      static_call(kvm_x86_queue_exception)(vcpu);
+ +}
+ +
   static void inject_pending_event(struct kvm_vcpu *vcpu, bool *req_immediate_exit)
   {
         int r;
@@@ -8443,7 -8377,7 +8484,7 @@@
         /* try to reinject previous events if any */
   
         if (vcpu->arch.exception.injected) {
- -              static_call(kvm_x86_queue_exception)(vcpu);
+ +              kvm_inject_exception(vcpu);
                 can_inject = false;
         }
         /*
@@@ -8480,7 -8414,7 +8521,7 @@@
          * from L2 to L1.
          */
         if (is_guest_mode(vcpu)) {
- -              r = kvm_x86_ops.nested_ops->check_events(vcpu);
+ +              r = kvm_check_nested_events(vcpu);
                 if (r < 0)
                         goto busy;
         }
@@@ -8506,7 -8440,7 +8547,7 @@@
                         }
                 }
   
- -              static_call(kvm_x86_queue_exception)(vcpu);
+ +              kvm_inject_exception(vcpu);
                 can_inject = false;
         }
   
@@@ -9043,14 -8977,10 +9084,14 @@@ static int vcpu_enter_guest(struct kvm_
                         goto out;
                 }
                 if (kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu)) {
- -                      vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
- -                      vcpu->mmio_needed = 0;
- -                      r = 0;
- -                      goto out;
+ +                      if (is_guest_mode(vcpu)) {
+ +                              kvm_x86_ops.nested_ops->triple_fault(vcpu);
+ +                      } else {
+ +                              vcpu->run->exit_reason = KVM_EXIT_SHUTDOWN;
+ +                              vcpu->mmio_needed = 0;
+ +                              r = 0;
+ +                              goto out;
+ +                      }
                 }
                 if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) {
                         /* Page is swapped out. Do synthetic halt */
@@@ -9348,7 -9278,7 +9389,7 @@@ static inline int vcpu_block(struct kv
   static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu)
   {
         if (is_guest_mode(vcpu))
- -              kvm_x86_ops.nested_ops->check_events(vcpu);
+ +              kvm_check_nested_events(vcpu);
   
         return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE &&
                 !vcpu->arch.apf.halted);
@@@ -10745,8 -10675,6 +10786,6 @@@ void kvm_arch_pre_destroy_vm(struct kv
   
   void kvm_arch_destroy_vm(struct kvm *kvm)
   {
-       u32 i;
- 
         if (current->mm == kvm->mm) {
                 /*
                  * Free memory regions allocated on behalf of userspace,
@@@ -10762,8 -10690,7 +10801,7 @@@
                 mutex_unlock(&kvm->slots_lock);
         }
         static_call_cond(kvm_x86_vm_destroy)(kvm);
-       for (i = 0; i < kvm->arch.msr_filter.count; i++)
-               kfree(kvm->arch.msr_filter.ranges[i].bitmap);
+       kvm_free_msr_filter(srcu_dereference_check(kvm->arch.msr_filter, &kvm->srcu, 1));
         kvm_pic_destroy(kvm);
         kvm_ioapic_destroy(kvm);
         kvm_free_vcpus(kvm);
@@@ -11093,14 -11020,6 +11131,14 @@@ bool kvm_arch_dy_runnable(struct kvm_vc
         return false;
   }
   
+ +bool kvm_arch_dy_has_pending_interrupt(struct kvm_vcpu *vcpu)
+ +{
+ +      if (vcpu->arch.apicv_active && static_call(kvm_x86_dy_apicv_has_pending_interrupt)(vcpu))
+ +              return true;
+ +
+ +      return false;
+ +}
+ +
   bool kvm_arch_vcpu_in_kernel(struct kvm_vcpu *vcpu)
   {
         return vcpu->arch.preempted_in_kernel;
@@@ -11622,7 -11541,7 +11660,7 @@@ int kvm_handle_invpcid(struct kvm_vcpu 
   
                 fallthrough;
         case INVPCID_TYPE_ALL_INCL_GLOBAL:
- -              kvm_mmu_unload(vcpu);
+ +              kvm_make_request(KVM_REQ_MMU_RELOAD, vcpu);
                 return kvm_skip_emulated_instruction(vcpu);
   
         default:
diff --combined arch/x86/kvm/x86.h

index daccf20,9035e34..5334bf4
--- 1/arch/x86/kvm/x86.h
--- 2/arch/x86/kvm/x86.h
+++ b/arch/x86/kvm/x86.h
@@@ -8,14 -8,6 +8,14 @@@
   #include "kvm_cache_regs.h"
   #include "kvm_emulate.h"
   
+ +#define KVM_NESTED_VMENTER_CONSISTENCY_CHECK(consistency_check)               \
+ +({                                                                    \
+ +      bool failed = (consistency_check);                              \
+ +      if (failed)                                                     \
+ +              trace_kvm_nested_vmenter_failed(#consistency_check, 0); \
+ +      failed;                                                         \
+ +})
+ +
   #define KVM_DEFAULT_PLE_GAP           128
   #define KVM_VMX_DEFAULT_PLE_WINDOW    4096
   #define KVM_DEFAULT_PLE_WINDOW_GROW   2
@@@ -56,8 -48,6 +56,8 @@@ static inline unsigned int __shrink_ple
   
   #define MSR_IA32_CR_PAT_DEFAULT  0x0007040600070406ULL
   
+ +int kvm_check_nested_events(struct kvm_vcpu *vcpu);
+ +
   static inline void kvm_clear_exception_queue(struct kvm_vcpu *vcpu)
   {
         vcpu->arch.exception.pending = false;
@@@ -260,7 -250,6 +260,6 @@@ static inline bool kvm_vcpu_latch_init(
   void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock, int sec_hi_ofs);
   void kvm_inject_realmode_interrupt(struct kvm_vcpu *vcpu, int irq, int inc_eip);
   
- void kvm_write_tsc(struct kvm_vcpu *vcpu, struct msr_data *msr);
   u64 get_kvmclock_ns(struct kvm *kvm);
   
   int kvm_read_guest_virt(struct kvm_vcpu *vcpu,
diff --combined tools/testing/selftests/kvm/.gitignore

index 137ab72,7bd7e77..34414e8
--- 1/tools/testing/selftests/kvm/.gitignore
--- 2/tools/testing/selftests/kvm/.gitignore
+++ b/tools/testing/selftests/kvm/.gitignore
@@@ -8,10 -8,13 +8,13 @@@
   /x86_64/debug_regs
   /x86_64/evmcs_test
   /x86_64/get_cpuid_test
+ /x86_64/get_msr_index_features
   /x86_64/kvm_pv_test
+ /x86_64/hyperv_clock
   /x86_64/hyperv_cpuid
   /x86_64/mmio_warning_test
   /x86_64/platform_info_test
+ /x86_64/set_boot_cpu_id
   /x86_64/set_sregs_test
   /x86_64/smm_test
   /x86_64/state_test
@@@ -35,7 -38,6 +38,7 @@@
   /dirty_log_perf_test
   /hardware_disable_test
   /kvm_create_max_vcpus
+ +/kvm_page_table_test
   /memslot_modification_stress_test
   /set_memory_region_test
   /steal_time
diff --combined tools/testing/selftests/kvm/Makefile

index 75dc57d,67eebb5..6b0a9e7
--- 1/tools/testing/selftests/kvm/Makefile
--- 2/tools/testing/selftests/kvm/Makefile
+++ b/tools/testing/selftests/kvm/Makefile
@@@ -39,12 -39,15 +39,15 @@@ LIBKVM_aarch64 = lib/aarch64/processor.
   LIBKVM_s390x = lib/s390x/processor.c lib/s390x/ucall.c lib/s390x/diag318_test_handler.c
   
   TEST_GEN_PROGS_x86_64 = x86_64/cr4_cpuid_sync_test
+ TEST_GEN_PROGS_x86_64 += x86_64/get_msr_index_features
   TEST_GEN_PROGS_x86_64 += x86_64/evmcs_test
   TEST_GEN_PROGS_x86_64 += x86_64/get_cpuid_test
+ TEST_GEN_PROGS_x86_64 += x86_64/hyperv_clock
   TEST_GEN_PROGS_x86_64 += x86_64/hyperv_cpuid
   TEST_GEN_PROGS_x86_64 += x86_64/kvm_pv_test
   TEST_GEN_PROGS_x86_64 += x86_64/mmio_warning_test
   TEST_GEN_PROGS_x86_64 += x86_64/platform_info_test
+ TEST_GEN_PROGS_x86_64 += x86_64/set_boot_cpu_id
   TEST_GEN_PROGS_x86_64 += x86_64/set_sregs_test
   TEST_GEN_PROGS_x86_64 += x86_64/smm_test
   TEST_GEN_PROGS_x86_64 += x86_64/state_test
@@@ -69,7 -72,6 +72,7 @@@ TEST_GEN_PROGS_x86_64 += dirty_log_tes
   TEST_GEN_PROGS_x86_64 += dirty_log_perf_test
   TEST_GEN_PROGS_x86_64 += hardware_disable_test
   TEST_GEN_PROGS_x86_64 += kvm_create_max_vcpus
+ +TEST_GEN_PROGS_x86_64 += kvm_page_table_test
   TEST_GEN_PROGS_x86_64 += memslot_modification_stress_test
   TEST_GEN_PROGS_x86_64 += set_memory_region_test
   TEST_GEN_PROGS_x86_64 += steal_time
@@@ -80,7 -82,6 +83,7 @@@ TEST_GEN_PROGS_aarch64 += demand_paging
   TEST_GEN_PROGS_aarch64 += dirty_log_test
   TEST_GEN_PROGS_aarch64 += dirty_log_perf_test
   TEST_GEN_PROGS_aarch64 += kvm_create_max_vcpus
+ +TEST_GEN_PROGS_aarch64 += kvm_page_table_test
   TEST_GEN_PROGS_aarch64 += set_memory_region_test
   TEST_GEN_PROGS_aarch64 += steal_time
   
@@@ -90,7 -91,6 +93,7 @@@ TEST_GEN_PROGS_s390x += s390x/sync_regs
   TEST_GEN_PROGS_s390x += demand_paging_test
   TEST_GEN_PROGS_s390x += dirty_log_test
   TEST_GEN_PROGS_s390x += kvm_create_max_vcpus
+ +TEST_GEN_PROGS_s390x += kvm_page_table_test
   TEST_GEN_PROGS_s390x += set_memory_region_test
   
   TEST_GEN_PROGS += $(TEST_GEN_PROGS_$(UNAME_M))
diff --combined tools/testing/selftests/kvm/include/kvm_util.h

index f52a749,0f4258e..0e6cc25
--- 1/tools/testing/selftests/kvm/include/kvm_util.h
--- 2/tools/testing/selftests/kvm/include/kvm_util.h
+++ b/tools/testing/selftests/kvm/include/kvm_util.h
@@@ -16,6 -16,7 +16,7 @@@
   
   #include "sparsebit.h"
   
+ #define KVM_DEV_PATH "/dev/kvm"
   #define KVM_MAX_VCPUS 512
   
   /*
@@@ -68,6 -69,9 +69,6 @@@ enum vm_guest_mode 
   #define MIN_PAGE_SIZE         (1U << MIN_PAGE_SHIFT)
   #define PTES_PER_MIN_PAGE     ptes_per_page(MIN_PAGE_SIZE)
   
- -#define vm_guest_mode_string(m) vm_guest_mode_string[m]
- -extern const char * const vm_guest_mode_string[];
- -
   struct vm_guest_mode_params {
         unsigned int pa_bits;
         unsigned int va_bits;
@@@ -81,7 -85,6 +82,7 @@@ int vm_enable_cap(struct kvm_vm *vm, st
   int vcpu_enable_cap(struct kvm_vm *vm, uint32_t vcpu_id,
                     struct kvm_enable_cap *cap);
   void vm_enable_dirty_ring(struct kvm_vm *vm, uint32_t ring_size);
+ +const char *vm_guest_mode_string(uint32_t i);
   
   struct kvm_vm *vm_create(enum vm_guest_mode mode, uint64_t phy_pages, int perm);
   void kvm_vm_free(struct kvm_vm *vmp);
@@@ -131,6 -134,7 +132,7 @@@ void vcpu_ioctl(struct kvm_vm *vm, uint
   int _vcpu_ioctl(struct kvm_vm *vm, uint32_t vcpuid, unsigned long ioctl,
                 void *arg);
   void vm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
+ int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg);
   void kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
   int _kvm_ioctl(struct kvm_vm *vm, unsigned long ioctl, void *arg);
   void vm_mem_region_set_flags(struct kvm_vm *vm, uint32_t slot, uint32_t flags);
diff --combined tools/testing/selftests/kvm/lib/kvm_util.c

index c7a2228,b8849a1..35247db
--- 1/tools/testing/selftests/kvm/lib/kvm_util.c
--- 2/tools/testing/selftests/kvm/lib/kvm_util.c
+++ b/tools/testing/selftests/kvm/lib/kvm_util.c
@@@ -18,6 -18,7 +18,6 @@@
   #include <unistd.h>
   #include <linux/kernel.h>
   
- -#define KVM_UTIL_PGS_PER_HUGEPG 512
   #define KVM_UTIL_MIN_PFN      2
   
   static int vcpu_mmap_sz(void);
@@@ -142,24 -143,17 +142,24 @@@ static void vm_open(struct kvm_vm *vm, 
                 "rc: %i errno: %i", vm->fd, errno);
   }
   
- -const char * const vm_guest_mode_string[] = {
- -      "PA-bits:52,  VA-bits:48,  4K pages",
- -      "PA-bits:52,  VA-bits:48, 64K pages",
- -      "PA-bits:48,  VA-bits:48,  4K pages",
- -      "PA-bits:48,  VA-bits:48, 64K pages",
- -      "PA-bits:40,  VA-bits:48,  4K pages",
- -      "PA-bits:40,  VA-bits:48, 64K pages",
- -      "PA-bits:ANY, VA-bits:48,  4K pages",
- -};
- -_Static_assert(sizeof(vm_guest_mode_string)/sizeof(char *) == NUM_VM_MODES,
- -             "Missing new mode strings?");
+ +const char *vm_guest_mode_string(uint32_t i)
+ +{
+ +      static const char * const strings[] = {
+ +              [VM_MODE_P52V48_4K]     = "PA-bits:52,  VA-bits:48,  4K pages",
+ +              [VM_MODE_P52V48_64K]    = "PA-bits:52,  VA-bits:48, 64K pages",
+ +              [VM_MODE_P48V48_4K]     = "PA-bits:48,  VA-bits:48,  4K pages",
+ +              [VM_MODE_P48V48_64K]    = "PA-bits:48,  VA-bits:48, 64K pages",
+ +              [VM_MODE_P40V48_4K]     = "PA-bits:40,  VA-bits:48,  4K pages",
+ +              [VM_MODE_P40V48_64K]    = "PA-bits:40,  VA-bits:48, 64K pages",
+ +              [VM_MODE_PXXV48_4K]     = "PA-bits:ANY, VA-bits:48,  4K pages",
+ +      };
+ +      _Static_assert(sizeof(strings)/sizeof(char *) == NUM_VM_MODES,
+ +                     "Missing new mode strings?");
+ +
+ +      TEST_ASSERT(i < NUM_VM_MODES, "Guest mode ID %d too big", i);
+ +
+ +      return strings[i];
+ +}
   
   const struct vm_guest_mode_params vm_guest_mode_params[] = {
         { 52, 48,  0x1000, 12 },
@@@ -687,7 -681,7 +687,7 @@@ void vm_userspace_mem_region_add(struc
   {
         int ret;
         struct userspace_mem_region *region;
- -      size_t huge_page_size = KVM_UTIL_PGS_PER_HUGEPG * vm->page_size;
+ +      size_t backing_src_pagesz = get_backing_src_pagesz(src_type);
         size_t alignment;
   
         TEST_ASSERT(vm_adjust_num_guest_pages(vm->mode, npages) == npages,
@@@ -749,7 -743,7 +749,7 @@@
   #endif
   
         if (src_type == VM_MEM_SRC_ANONYMOUS_THP)
- -              alignment = max(huge_page_size, alignment);
+ +              alignment = max(backing_src_pagesz, alignment);
   
         /* Add enough memory to align up if necessary */
         if (alignment > 1)
@@@ -758,7 -752,7 +758,7 @@@
         region->mmap_start = mmap(NULL, region->mmap_size,
                                   PROT_READ | PROT_WRITE,
                                   MAP_PRIVATE | MAP_ANONYMOUS
- -                                | (src_type == VM_MEM_SRC_ANONYMOUS_HUGETLB ? MAP_HUGETLB : 0),
+ +                                | vm_mem_backing_src_alias(src_type)->flag,
                                   -1, 0);
         TEST_ASSERT(region->mmap_start != MAP_FAILED,
                     "test_malloc failed, mmap_start: %p errno: %i",
@@@ -768,13 -762,22 +768,13 @@@
         region->host_mem = align(region->mmap_start, alignment);
   
         /* As needed perform madvise */
- -      if (src_type == VM_MEM_SRC_ANONYMOUS || src_type == VM_MEM_SRC_ANONYMOUS_THP) {
- -              struct stat statbuf;
- -
- -              ret = stat("/sys/kernel/mm/transparent_hugepage", &statbuf);
- -              TEST_ASSERT(ret == 0 || (ret == -1 && errno == ENOENT),
- -                          "stat /sys/kernel/mm/transparent_hugepage");
- -
- -              TEST_ASSERT(ret == 0 || src_type != VM_MEM_SRC_ANONYMOUS_THP,
- -                          "VM_MEM_SRC_ANONYMOUS_THP requires THP to be configured in the host kernel");
- -
- -              if (ret == 0) {
- -                      ret = madvise(region->host_mem, npages * vm->page_size,
- -                                    src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
- -                      TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %x",
- -                                  region->host_mem, npages * vm->page_size, src_type);
- -              }
+ +      if ((src_type == VM_MEM_SRC_ANONYMOUS ||
+ +           src_type == VM_MEM_SRC_ANONYMOUS_THP) && thp_configured()) {
+ +              ret = madvise(region->host_mem, npages * vm->page_size,
+ +                            src_type == VM_MEM_SRC_ANONYMOUS ? MADV_NOHUGEPAGE : MADV_HUGEPAGE);
+ +              TEST_ASSERT(ret == 0, "madvise failed, addr: %p length: 0x%lx src_type: %s",
+ +                          region->host_mem, npages * vm->page_size,
+ +                          vm_mem_backing_src_alias(src_type)->name);
         }
   
         region->unused_phy_pages = sparsebit_alloc();
@@@ -1694,11 -1697,16 +1694,16 @@@ void vm_ioctl(struct kvm_vm *vm, unsign
   {
         int ret;
   
-       ret = ioctl(vm->fd, cmd, arg);
+       ret = _vm_ioctl(vm, cmd, arg);
         TEST_ASSERT(ret == 0, "vm ioctl %lu failed, rc: %i errno: %i (%s)",
                 cmd, ret, errno, strerror(errno));
   }
   
+ int _vm_ioctl(struct kvm_vm *vm, unsigned long cmd, void *arg)
+ {
+       return ioctl(vm->fd, cmd, arg);
+ }
+ 
   /*
    * KVM system ioctl
    *
author	Paolo Bonzini <pbonzini@redhat.com>
	Thu, 22 Apr 2021 06:39:48 +0000 (02:39 -0400)
committer	Paolo Bonzini <pbonzini@redhat.com>
	Thu, 22 Apr 2021 17:19:01 +0000 (13:19 -0400)
		1	2
Documentation/virt/kvm/api.rst	patch \|	diff1 \|	diff2 \|	blob \| history
MAINTAINERS	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/include/asm/kvm_host.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kernel/kvm.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/svm/nested.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/svm/sev.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/svm/svm.h	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.c	patch \|	diff1 \|	diff2 \|	blob \| history
arch/x86/kvm/x86.h	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/.gitignore	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/Makefile	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/include/kvm_util.h	patch \|	diff1 \|	diff2 \|	blob \| history
tools/testing/selftests/kvm/lib/kvm_util.c	patch \|	diff1 \|	diff2 \|	blob \| history