Merge tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm
authorLinus Torvalds <torvalds@linux-foundation.org>
Tue, 7 Sep 2021 20:40:51 +0000 (13:40 -0700)
committerLinus Torvalds <torvalds@linux-foundation.org>
Tue, 7 Sep 2021 20:40:51 +0000 (13:40 -0700)
Pull KVM updates from Paolo Bonzini:
 "ARM:
   - Page ownership tracking between host EL1 and EL2
   - Rely on userspace page tables to create large stage-2 mappings
   - Fix incompatibility between pKVM and kmemleak
   - Fix the PMU reset state, and improve the performance of the virtual
     PMU
   - Move over to the generic KVM entry code
   - Address PSCI reset issues w.r.t. save/restore
   - Preliminary rework for the upcoming pKVM fixed feature
   - A bunch of MM cleanups
   - A vGIC fix for spurious timer interrupts
   - Various cleanups

  s390:
   - enable interpretation of specification exceptions
   - fix a vcpu_idx vs vcpu_id mixup

  x86:
   - fast (lockless) page fault support for the new MMU
   - new MMU now the default
   - increased maximum allowed VCPU count
   - allow inhibiting IRQs on KVM_RUN while debugging guests
   - let Hyper-V-enabled guests run with virtualized LAPIC as long as
     they do not enable the Hyper-V "AutoEOI" feature
   - fixes and optimizations for the toggling of AMD AVIC (virtualized
     LAPIC)
   - tuning for the case when two-dimensional paging (EPT/NPT) is
     disabled
   - bugfixes and cleanups, especially with respect to vCPU reset and
     choosing a paging mode based on CR0/CR4/EFER
   - support for 5-level page table on AMD processors

  Generic:
   - MMU notifier invalidation callbacks do not take mmu_lock unless
     necessary
   - improved caching of LRU kvm_memory_slot
   - support for histogram statistics
   - add statistics for halt polling and remote TLB flush requests"

* tag 'for-linus' of git://git.kernel.org/pub/scm/virt/kvm/kvm: (210 commits)
  KVM: Drop unused kvm_dirty_gfn_invalid()
  KVM: x86: Update vCPU's hv_clock before back to guest when tsc_offset is adjusted
  KVM: MMU: mark role_regs and role accessors as maybe unused
  KVM: MIPS: Remove a "set but not used" variable
  x86/kvm: Don't enable IRQ when IRQ enabled in kvm_wait
  KVM: stats: Add VM stat for remote tlb flush requests
  KVM: Remove unnecessary export of kvm_{inc,dec}_notifier_count()
  KVM: x86/mmu: Move lpage_disallowed_link further "down" in kvm_mmu_page
  KVM: x86/mmu: Relocate kvm_mmu_page.tdp_mmu_page for better cache locality
  Revert "KVM: x86: mmu: Add guest physical address check in translate_gpa()"
  KVM: x86/mmu: Remove unused field mmio_cached in struct kvm_mmu_page
  kvm: x86: Increase KVM_SOFT_MAX_VCPUS to 710
  kvm: x86: Increase MAX_VCPUS to 1024
  kvm: x86: Set KVM_MAX_VCPU_ID to 4*KVM_MAX_VCPUS
  KVM: VMX: avoid running vmx_handle_exit_irqoff in case of emulation
  KVM: x86/mmu: Don't freak out if pml5_root is NULL on 4-level host
  KVM: s390: index kvm->arch.idle_mask by vcpu_idx
  KVM: s390: Enable specification exception interpretation
  KVM: arm64: Trim guest debug exception handling
  KVM: SVM: Add 5-level page table support for SVM
  ...

13 files changed:
Documentation/virt/kvm/api.rst
arch/arm64/include/asm/cpufeature.h
arch/arm64/include/asm/sysreg.h
arch/arm64/kernel/cpufeature.c
arch/powerpc/include/asm/kvm_host.h
arch/powerpc/kvm/book3s_64_vio_hv.c
arch/powerpc/kvm/book3s_hv.c
arch/s390/include/asm/kvm_host.h
arch/s390/kvm/kvm-s390.c
arch/x86/kvm/hyperv.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/vmx/nested.c

@@@ -3357,6 -3357,7 +3357,7 @@@ flags which can include the following
    - KVM_GUESTDBG_INJECT_DB:     inject DB type exception [x86]
    - KVM_GUESTDBG_INJECT_BP:     inject BP type exception [x86]
    - KVM_GUESTDBG_EXIT_PENDING:  trigger an immediate guest exit [s390]
+   - KVM_GUESTDBG_BLOCKIRQ:      avoid injecting interrupts/NMI/SMI [x86]
  
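For illustration, a minimal userspace sketch (not taken from the kernel tree) of
how the new KVM_GUESTDBG_BLOCKIRQ flag might be combined with single-stepping;
``vcpu_fd`` is assumed to be an already-created vCPU file descriptor::

    #include <linux/kvm.h>
    #include <string.h>
    #include <sys/ioctl.h>

    /* Single-step the vCPU while blocking IRQ/NMI/SMI injection (x86 only). */
    static int enable_blockirq_singlestep(int vcpu_fd)
    {
            struct kvm_guest_debug dbg;

            memset(&dbg, 0, sizeof(dbg));
            dbg.control = KVM_GUESTDBG_ENABLE |
                          KVM_GUESTDBG_SINGLESTEP |
                          KVM_GUESTDBG_BLOCKIRQ;

            return ioctl(vcpu_fd, KVM_SET_GUEST_DEBUG, &dbg);
    }
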
  For example KVM_GUESTDBG_USE_SW_BP indicates that software breakpoints
  are enabled in memory so we need to ensure breakpoint exceptions are
@@@ -5077,7 -5078,7 +5078,7 @@@ of bytes successfully copied is returne
  then ``length`` is returned.
  
  4.131 KVM_GET_SREGS2
 -------------------
 +--------------------
  
  :Capability: KVM_CAP_SREGS2
  :Architectures: x86
@@@ -5090,17 -5091,17 +5091,17 @@@ This ioctl (when supported) replaces th
  
  ::
  
 -struct kvm_sregs2 {
 -      /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
 -      struct kvm_segment cs, ds, es, fs, gs, ss;
 -      struct kvm_segment tr, ldt;
 -      struct kvm_dtable gdt, idt;
 -      __u64 cr0, cr2, cr3, cr4, cr8;
 -      __u64 efer;
 -      __u64 apic_base;
 -      __u64 flags;
 -      __u64 pdptrs[4];
 -};
 +        struct kvm_sregs2 {
 +                /* out (KVM_GET_SREGS2) / in (KVM_SET_SREGS2) */
 +                struct kvm_segment cs, ds, es, fs, gs, ss;
 +                struct kvm_segment tr, ldt;
 +                struct kvm_dtable gdt, idt;
 +                __u64 cr0, cr2, cr3, cr4, cr8;
 +                __u64 efer;
 +                __u64 apic_base;
 +                __u64 flags;
 +                __u64 pdptrs[4];
 +        };
  
  flags values for ``kvm_sregs2``:
  
  
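For illustration, a minimal sketch of calling this ioctl from userspace; it is
not taken from the kernel tree, ``vcpu_fd`` is assumed to be an already-created
vCPU file descriptor, and KVM_CAP_SREGS2 is assumed to have been checked::

    #include <linux/kvm.h>
    #include <stdio.h>
    #include <sys/ioctl.h>

    /* Dump the cached PDPTRs if KVM reports them as valid. */
    static int dump_pdptrs(int vcpu_fd)
    {
            struct kvm_sregs2 sregs2;
            int i;

            if (ioctl(vcpu_fd, KVM_GET_SREGS2, &sregs2) < 0)
                    return -1;

            if (sregs2.flags & KVM_SREGS2_FLAGS_PDPTRS_VALID)
                    for (i = 0; i < 4; i++)
                            printf("pdptr[%d] = 0x%llx\n", i,
                                   (unsigned long long)sregs2.pdptrs[i]);
            return 0;
    }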
  
  4.132 KVM_SET_SREGS2
 -------------------
 +--------------------
  
  :Capability: KVM_CAP_SREGS2
  :Architectures: x86
@@@ -5201,13 -5202,15 +5202,16 @@@ trailing ``'\0'``, is indicated by the 
   The descriptors block only needs to be read once for the lifetime of the
   file descriptor. It contains a sequence of ``struct kvm_stats_desc``, each
   followed by a string of size ``name_size``.
 +::
  
        #define KVM_STATS_TYPE_SHIFT            0
        #define KVM_STATS_TYPE_MASK             (0xF << KVM_STATS_TYPE_SHIFT)
        #define KVM_STATS_TYPE_CUMULATIVE       (0x0 << KVM_STATS_TYPE_SHIFT)
        #define KVM_STATS_TYPE_INSTANT          (0x1 << KVM_STATS_TYPE_SHIFT)
        #define KVM_STATS_TYPE_PEAK             (0x2 << KVM_STATS_TYPE_SHIFT)
+       #define KVM_STATS_TYPE_LINEAR_HIST      (0x3 << KVM_STATS_TYPE_SHIFT)
+       #define KVM_STATS_TYPE_LOG_HIST         (0x4 << KVM_STATS_TYPE_SHIFT)
+       #define KVM_STATS_TYPE_MAX              KVM_STATS_TYPE_LOG_HIST
  
        #define KVM_STATS_UNIT_SHIFT            4
        #define KVM_STATS_UNIT_MASK             (0xF << KVM_STATS_UNIT_SHIFT)
        #define KVM_STATS_UNIT_BYTES            (0x1 << KVM_STATS_UNIT_SHIFT)
        #define KVM_STATS_UNIT_SECONDS          (0x2 << KVM_STATS_UNIT_SHIFT)
        #define KVM_STATS_UNIT_CYCLES           (0x3 << KVM_STATS_UNIT_SHIFT)
+       #define KVM_STATS_UNIT_MAX              KVM_STATS_UNIT_CYCLES
  
        #define KVM_STATS_BASE_SHIFT            8
        #define KVM_STATS_BASE_MASK             (0xF << KVM_STATS_BASE_SHIFT)
        #define KVM_STATS_BASE_POW10            (0x0 << KVM_STATS_BASE_SHIFT)
        #define KVM_STATS_BASE_POW2             (0x1 << KVM_STATS_BASE_SHIFT)
+       #define KVM_STATS_BASE_MAX              KVM_STATS_BASE_POW2
  
        struct kvm_stats_desc {
                __u32 flags;
                __s16 exponent;
                __u16 size;
                __u32 offset;
-               __u32 unused;
+               __u32 bucket_size;
                char name[];
        };
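
For illustration, a small standalone sketch of how a userspace consumer might
decode the type, unit and base packed into ``flags``; the defines are local
copies of the constants quoted above so the snippet compiles on its own::

    #include <stdint.h>

    #define KVM_STATS_TYPE_SHIFT    0
    #define KVM_STATS_TYPE_MASK     (0xF << KVM_STATS_TYPE_SHIFT)
    #define KVM_STATS_UNIT_SHIFT    4
    #define KVM_STATS_UNIT_MASK     (0xF << KVM_STATS_UNIT_SHIFT)
    #define KVM_STATS_BASE_SHIFT    8
    #define KVM_STATS_BASE_MASK     (0xF << KVM_STATS_BASE_SHIFT)

    /* Each helper returns a value comparable with the KVM_STATS_* constants. */
    static inline uint32_t stats_type(uint32_t flags)
    {
            return flags & KVM_STATS_TYPE_MASK;   /* e.g. KVM_STATS_TYPE_LOG_HIST */
    }

    static inline uint32_t stats_unit(uint32_t flags)
    {
            return flags & KVM_STATS_UNIT_MASK;   /* e.g. KVM_STATS_UNIT_SECONDS */
    }

    static inline uint32_t stats_base(uint32_t flags)
    {
            return flags & KVM_STATS_BASE_MASK;   /* POW10 or POW2 */
    }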
  
@@@ -5235,26 -5240,38 +5241,40 @@@ by this descriptor. Its endianness is C
  The following flags are supported:
  
  Bits 0-3 of ``flags`` encode the type:
 +
    * ``KVM_STATS_TYPE_CUMULATIVE``
-     The statistics data is cumulative. The value of data can only be increased.
+     The statistic reports a cumulative count. The value of data can only be increased.
      Most of the counters used in KVM are of this type.
      The corresponding ``size`` field for this type is always 1.
      All cumulative statistics data are read/write.
    * ``KVM_STATS_TYPE_INSTANT``
-     The statistics data is instantaneous. Its value can be increased or
+     The statistic reports an instantaneous value. Its value can be increased or
      decreased. This type is usually used as a measurement of some resources,
      like the number of dirty pages, the number of large pages, etc.
      All instant statistics are read only.
      The corresponding ``size`` field for this type is always 1.
    * ``KVM_STATS_TYPE_PEAK``
-     The statistics data is peak. The value of data can only be increased, and
-     represents a peak value for a measurement, for example the maximum number
+     The statistics data reports a peak value, for example the maximum number
      of items in a hash table bucket, the longest time waited and so on.
+     The value of data can only be increased.
      The corresponding ``size`` field for this type is always 1.
+   * ``KVM_STATS_TYPE_LINEAR_HIST``
+     The statistic is reported as a linear histogram. The number of
+     buckets is specified by the ``size`` field. The size of buckets is specified
+     by the ``bucket_size`` field. The range of the Nth bucket (1 <= N < ``size``)
+     is [``bucket_size``*(N-1), ``bucket_size``*N), while the range of the last
+     bucket is [``bucket_size``*(``size``-1), +INF). (+INF means positive infinity
+     value.) The bucket value indicates how many samples fell in the bucket's range.
+   * ``KVM_STATS_TYPE_LOG_HIST``
+     The statistic is reported as a logarithmic histogram. The number of
+     buckets is specified by the ``size`` field. The range of the first bucket is
+     [0, 1), while the range of the last bucket is [pow(2, ``size``-2), +INF).
+     Otherwise, the Nth bucket (1 < N < ``size``) covers
+     [pow(2, N-2), pow(2, N-1)). The bucket value indicates how many samples fell
+     in the bucket's range.
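
To make the bucket arithmetic concrete, a small sketch (not taken from the
kernel sources) that maps a sample value to its 0-based bucket index for both
histogram types, using the descriptor's ``size`` and ``bucket_size`` fields::

    #include <stdint.h>

    /* Linear histogram: bucket i covers [bucket_size*i, bucket_size*(i+1)),
     * except the last bucket, which extends to +INF. */
    static uint32_t linear_hist_index(uint64_t value, uint32_t bucket_size,
                                      uint32_t size)
    {
            uint64_t idx = value / bucket_size;

            return idx >= size ? size - 1 : (uint32_t)idx;
    }

    /* Log histogram: bucket 0 covers [0, 1), bucket i covers [2^(i-1), 2^i),
     * and the last bucket extends to +INF. */
    static uint32_t log_hist_index(uint64_t value, uint32_t size)
    {
            uint32_t idx = 0;

            while (value) {
                    value >>= 1;
                    idx++;
            }
            return idx >= size ? size - 1 : idx;
    }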
  
  Bits 4-7 of ``flags`` encode the unit:
 +
    * ``KVM_STATS_UNIT_NONE``
      There is no unit for the value of statistics data. This usually means that
      the value is a simple counter of an event.
  
  Bits 8-11 of ``flags``, together with ``exponent``, encode the scale of the
  unit:
 +
    * ``KVM_STATS_BASE_POW10``
      The scale is based on power of 10. It is used for measurement of time and
      CPU clock cycles.  For example, an exponent of -9 can be used with
@@@ -5286,9 -5302,9 +5306,9 @@@ unsigned 64bit data
  The ``offset`` field is the offset from the start of Data Block to the start of
  the corresponding statistics data.
  
- The ``unused`` field is reserved for future support for other types of
- statistics data, like log/linear histogram. Its value is always 0 for the types
- defined above.
+ The ``bucket_size`` field is used as a parameter for histogram statistics data.
+ It is only used by linear histogram statistics data, specifying the size of a
+ bucket.
  
  The ``name`` field is the name string of the statistics data. The name string
  starts at the end of ``struct kvm_stats_desc``.  The maximum length including
@@@ -7217,7 -7233,7 +7237,7 @@@ supported in the host. A VMM can check 
  available to the guest on migration.
  
  8.33 KVM_CAP_HYPERV_ENFORCE_CPUID
 ------------------------------
 +---------------------------------
  
  Architectures: x86
  
@@@ -552,7 -552,7 +552,7 @@@ cpuid_feature_cap_perfmon_field(u64 fea
        u64 mask = GENMASK_ULL(field + 3, field);
  
        /* Treat IMPLEMENTATION DEFINED functionality as unimplemented */
 -      if (val == 0xf)
 +      if (val == ID_AA64DFR0_PMUVER_IMP_DEF)
                val = 0;
  
        if (val > cap) {
@@@ -602,14 -602,14 +602,14 @@@ static inline bool id_aa64pfr0_32bit_el
  {
        u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL1_SHIFT);
  
-       return val == ID_AA64PFR0_EL1_32BIT_64BIT;
+       return val == ID_AA64PFR0_ELx_32BIT_64BIT;
  }
  
  static inline bool id_aa64pfr0_32bit_el0(u64 pfr0)
  {
        u32 val = cpuid_feature_extract_unsigned_field(pfr0, ID_AA64PFR0_EL0_SHIFT);
  
-       return val == ID_AA64PFR0_EL0_32BIT_64BIT;
+       return val == ID_AA64PFR0_ELx_32BIT_64BIT;
  }
  
  static inline bool id_aa64pfr0_sve(u64 pfr0)
@@@ -784,13 -784,13 +784,13 @@@ extern int do_emulate_mrs(struct pt_reg
  static inline u32 id_aa64mmfr0_parange_to_phys_shift(int parange)
  {
        switch (parange) {
-       case 0: return 32;
-       case 1: return 36;
-       case 2: return 40;
-       case 3: return 42;
-       case 4: return 44;
-       case 5: return 48;
-       case 6: return 52;
+       case ID_AA64MMFR0_PARANGE_32: return 32;
+       case ID_AA64MMFR0_PARANGE_36: return 36;
+       case ID_AA64MMFR0_PARANGE_40: return 40;
+       case ID_AA64MMFR0_PARANGE_42: return 42;
+       case ID_AA64MMFR0_PARANGE_44: return 44;
+       case ID_AA64MMFR0_PARANGE_48: return 48;
+       case ID_AA64MMFR0_PARANGE_52: return 52;
        /*
         * A future PE could use a value unknown to the kernel.
         * However, by the "D10.1.4 Principles of the ID scheme
@@@ -11,7 -11,6 +11,7 @@@
  
  #include <linux/bits.h>
  #include <linux/stringify.h>
 +#include <linux/kasan-tags.h>
  
  /*
   * ARMv8 ARM reserves the following encoding for system registers:
        (SCTLR_ELx_M    | SCTLR_ELx_C    | SCTLR_ELx_SA   | SCTLR_EL1_SA0   | \
         SCTLR_EL1_SED  | SCTLR_ELx_I    | SCTLR_EL1_DZE  | SCTLR_EL1_UCT   | \
         SCTLR_EL1_NTWE | SCTLR_ELx_IESB | SCTLR_EL1_SPAN | SCTLR_ELx_ITFSB | \
 -       SCTLR_ELx_ATA  | SCTLR_EL1_ATA0 | ENDIAN_SET_EL1 | SCTLR_EL1_UCI   | \
 -       SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
 +       ENDIAN_SET_EL1 | SCTLR_EL1_UCI  | SCTLR_EL1_EPAN | SCTLR_EL1_RES1)
  
  /* MAIR_ELx memory attributes (used by Linux) */
  #define MAIR_ATTR_DEVICE_nGnRnE               UL(0x00)
  #define ID_AA64PFR0_AMU                       0x1
  #define ID_AA64PFR0_SVE                       0x1
  #define ID_AA64PFR0_RAS_V1            0x1
+ #define ID_AA64PFR0_RAS_V1P1          0x2
  #define ID_AA64PFR0_FP_NI             0xf
  #define ID_AA64PFR0_FP_SUPPORTED      0x0
  #define ID_AA64PFR0_ASIMD_NI          0xf
  #define ID_AA64PFR0_ASIMD_SUPPORTED   0x0
- #define ID_AA64PFR0_EL1_64BIT_ONLY    0x1
- #define ID_AA64PFR0_EL1_32BIT_64BIT   0x2
- #define ID_AA64PFR0_EL0_64BIT_ONLY    0x1
- #define ID_AA64PFR0_EL0_32BIT_64BIT   0x2
+ #define ID_AA64PFR0_ELx_64BIT_ONLY    0x1
+ #define ID_AA64PFR0_ELx_32BIT_64BIT   0x2
  
  /* id_aa64pfr1 */
  #define ID_AA64PFR1_MPAMFRAC_SHIFT    16
  #define ID_AA64MMFR0_ASID_SHIFT               4
  #define ID_AA64MMFR0_PARANGE_SHIFT    0
  
+ #define ID_AA64MMFR0_ASID_8           0x0
+ #define ID_AA64MMFR0_ASID_16          0x2
  #define ID_AA64MMFR0_TGRAN4_NI                        0xf
  #define ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN     0x0
  #define ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX     0x7
  #define ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN    0x1
  #define ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX    0xf
  
+ #define ID_AA64MMFR0_PARANGE_32               0x0
+ #define ID_AA64MMFR0_PARANGE_36               0x1
+ #define ID_AA64MMFR0_PARANGE_40               0x2
+ #define ID_AA64MMFR0_PARANGE_42               0x3
+ #define ID_AA64MMFR0_PARANGE_44               0x4
  #define ID_AA64MMFR0_PARANGE_48               0x5
  #define ID_AA64MMFR0_PARANGE_52               0x6
  
+ #define ARM64_MIN_PARANGE_BITS                32
  #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_DEFAULT        0x0
  #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_NONE   0x1
  #define ID_AA64MMFR0_TGRAN_2_SUPPORTED_MIN    0x2
  #define ID_AA64MMFR2_CNP_SHIFT                0
  
  /* id_aa64dfr0 */
+ #define ID_AA64DFR0_MTPMU_SHIFT               48
  #define ID_AA64DFR0_TRBE_SHIFT                44
  #define ID_AA64DFR0_TRACE_FILT_SHIFT  40
  #define ID_AA64DFR0_DOUBLELOCK_SHIFT  36
  #define ID_AA64MMFR0_TGRAN_SHIFT              ID_AA64MMFR0_TGRAN4_SHIFT
  #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN      ID_AA64MMFR0_TGRAN4_SUPPORTED_MIN
  #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX      ID_AA64MMFR0_TGRAN4_SUPPORTED_MAX
+ #define ID_AA64MMFR0_TGRAN_2_SHIFT            ID_AA64MMFR0_TGRAN4_2_SHIFT
  #elif defined(CONFIG_ARM64_16K_PAGES)
  #define ID_AA64MMFR0_TGRAN_SHIFT              ID_AA64MMFR0_TGRAN16_SHIFT
  #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN      ID_AA64MMFR0_TGRAN16_SUPPORTED_MIN
  #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX      ID_AA64MMFR0_TGRAN16_SUPPORTED_MAX
+ #define ID_AA64MMFR0_TGRAN_2_SHIFT            ID_AA64MMFR0_TGRAN16_2_SHIFT
  #elif defined(CONFIG_ARM64_64K_PAGES)
  #define ID_AA64MMFR0_TGRAN_SHIFT              ID_AA64MMFR0_TGRAN64_SHIFT
  #define ID_AA64MMFR0_TGRAN_SUPPORTED_MIN      ID_AA64MMFR0_TGRAN64_SUPPORTED_MIN
  #define ID_AA64MMFR0_TGRAN_SUPPORTED_MAX      ID_AA64MMFR0_TGRAN64_SUPPORTED_MAX
+ #define ID_AA64MMFR0_TGRAN_2_SHIFT            ID_AA64MMFR0_TGRAN64_2_SHIFT
  #endif
  
  #define MVFR2_FPMISC_SHIFT            4
  #define SYS_GCR_EL1_RRND      (BIT(16))
  #define SYS_GCR_EL1_EXCL_MASK 0xffffUL
  
 +#ifdef CONFIG_KASAN_HW_TAGS
 +/*
 + * KASAN always uses a whole byte for its tags. With CONFIG_KASAN_HW_TAGS it
 + * only uses tags in the range 0xF0-0xFF, which we map to MTE tags 0x0-0xF.
 + */
 +#define __MTE_TAG_MIN         (KASAN_TAG_MIN & 0xf)
 +#define __MTE_TAG_MAX         (KASAN_TAG_MAX & 0xf)
 +#define __MTE_TAG_INCL                GENMASK(__MTE_TAG_MAX, __MTE_TAG_MIN)
 +#define KERNEL_GCR_EL1_EXCL   (SYS_GCR_EL1_EXCL_MASK & ~__MTE_TAG_INCL)
 +#else
 +#define KERNEL_GCR_EL1_EXCL   SYS_GCR_EL1_EXCL_MASK
 +#endif
 +
 +#define KERNEL_GCR_EL1                (SYS_GCR_EL1_RRND | KERNEL_GCR_EL1_EXCL)
 +
  /* RGSR_EL1 Definitions */
  #define SYS_RGSR_EL1_TAG_MASK 0xfUL
  #define SYS_RGSR_EL1_SEED_SHIFT       8
  #define ICH_VTR_A3V_SHIFT     21
  #define ICH_VTR_A3V_MASK      (1 << ICH_VTR_A3V_SHIFT)
  
+ #define ARM64_FEATURE_FIELD_BITS      4
+ /* Create a mask for the feature bits of the specified feature. */
+ #define ARM64_FEATURE_MASK(x) (GENMASK_ULL(x##_SHIFT + ARM64_FEATURE_FIELD_BITS - 1, x##_SHIFT))
  #ifdef __ASSEMBLY__
  
        .irp    num,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30
@@@ -67,7 -67,6 +67,7 @@@
  #include <linux/crash_dump.h>
  #include <linux/sort.h>
  #include <linux/stop_machine.h>
 +#include <linux/sysfs.h>
  #include <linux/types.h>
  #include <linux/minmax.h>
  #include <linux/mm.h>
@@@ -240,8 -239,8 +240,8 @@@ static const struct arm64_ftr_bits ftr_
        S_ARM64_FTR_BITS(FTR_VISIBLE, FTR_STRICT, FTR_LOWER_SAFE, ID_AA64PFR0_FP_SHIFT, 4, ID_AA64PFR0_FP_NI),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL3_SHIFT, 4, 0),
        ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL2_SHIFT, 4, 0),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_EL1_64BIT_ONLY),
-       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_EL0_64BIT_ONLY),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL1_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
+       ARM64_FTR_BITS(FTR_HIDDEN, FTR_NONSTRICT, FTR_LOWER_SAFE, ID_AA64PFR0_EL0_SHIFT, 4, ID_AA64PFR0_ELx_64BIT_ONLY),
        ARM64_FTR_END,
  };
  
@@@ -1322,31 -1321,6 +1322,31 @@@ const struct cpumask *system_32bit_el0_
        return cpu_possible_mask;
  }
  
 +static int __init parse_32bit_el0_param(char *str)
 +{
 +      allow_mismatched_32bit_el0 = true;
 +      return 0;
 +}
 +early_param("allow_mismatched_32bit_el0", parse_32bit_el0_param);
 +
 +static ssize_t aarch32_el0_show(struct device *dev,
 +                              struct device_attribute *attr, char *buf)
 +{
 +      const struct cpumask *mask = system_32bit_el0_cpumask();
 +
 +      return sysfs_emit(buf, "%*pbl\n", cpumask_pr_args(mask));
 +}
 +static const DEVICE_ATTR_RO(aarch32_el0);
 +
 +static int __init aarch32_el0_sysfs_init(void)
 +{
 +      if (!allow_mismatched_32bit_el0)
 +              return 0;
 +
 +      return device_create_file(cpu_subsys.dev_root, &dev_attr_aarch32_el0);
 +}
 +device_initcall(aarch32_el0_sysfs_init);
 +
  static bool has_32bit_el0(const struct arm64_cpu_capabilities *entry, int scope)
  {
        if (!has_cpuid_feature(entry, scope))
@@@ -1587,6 -1561,8 +1587,6 @@@ kpti_install_ng_mappings(const struct a
  
        if (!cpu)
                arm64_use_ng_mappings = true;
 -
 -      return;
  }
  #else
  static void
@@@ -1758,7 -1734,7 +1758,7 @@@ static void cpu_has_fwb(const struct ar
        u64 val = read_sysreg_s(SYS_CLIDR_EL1);
  
        /* Check that CLIDR_EL1.LOU{U,IS} are both 0 */
 -      WARN_ON(val & (7 << 27 | 7 << 21));
 +      WARN_ON(CLIDR_LOUU(val) || CLIDR_LOUIS(val));
  }
  
  #ifdef CONFIG_ARM64_PAN
@@@ -1867,9 -1843,6 +1867,9 @@@ static void bti_enable(const struct arm
  #ifdef CONFIG_ARM64_MTE
  static void cpu_enable_mte(struct arm64_cpu_capabilities const *cap)
  {
 +      sysreg_clear_set(sctlr_el1, 0, SCTLR_ELx_ATA | SCTLR_EL1_ATA0);
 +      isb();
 +
        /*
         * Clear the tags in the zero page. This needs to be done via the
         * linear map which has the Tagged attribute.
@@@ -1983,7 -1956,7 +1983,7 @@@ static const struct arm64_cpu_capabilit
                .sys_reg = SYS_ID_AA64PFR0_EL1,
                .sign = FTR_UNSIGNED,
                .field_pos = ID_AA64PFR0_EL0_SHIFT,
-               .min_field_value = ID_AA64PFR0_EL0_32BIT_64BIT,
+               .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
        },
  #ifdef CONFIG_KVM
        {
                .sys_reg = SYS_ID_AA64PFR0_EL1,
                .sign = FTR_UNSIGNED,
                .field_pos = ID_AA64PFR0_EL1_SHIFT,
-               .min_field_value = ID_AA64PFR0_EL1_32BIT_64BIT,
+               .min_field_value = ID_AA64PFR0_ELx_32BIT_64BIT,
        },
        {
                .desc = "Protected KVM",
@@@ -2928,38 -2901,15 +2928,38 @@@ void __init setup_cpu_features(void
  
  static int enable_mismatched_32bit_el0(unsigned int cpu)
  {
 +      /*
 +       * The first 32-bit-capable CPU we detected and so can no longer
 +       * be offlined by userspace. -1 indicates we haven't yet onlined
 +       * a 32-bit-capable CPU.
 +       */
 +      static int lucky_winner = -1;
 +
        struct cpuinfo_arm64 *info = &per_cpu(cpu_data, cpu);
        bool cpu_32bit = id_aa64pfr0_32bit_el0(info->reg_id_aa64pfr0);
  
        if (cpu_32bit) {
                cpumask_set_cpu(cpu, cpu_32bit_el0_mask);
                static_branch_enable_cpuslocked(&arm64_mismatched_32bit_el0);
 -              setup_elf_hwcaps(compat_elf_hwcaps);
        }
  
 +      if (cpumask_test_cpu(0, cpu_32bit_el0_mask) == cpu_32bit)
 +              return 0;
 +
 +      if (lucky_winner >= 0)
 +              return 0;
 +
 +      /*
 +       * We've detected a mismatch. We need to keep one of our CPUs with
 +       * 32-bit EL0 online so that is_cpu_allowed() doesn't end up rejecting
 +       * every CPU in the system for a 32-bit task.
 +       */
 +      lucky_winner = cpu_32bit ? cpu : cpumask_any_and(cpu_32bit_el0_mask,
 +                                                       cpu_active_mask);
 +      get_cpu_device(lucky_winner)->offline_disabled = true;
 +      setup_elf_hwcaps(compat_elf_hwcaps);
 +      pr_info("Asymmetric 32-bit EL0 support detected on CPU %u; CPU hot-unplug disabled on CPU %u\n",
 +              cpu, lucky_winner);
        return 0;
  }
  
@@@ -103,7 -103,6 +103,6 @@@ struct kvm_vcpu_stat 
        u64 emulated_inst_exits;
        u64 dec_exits;
        u64 ext_intr_exits;
-       u64 halt_wait_ns;
        u64 halt_successful_wait;
        u64 dbell_exits;
        u64 gdbell_exits;
@@@ -811,8 -810,6 +810,8 @@@ struct kvm_vcpu_arch 
  
        u32 online;
  
 +      u64 hfscr_permitted;    /* A mask of permitted HFSCR facilities */
 +
        /* For support of nested guests */
        struct kvm_nested_guest *nested;
        u32 nested_vcpu_id;
@@@ -80,7 -80,7 +80,7 @@@ static long kvmppc_rm_tce_to_ua(struct 
        unsigned long gfn = tce >> PAGE_SHIFT;
        struct kvm_memory_slot *memslot;
  
-       memslot = search_memslots(kvm_memslots_raw(kvm), gfn);
+       memslot = __gfn_to_memslot(kvm_memslots_raw(kvm), gfn);
        if (!memslot)
                return -EINVAL;
  
@@@ -173,13 -173,10 +173,13 @@@ static void kvmppc_rm_tce_put(struct kv
        idx -= stt->offset;
        page = stt->pages[idx / TCES_PER_PAGE];
        /*
 -       * page must not be NULL in real mode,
 -       * kvmppc_rm_ioba_validate() must have taken care of this.
 +       * kvmppc_rm_ioba_validate() allows pages not to be allocated if TCE is
 +       * being cleared, otherwise it returns H_TOO_HARD and we skip this.
         */
 -      WARN_ON_ONCE_RM(!page);
 +      if (!page) {
 +              WARN_ON_ONCE_RM(tce != 0);
 +              return;
 +      }
        tbl = kvmppc_page_address(page);
  
        tbl[idx % TCES_PER_PAGE] = tce;
@@@ -59,7 -59,6 +59,7 @@@
  #include <asm/kvm_book3s.h>
  #include <asm/mmu_context.h>
  #include <asm/lppaca.h>
 +#include <asm/pmc.h>
  #include <asm/processor.h>
  #include <asm/cputhreads.h>
  #include <asm/page.h>
@@@ -1166,7 -1165,7 +1166,7 @@@ int kvmppc_pseries_do_hcall(struct kvm_
                break;
  #endif
        case H_RANDOM:
 -              if (!powernv_get_random_long(&vcpu->arch.regs.gpr[4]))
 +              if (!arch_get_random_seed_long(&vcpu->arch.regs.gpr[4]))
                        ret = H_HARDWARE;
                break;
        case H_RPT_INVALIDATE:
@@@ -1680,21 -1679,6 +1680,21 @@@ static int kvmppc_handle_exit_hv(struc
                        r = RESUME_GUEST;
                }
                break;
 +
 +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 +      case BOOK3S_INTERRUPT_HV_SOFTPATCH:
 +              /*
 +               * This occurs for various TM-related instructions that
 +               * we need to emulate on POWER9 DD2.2.  We have already
 +               * handled the cases where the guest was in real-suspend
 +               * mode and was transitioning to transactional state.
 +               */
 +              r = kvmhv_p9_tm_emulation(vcpu);
 +              if (r != -1)
 +                      break;
 +              fallthrough; /* go to facility unavailable handler */
 +#endif
 +
        /*
         * This occurs if the guest (kernel or userspace), does something that
         * is prohibited by HFSCR.
                }
                break;
  
 -#ifdef CONFIG_PPC_TRANSACTIONAL_MEM
 -      case BOOK3S_INTERRUPT_HV_SOFTPATCH:
 -              /*
 -               * This occurs for various TM-related instructions that
 -               * we need to emulate on POWER9 DD2.2.  We have already
 -               * handled the cases where the guest was in real-suspend
 -               * mode and was transitioning to transactional state.
 -               */
 -              r = kvmhv_p9_tm_emulation(vcpu);
 -              break;
 -#endif
 -
        case BOOK3S_INTERRUPT_HV_RM_HARD:
                r = RESUME_PASSTHROUGH;
                break;
  
  static int kvmppc_handle_nested_exit(struct kvm_vcpu *vcpu)
  {
 +      struct kvm_nested_guest *nested = vcpu->arch.nested;
        int r;
        int srcu_idx;
  
                 * mode and was transitioning to transactional state.
                 */
                r = kvmhv_p9_tm_emulation(vcpu);
 -              break;
 +              if (r != -1)
 +                      break;
 +              fallthrough; /* go to facility unavailable handler */
  #endif
  
 +      case BOOK3S_INTERRUPT_H_FAC_UNAVAIL: {
 +              u64 cause = vcpu->arch.hfscr >> 56;
 +
 +              /*
 +               * Only pass HFU interrupts to the L1 if the facility is
 +               * permitted but disabled by the L1's HFSCR, otherwise
 +               * the interrupt does not make sense to the L1 so turn
 +               * it into a HEAI.
 +               */
 +              if (!(vcpu->arch.hfscr_permitted & (1UL << cause)) ||
 +                                      (nested->hfscr & (1UL << cause))) {
 +                      vcpu->arch.trap = BOOK3S_INTERRUPT_H_EMUL_ASSIST;
 +
 +                      /*
 +                       * If the fetch failed, return to guest and
 +                       * try executing it again.
 +                       */
 +                      r = kvmppc_get_last_inst(vcpu, INST_GENERIC,
 +                                               &vcpu->arch.emul_inst);
 +                      if (r != EMULATE_DONE)
 +                              r = RESUME_GUEST;
 +                      else
 +                              r = RESUME_HOST;
 +              } else {
 +                      r = RESUME_HOST;
 +              }
 +
 +              break;
 +      }
 +
        case BOOK3S_INTERRUPT_HV_RM_HARD:
                vcpu->arch.trap = 0;
                r = RESUME_GUEST;
@@@ -2721,7 -2684,6 +2721,7 @@@ static int kvmppc_core_vcpu_create_hv(s
        spin_lock_init(&vcpu->arch.vpa_update_lock);
        spin_lock_init(&vcpu->arch.tbacct_lock);
        vcpu->arch.busy_preempt = TB_NIL;
 +      vcpu->arch.shregs.msr = MSR_ME;
        vcpu->arch.intr_msr = MSR_SF | MSR_ME;
  
        /*
        if (cpu_has_feature(CPU_FTR_TM_COMP))
                vcpu->arch.hfscr |= HFSCR_TM;
  
 +      vcpu->arch.hfscr_permitted = vcpu->arch.hfscr;
 +
        kvmppc_mmu_book3s_hv_init(vcpu);
  
        vcpu->arch.state = KVMPPC_VCPU_NOTREADY;
@@@ -3767,6 -3727,7 +3767,6 @@@ static void load_spr_state(struct kvm_v
        mtspr(SPRN_EBBHR, vcpu->arch.ebbhr);
        mtspr(SPRN_EBBRR, vcpu->arch.ebbrr);
        mtspr(SPRN_BESCR, vcpu->arch.bescr);
 -      mtspr(SPRN_WORT, vcpu->arch.wort);
        mtspr(SPRN_TIDR, vcpu->arch.tid);
        mtspr(SPRN_AMR, vcpu->arch.amr);
        mtspr(SPRN_UAMOR, vcpu->arch.uamor);
@@@ -3793,6 -3754,7 +3793,6 @@@ static void store_spr_state(struct kvm_
        vcpu->arch.ebbhr = mfspr(SPRN_EBBHR);
        vcpu->arch.ebbrr = mfspr(SPRN_EBBRR);
        vcpu->arch.bescr = mfspr(SPRN_BESCR);
 -      vcpu->arch.wort = mfspr(SPRN_WORT);
        vcpu->arch.tid = mfspr(SPRN_TIDR);
        vcpu->arch.amr = mfspr(SPRN_AMR);
        vcpu->arch.uamor = mfspr(SPRN_UAMOR);
@@@ -3824,6 -3786,7 +3824,6 @@@ static void restore_p9_host_os_sprs(str
                                    struct p9_host_os_sprs *host_os_sprs)
  {
        mtspr(SPRN_PSPB, 0);
 -      mtspr(SPRN_WORT, 0);
        mtspr(SPRN_UAMOR, 0);
  
        mtspr(SPRN_DSCR, host_os_sprs->dscr);
@@@ -3889,18 -3852,6 +3889,18 @@@ static int kvmhv_p9_guest_entry(struct 
            cpu_has_feature(CPU_FTR_P9_TM_HV_ASSIST))
                kvmppc_restore_tm_hv(vcpu, vcpu->arch.shregs.msr, true);
  
 +#ifdef CONFIG_PPC_PSERIES
 +      if (kvmhv_on_pseries()) {
 +              barrier();
 +              if (vcpu->arch.vpa.pinned_addr) {
 +                      struct lppaca *lp = vcpu->arch.vpa.pinned_addr;
 +                      get_lppaca()->pmcregs_in_use = lp->pmcregs_in_use;
 +              } else {
 +                      get_lppaca()->pmcregs_in_use = 1;
 +              }
 +              barrier();
 +      }
 +#endif
        kvmhv_load_guest_pmu(vcpu);
  
        msr_check_and_set(MSR_FP | MSR_VEC | MSR_VSX);
        save_pmu |= nesting_enabled(vcpu->kvm);
  
        kvmhv_save_guest_pmu(vcpu, save_pmu);
 +#ifdef CONFIG_PPC_PSERIES
 +      if (kvmhv_on_pseries()) {
 +              barrier();
 +              get_lppaca()->pmcregs_in_use = ppc_get_pmu_inuse();
 +              barrier();
 +      }
 +#endif
  
        vc->entry_exit_map = 0x101;
        vc->in_guest = 0;
  
        /* Attribute wait time */
        if (do_sleep) {
-               vc->runner->stat.halt_wait_ns +=
+               vc->runner->stat.generic.halt_wait_ns +=
                        ktime_to_ns(cur) - ktime_to_ns(start_wait);
+               KVM_STATS_LOG_HIST_UPDATE(
+                               vc->runner->stat.generic.halt_wait_hist,
+                               ktime_to_ns(cur) - ktime_to_ns(start_wait));
                /* Attribute failed poll time */
-               if (vc->halt_poll_ns)
+               if (vc->halt_poll_ns) {
                        vc->runner->stat.generic.halt_poll_fail_ns +=
                                ktime_to_ns(start_wait) -
                                ktime_to_ns(start_poll);
+                       KVM_STATS_LOG_HIST_UPDATE(
+                               vc->runner->stat.generic.halt_poll_fail_hist,
+                               ktime_to_ns(start_wait) -
+                               ktime_to_ns(start_poll));
+               }
        } else {
                /* Attribute successful poll time */
-               if (vc->halt_poll_ns)
+               if (vc->halt_poll_ns) {
                        vc->runner->stat.generic.halt_poll_success_ns +=
                                ktime_to_ns(cur) -
                                ktime_to_ns(start_poll);
+                       KVM_STATS_LOG_HIST_UPDATE(
+                               vc->runner->stat.generic.halt_poll_success_hist,
+                               ktime_to_ns(cur) - ktime_to_ns(start_poll));
+               }
        }
  
        /* Adjust poll time */
@@@ -5384,7 -5340,6 +5396,7 @@@ static int kvmppc_set_passthru_irq(stru
        struct kvmppc_passthru_irqmap *pimap;
        struct irq_chip *chip;
        int i, rc = 0;
 +      struct irq_data *host_data;
  
        if (!kvm_irq_bypass)
                return 1;
         * what our real-mode EOI code does, or a XIVE interrupt
         */
        chip = irq_data_get_irq_chip(&desc->irq_data);
 -      if (!chip || !(is_pnv_opal_msi(chip) || is_xive_irq(chip))) {
 +      if (!chip || !is_pnv_opal_msi(chip)) {
                pr_warn("kvmppc_set_passthru_irq_hv: Could not assign IRQ map for (%d,%d)\n",
                        host_irq, guest_gsi);
                mutex_unlock(&kvm->lock);
         * the KVM real mode handler.
         */
        smp_wmb();
 -      irq_map->r_hwirq = desc->irq_data.hwirq;
 +
 +      /*
 +       * The 'host_irq' number is mapped in the PCI-MSI domain but
 +       * the underlying calls, which will EOI the interrupt in real
 +       * mode, need an HW IRQ number mapped in the XICS IRQ domain.
 +       */
 +      host_data = irq_domain_get_irq_data(irq_get_default_host(), host_irq);
 +      irq_map->r_hwirq = (unsigned int)irqd_to_hwirq(host_data);
  
        if (i == pimap->n_mapped)
                pimap->n_mapped++;
  
        if (xics_on_xive())
 -              rc = kvmppc_xive_set_mapped(kvm, guest_gsi, desc);
 +              rc = kvmppc_xive_set_mapped(kvm, guest_gsi, host_irq);
        else
 -              kvmppc_xics_set_mapped(kvm, guest_gsi, desc->irq_data.hwirq);
 +              kvmppc_xics_set_mapped(kvm, guest_gsi, irq_map->r_hwirq);
        if (rc)
                irq_map->r_hwirq = 0;
  
@@@ -5503,7 -5451,7 +5515,7 @@@ static int kvmppc_clr_passthru_irq(stru
        }
  
        if (xics_on_xive())
 -              rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, pimap->mapped[i].desc);
 +              rc = kvmppc_xive_clr_mapped(kvm, guest_gsi, host_irq);
        else
                kvmppc_xics_clr_mapped(kvm, guest_gsi, pimap->mapped[i].r_hwirq);
  
@@@ -244,6 -244,7 +244,7 @@@ struct kvm_s390_sie_block 
        __u8    fpf;                    /* 0x0060 */
  #define ECB_GS                0x40
  #define ECB_TE                0x10
+ #define ECB_SPECI     0x08
  #define ECB_SRSI      0x04
  #define ECB_HOSTPROTINT       0x02
        __u8    ecb;                    /* 0x0061 */
@@@ -798,12 -799,14 +799,12 @@@ struct kvm_s390_cpu_model 
        unsigned short ibc;
  };
  
 -struct kvm_s390_module_hook {
 -      int (*hook)(struct kvm_vcpu *vcpu);
 -      struct module *owner;
 -};
 +typedef int (*crypto_hook)(struct kvm_vcpu *vcpu);
  
  struct kvm_s390_crypto {
        struct kvm_s390_crypto_cb *crycb;
 -      struct kvm_s390_module_hook *pqap_hook;
 +      struct rw_semaphore pqap_hook_rwsem;
 +      crypto_hook *pqap_hook;
        __u32 crycbd;
        __u8 aes_kw;
        __u8 dea_kw;
@@@ -955,6 -958,7 +956,7 @@@ struct kvm_arch
        atomic64_t cmma_dirty_pages;
        /* subset of available cpu features enabled by user space */
        DECLARE_BITMAP(cpu_feat, KVM_S390_VM_CPU_FEAT_NR_BITS);
+       /* indexed by vcpu_idx */
        DECLARE_BITMAP(idle_mask, KVM_MAX_VCPUS);
        struct kvm_s390_gisa_interrupt gisa_int;
        struct kvm_s390_pv pv;
diff --combined arch/s390/kvm/kvm-s390.c
@@@ -66,8 -66,6 +66,6 @@@ const struct _kvm_stats_desc kvm_vm_sta
        STATS_DESC_COUNTER(VM, inject_service_signal),
        STATS_DESC_COUNTER(VM, inject_virtio)
  };
- static_assert(ARRAY_SIZE(kvm_vm_stats_desc) ==
-               sizeof(struct kvm_vm_stat) / sizeof(u64));
  
  const struct kvm_stats_header kvm_vm_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
@@@ -174,8 -172,6 +172,6 @@@ const struct _kvm_stats_desc kvm_vcpu_s
        STATS_DESC_COUNTER(VCPU, instruction_diagnose_other),
        STATS_DESC_COUNTER(VCPU, pfault_sync)
  };
- static_assert(ARRAY_SIZE(kvm_vcpu_stats_desc) ==
-               sizeof(struct kvm_vcpu_stat) / sizeof(u64));
  
  const struct kvm_stats_header kvm_vcpu_stats_header = {
        .name_size = KVM_STATS_NAME_SIZE,
@@@ -1953,7 -1949,7 +1949,7 @@@ out
  static int gfn_to_memslot_approx(struct kvm_memslots *slots, gfn_t gfn)
  {
        int start = 0, end = slots->used_slots;
-       int slot = atomic_read(&slots->lru_slot);
+       int slot = atomic_read(&slots->last_used_slot);
        struct kvm_memory_slot *memslots = slots->memslots;
  
        if (gfn >= memslots[slot].base_gfn &&
  
        if (gfn >= memslots[start].base_gfn &&
            gfn < memslots[start].base_gfn + memslots[start].npages) {
-               atomic_set(&slots->lru_slot, start);
+               atomic_set(&slots->last_used_slot, start);
        }
  
        return start;
@@@ -2559,26 -2555,12 +2555,26 @@@ static void kvm_s390_set_crycb_format(s
                kvm->arch.crypto.crycbd |= CRYCB_FORMAT1;
  }
  
 +/*
 + * kvm_arch_crypto_set_masks
 + *
 + * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 + *     to be set.
 + * @apm: the mask identifying the accessible AP adapters
 + * @aqm: the mask identifying the accessible AP domains
 + * @adm: the mask identifying the accessible AP control domains
 + *
 + * Set the masks that identify the adapters, domains and control domains to
 + * which the KVM guest is granted access.
 + *
 + * Note: The kvm->lock mutex must be locked by the caller before invoking this
 + *     function.
 + */
  void kvm_arch_crypto_set_masks(struct kvm *kvm, unsigned long *apm,
                               unsigned long *aqm, unsigned long *adm)
  {
        struct kvm_s390_crypto_cb *crycb = kvm->arch.crypto.crycb;
  
 -      mutex_lock(&kvm->lock);
        kvm_s390_vcpu_block_all(kvm);
  
        switch (kvm->arch.crypto.crycbd & CRYCB_FORMAT_MASK) {
        /* recreate the shadow crycb for each vcpu */
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
        kvm_s390_vcpu_unblock_all(kvm);
 -      mutex_unlock(&kvm->lock);
  }
  EXPORT_SYMBOL_GPL(kvm_arch_crypto_set_masks);
  
 +/*
 + * kvm_arch_crypto_clear_masks
 + *
 + * @kvm: pointer to the target guest's KVM struct containing the crypto masks
 + *     to be cleared.
 + *
 + * Clear the masks that identify the adapters, domains and control domains to
 + * which the KVM guest is granted access.
 + *
 + * Note: The kvm->lock mutex must be locked by the caller before invoking this
 + *     function.
 + */
  void kvm_arch_crypto_clear_masks(struct kvm *kvm)
  {
 -      mutex_lock(&kvm->lock);
        kvm_s390_vcpu_block_all(kvm);
  
        memset(&kvm->arch.crypto.crycb->apcb0, 0,
        /* recreate the shadow crycb for each vcpu */
        kvm_s390_sync_request_broadcast(kvm, KVM_REQ_VSIE_RESTART);
        kvm_s390_vcpu_unblock_all(kvm);
 -      mutex_unlock(&kvm->lock);
  }
  EXPORT_SYMBOL_GPL(kvm_arch_crypto_clear_masks);
  
@@@ -2653,7 -2626,6 +2649,7 @@@ static void kvm_s390_crypto_init(struc
  {
        kvm->arch.crypto.crycb = &kvm->arch.sie_page2->crycb;
        kvm_s390_set_crycb_format(kvm);
 +      init_rwsem(&kvm->arch.crypto.pqap_hook_rwsem);
  
        if (!test_kvm_facility(kvm, 76))
                return;
@@@ -3224,6 -3196,8 +3220,8 @@@ static int kvm_s390_vcpu_setup(struct k
                vcpu->arch.sie_block->ecb |= ECB_SRSI;
        if (test_kvm_facility(vcpu->kvm, 73))
                vcpu->arch.sie_block->ecb |= ECB_TE;
+       if (!kvm_is_ucontrol(vcpu->kvm))
+               vcpu->arch.sie_block->ecb |= ECB_SPECI;
  
        if (test_kvm_facility(vcpu->kvm, 8) && vcpu->kvm->arch.use_pfmfi)
                vcpu->arch.sie_block->ecb2 |= ECB2_PFMFI;
@@@ -4068,7 -4042,7 +4066,7 @@@ static int vcpu_pre_run(struct kvm_vcp
                kvm_s390_patch_guest_per_regs(vcpu);
        }
  
-       clear_bit(vcpu->vcpu_id, vcpu->kvm->arch.gisa_int.kicked_mask);
+       clear_bit(kvm_vcpu_get_idx(vcpu), vcpu->kvm->arch.gisa_int.kicked_mask);
  
        vcpu->arch.sie_block->icptcode = 0;
        cpuflags = atomic_read(&vcpu->arch.sie_block->cpuflags);
diff --combined arch/x86/kvm/hyperv.c
@@@ -88,6 -88,10 +88,10 @@@ static bool synic_has_vector_auto_eoi(s
  static void synic_update_vector(struct kvm_vcpu_hv_synic *synic,
                                int vector)
  {
+       struct kvm_vcpu *vcpu = hv_synic_to_vcpu(synic);
+       struct kvm_hv *hv = to_kvm_hv(vcpu->kvm);
+       int auto_eoi_old, auto_eoi_new;
        if (vector < HV_SYNIC_FIRST_VALID_VECTOR)
                return;
  
        else
                __clear_bit(vector, synic->vec_bitmap);
  
+       auto_eoi_old = bitmap_weight(synic->auto_eoi_bitmap, 256);
        if (synic_has_vector_auto_eoi(synic, vector))
                __set_bit(vector, synic->auto_eoi_bitmap);
        else
                __clear_bit(vector, synic->auto_eoi_bitmap);
+       auto_eoi_new = bitmap_weight(synic->auto_eoi_bitmap, 256);
+       if (!!auto_eoi_old == !!auto_eoi_new)
+               return;
+       mutex_lock(&vcpu->kvm->arch.apicv_update_lock);
+       if (auto_eoi_new)
+               hv->synic_auto_eoi_used++;
+       else
+               hv->synic_auto_eoi_used--;
+       __kvm_request_apicv_update(vcpu->kvm,
+                                  !hv->synic_auto_eoi_used,
+                                  APICV_INHIBIT_REASON_HYPERV);
+       mutex_unlock(&vcpu->kvm->arch.apicv_update_lock);
  }
  
  static int synic_set_sint(struct kvm_vcpu_hv_synic *synic, int sint,
@@@ -933,12 -957,6 +957,6 @@@ int kvm_hv_activate_synic(struct kvm_vc
  
        synic = to_hv_synic(vcpu);
  
-       /*
-        * Hyper-V SynIC auto EOI SINT's are
-        * not compatible with APICV, so request
-        * to deactivate APICV permanently.
-        */
-       kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_HYPERV);
        synic->active = true;
        synic->dont_zero_synic_pages = dont_zero_synic_pages;
        synic->control = HV_SYNIC_CONTROL_ENABLE;
@@@ -1933,7 -1951,7 +1951,7 @@@ ret_success
  void kvm_hv_set_cpuid(struct kvm_vcpu *vcpu)
  {
        struct kvm_cpuid_entry2 *entry;
 -      struct kvm_vcpu_hv *hv_vcpu = to_hv_vcpu(vcpu);
 +      struct kvm_vcpu_hv *hv_vcpu;
  
        entry = kvm_find_cpuid_entry(vcpu, HYPERV_CPUID_INTERFACE, 0);
        if (entry && entry->eax == HYPERV_CPUID_SIGNATURE_EAX) {
@@@ -2476,6 -2494,8 +2494,8 @@@ int kvm_get_hv_cpuid(struct kvm_vcpu *v
                                ent->eax |= HV_X64_ENLIGHTENED_VMCS_RECOMMENDED;
                        if (!cpu_smt_possible())
                                ent->eax |= HV_X64_NO_NONARCH_CORESHARING;
+                       ent->eax |= HV_DEPRECATING_AEOI_RECOMMENDED;
                        /*
                         * Default number of spinlock retry attempts, matches
                         * HyperV 2016.
@@@ -158,9 -158,6 +158,9 @@@ void recalc_intercepts(struct vcpu_svm 
        /* If SMI is not intercepted, ignore guest SMI intercept as well  */
        if (!intercept_smi)
                vmcb_clr_intercept(c, INTERCEPT_SMI);
 +
 +      vmcb_set_intercept(c, INTERCEPT_VMLOAD);
 +      vmcb_set_intercept(c, INTERCEPT_VMSAVE);
  }
  
  static void copy_vmcb_control_area(struct vmcb_control_area *dst,
@@@ -506,11 -503,7 +506,11 @@@ static void nested_vmcb02_prepare_save(
  
  static void nested_vmcb02_prepare_control(struct vcpu_svm *svm)
  {
 -      const u32 mask = V_INTR_MASKING_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK;
 +      const u32 int_ctl_vmcb01_bits =
 +              V_INTR_MASKING_MASK | V_GIF_MASK | V_GIF_ENABLE_MASK;
 +
 +      const u32 int_ctl_vmcb12_bits = V_TPR_MASK | V_IRQ_INJECTION_BITS_MASK;
 +
        struct kvm_vcpu *vcpu = &svm->vcpu;
  
        /*
                vcpu->arch.l1_tsc_offset + svm->nested.ctl.tsc_offset;
  
        svm->vmcb->control.int_ctl             =
 -              (svm->nested.ctl.int_ctl & ~mask) |
 -              (svm->vmcb01.ptr->control.int_ctl & mask);
 +              (svm->nested.ctl.int_ctl & int_ctl_vmcb12_bits) |
 +              (svm->vmcb01.ptr->control.int_ctl & int_ctl_vmcb01_bits);
  
        svm->vmcb->control.virt_ext            = svm->nested.ctl.virt_ext;
        svm->vmcb->control.int_vector          = svm->nested.ctl.int_vector;
@@@ -666,11 -659,6 +666,6 @@@ int nested_svm_vmrun(struct kvm_vcpu *v
                goto out;
        }
  
-       /* Clear internal status */
-       kvm_clear_exception_queue(vcpu);
-       kvm_clear_interrupt_queue(vcpu);
        /*
         * Since vmcb01 is not in use, we can use it to store some of the L1
         * state.
diff --combined arch/x86/kvm/svm/svm.c
@@@ -46,8 -46,6 +46,6 @@@
  #include "kvm_onhyperv.h"
  #include "svm_onhyperv.h"
  
- #define __ex(x) __kvm_handle_fault_on_reboot(x)
  MODULE_AUTHOR("Qumranet");
  MODULE_LICENSE("GPL");
  
@@@ -261,7 -259,7 +259,7 @@@ u32 svm_msrpm_offset(u32 msr
  static int get_max_npt_level(void)
  {
  #ifdef CONFIG_X86_64
-       return PT64_ROOT_4LEVEL;
+       return pgtable_l5_enabled() ? PT64_ROOT_5LEVEL : PT64_ROOT_4LEVEL;
  #else
        return PT32E_ROOT_LEVEL;
  #endif
@@@ -462,11 -460,6 +460,6 @@@ static int has_svm(void
                return 0;
        }
  
-       if (pgtable_l5_enabled()) {
-               pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
-               return 0;
-       }
        return 1;
  }
  
@@@ -1015,7 -1008,9 +1008,9 @@@ static __init int svm_hardware_setup(vo
        if (!boot_cpu_has(X86_FEATURE_NPT))
                npt_enabled = false;
  
-       kvm_configure_mmu(npt_enabled, get_max_npt_level(), PG_LEVEL_1G);
+       /* Force VM NPT level equal to the host's max NPT level */
+       kvm_configure_mmu(npt_enabled, get_max_npt_level(),
+                         get_max_npt_level(), PG_LEVEL_1G);
        pr_info("kvm: Nested Paging %sabled\n", npt_enabled ? "en" : "dis");
  
        /* Note, SEV setup consumes npt_enabled. */
@@@ -1161,8 -1156,6 +1156,6 @@@ static void init_vmcb(struct kvm_vcpu *
        struct vmcb_control_area *control = &svm->vmcb->control;
        struct vmcb_save_area *save = &svm->vmcb->save;
  
-       vcpu->arch.hflags = 0;
        svm_set_intercept(svm, INTERCEPT_CR0_READ);
        svm_set_intercept(svm, INTERCEPT_CR3_READ);
        svm_set_intercept(svm, INTERCEPT_CR4_READ);
                SVM_SELECTOR_S_MASK | SVM_SELECTOR_CODE_MASK;
        save->cs.limit = 0xffff;
  
+       save->gdtr.base = 0;
        save->gdtr.limit = 0xffff;
+       save->idtr.base = 0;
        save->idtr.limit = 0xffff;
  
        init_sys_seg(&save->ldtr, SEG_TYPE_LDT);
        init_sys_seg(&save->tr, SEG_TYPE_BUSY_TSS16);
  
-       svm_set_cr4(vcpu, 0);
-       svm_set_efer(vcpu, 0);
-       save->dr6 = 0xffff0ff0;
-       kvm_set_rflags(vcpu, X86_EFLAGS_FIXED);
-       save->rip = 0x0000fff0;
-       vcpu->arch.regs[VCPU_REGS_RIP] = save->rip;
-       /*
-        * svm_set_cr0() sets PG and WP and clears NW and CD on save->cr0.
-        * It also updates the guest-visible cr0 value.
-        */
-       svm_set_cr0(vcpu, X86_CR0_NW | X86_CR0_CD | X86_CR0_ET);
-       kvm_mmu_reset_context(vcpu);
-       save->cr4 = X86_CR4_PAE;
-       /* rdx = ?? */
        if (npt_enabled) {
                /* Setup VMCB for Nested Paging */
                control->nested_ctl |= SVM_NESTED_CTL_NP_ENABLE;
                svm_clr_intercept(svm, INTERCEPT_CR3_WRITE);
                save->g_pat = vcpu->arch.pat;
                save->cr3 = 0;
-               save->cr4 = 0;
        }
        svm->current_vmcb->asid_generation = 0;
        svm->asid = 0;
  
        svm->nested.vmcb12_gpa = INVALID_GPA;
        svm->nested.last_vmcb12_gpa = INVALID_GPA;
-       vcpu->arch.hflags = 0;
  
        if (!kvm_pause_in_guest(vcpu->kvm)) {
                control->pause_filter_count = pause_filter_count;
  static void svm_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event)
  {
        struct vcpu_svm *svm = to_svm(vcpu);
-       u32 dummy;
-       u32 eax = 1;
  
        svm->spec_ctrl = 0;
        svm->virt_spec_ctrl = 0;
  
-       if (!init_event) {
-               vcpu->arch.apic_base = APIC_DEFAULT_PHYS_BASE |
-                                      MSR_IA32_APICBASE_ENABLE;
-               if (kvm_vcpu_is_reset_bsp(vcpu))
-                       vcpu->arch.apic_base |= MSR_IA32_APICBASE_BSP;
-       }
        init_vmcb(vcpu);
-       kvm_cpuid(vcpu, &eax, &dummy, &dummy, &dummy, false);
-       kvm_rdx_write(vcpu, eax);
-       if (kvm_vcpu_apicv_active(vcpu) && !init_event)
-               avic_update_vapic_bar(svm, APIC_DEFAULT_PHYS_BASE);
  }
  
  void svm_switch_vmcb(struct vcpu_svm *svm, struct kvm_vmcb_info *target_vmcb)
@@@ -1513,12 -1475,15 +1475,15 @@@ static void svm_vcpu_load(struct kvm_vc
                sd->current_vmcb = svm->vmcb;
                indirect_branch_prediction_barrier();
        }
-       avic_vcpu_load(vcpu, cpu);
+       if (kvm_vcpu_apicv_active(vcpu))
+               avic_vcpu_load(vcpu, cpu);
  }
  
  static void svm_vcpu_put(struct kvm_vcpu *vcpu)
  {
-       avic_vcpu_put(vcpu);
+       if (kvm_vcpu_apicv_active(vcpu))
+               avic_vcpu_put(vcpu);
        svm_prepare_host_switch(vcpu);
  
        ++vcpu->stat.host_state_reload;
@@@ -1560,7 -1525,7 +1525,7 @@@ static void svm_cache_reg(struct kvm_vc
                load_pdptrs(vcpu, vcpu->arch.walk_mmu, kvm_read_cr3(vcpu));
                break;
        default:
-               WARN_ON_ONCE(1);
+               KVM_BUG_ON(1, vcpu->kvm);
        }
  }
  
@@@ -1589,18 -1554,17 +1554,18 @@@ static void svm_set_vintr(struct vcpu_s
  
  static void svm_clear_vintr(struct vcpu_svm *svm)
  {
 -      const u32 mask = V_TPR_MASK | V_GIF_ENABLE_MASK | V_GIF_MASK | V_INTR_MASKING_MASK;
        svm_clr_intercept(svm, INTERCEPT_VINTR);
  
        /* Drop int_ctl fields related to VINTR injection.  */
 -      svm->vmcb->control.int_ctl &= mask;
 +      svm->vmcb->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
        if (is_guest_mode(&svm->vcpu)) {
 -              svm->vmcb01.ptr->control.int_ctl &= mask;
 +              svm->vmcb01.ptr->control.int_ctl &= ~V_IRQ_INJECTION_BITS_MASK;
  
                WARN_ON((svm->vmcb->control.int_ctl & V_TPR_MASK) !=
                        (svm->nested.ctl.int_ctl & V_TPR_MASK));
 -              svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl & ~mask;
 +
 +              svm->vmcb->control.int_ctl |= svm->nested.ctl.int_ctl &
 +                      V_IRQ_INJECTION_BITS_MASK;
        }
  
        vmcb_mark_dirty(svm->vmcb, VMCB_INTR);
@@@ -2078,11 -2042,15 +2043,15 @@@ static int shutdown_interception(struc
                return -EINVAL;
  
        /*
-        * VMCB is undefined after a SHUTDOWN intercept
-        * so reinitialize it.
+        * VMCB is undefined after a SHUTDOWN intercept.  INIT the vCPU to put
+        * the VMCB in a known good state.  Unfortunately, KVM doesn't have
+        * KVM_MP_STATE_SHUTDOWN and can't add it without potentially breaking
+        * userspace.  At a platform view, INIT is acceptable behavior as
+        * there exist bare metal platforms that automatically INIT the CPU
+        * in response to shutdown.
         */
        clear_page(svm->vmcb);
-       init_vmcb(vcpu);
+       kvm_vcpu_reset(vcpu, true);
  
        kvm_run->exit_reason = KVM_EXIT_SHUTDOWN;
        return 0;
@@@ -2993,10 -2961,6 +2962,6 @@@ static int svm_set_msr(struct kvm_vcpu 
                svm->msr_decfg = data;
                break;
        }
-       case MSR_IA32_APICBASE:
-               if (kvm_vcpu_apicv_active(vcpu))
-                       avic_update_vapic_bar(to_svm(vcpu), data);
-               fallthrough;
        default:
                return kvm_set_msr_common(vcpu, msr);
        }
@@@ -3021,7 -2985,7 +2986,7 @@@ static int interrupt_window_interceptio
         * In this case AVIC was temporarily disabled for
         * requesting the IRQ window and we have to re-enable it.
         */
-       svm_toggle_avic_for_irq_window(vcpu, true);
+       kvm_request_apicv_update(vcpu->kvm, true, APICV_INHIBIT_REASON_IRQWIN);
  
        ++vcpu->stat.irq_window_exits;
        return 1;
@@@ -3269,12 -3233,14 +3234,14 @@@ static void dump_vmcb(struct kvm_vcpu *
               "excp_to:", save->last_excp_to);
  }
  
- static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+ static bool svm_check_exit_valid(struct kvm_vcpu *vcpu, u64 exit_code)
  {
-       if (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
-           svm_exit_handlers[exit_code])
-               return 0;
+       return (exit_code < ARRAY_SIZE(svm_exit_handlers) &&
+               svm_exit_handlers[exit_code]);
+ }
  
+ static int svm_handle_invalid_exit(struct kvm_vcpu *vcpu, u64 exit_code)
+ {
        vcpu_unimpl(vcpu, "svm: unexpected exit reason 0x%llx\n", exit_code);
        dump_vmcb(vcpu);
        vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
        vcpu->run->internal.ndata = 2;
        vcpu->run->internal.data[0] = exit_code;
        vcpu->run->internal.data[1] = vcpu->arch.last_vmentry_cpu;
-       return -EINVAL;
+       return 0;
  }
  
  int svm_invoke_exit_handler(struct kvm_vcpu *vcpu, u64 exit_code)
  {
-       if (svm_handle_invalid_exit(vcpu, exit_code))
-               return 0;
+       if (!svm_check_exit_valid(vcpu, exit_code))
+               return svm_handle_invalid_exit(vcpu, exit_code);
  
  #ifdef CONFIG_RETPOLINE
        if (exit_code == SVM_EXIT_MSR)
@@@ -3573,7 -3538,7 +3539,7 @@@ static void svm_enable_irq_window(struc
                 * via AVIC. In such case, we need to temporarily disable AVIC,
                 * and fallback to injecting IRQ via V_IRQ.
                 */
-               svm_toggle_avic_for_irq_window(vcpu, false);
+               kvm_request_apicv_update(vcpu->kvm, false, APICV_INHIBIT_REASON_IRQWIN);
                svm_set_vintr(svm);
        }
  }
@@@ -3808,6 -3773,8 +3774,8 @@@ static __no_kcsan fastpath_t svm_vcpu_r
  
        pre_svm_run(vcpu);
  
+       WARN_ON_ONCE(kvm_apicv_activated(vcpu->kvm) != kvm_vcpu_apicv_active(vcpu));
        sync_lapic_to_cr8(vcpu);
  
        if (unlikely(svm->asid != svm->vmcb->control.asid)) {
@@@ -4610,7 -4577,6 +4578,6 @@@ static struct kvm_x86_ops svm_x86_ops _
        .set_virtual_apic_mode = svm_set_virtual_apic_mode,
        .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl,
        .check_apicv_inhibit_reasons = svm_check_apicv_inhibit_reasons,
-       .pre_update_apicv_exec_ctrl = svm_pre_update_apicv_exec_ctrl,
        .load_eoi_exitmap = svm_load_eoi_exitmap,
        .hwapic_irr_update = svm_hwapic_irr_update,
        .hwapic_isr_update = svm_hwapic_isr_update,
@@@ -330,31 -330,6 +330,31 @@@ void nested_vmx_free_vcpu(struct kvm_vc
        vcpu_put(vcpu);
  }
  
 +#define EPTP_PA_MASK   GENMASK_ULL(51, 12)
 +
 +static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
 +{
 +      return VALID_PAGE(root_hpa) &&
 +             ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
 +}
 +
 +static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
 +                                     gpa_t addr)
 +{
 +      uint i;
 +      struct kvm_mmu_root_info *cached_root;
 +
 +      WARN_ON_ONCE(!mmu_is_nested(vcpu));
 +
 +      for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
 +              cached_root = &vcpu->arch.mmu->prev_roots[i];
 +
 +              if (nested_ept_root_matches(cached_root->hpa, cached_root->pgd,
 +                                          eptp))
 +                      vcpu->arch.mmu->invlpg(vcpu, addr, cached_root->hpa);
 +      }
 +}
 +
  static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
                struct x86_exception *fault)
  {
                vm_exit_reason = EXIT_REASON_PML_FULL;
                vmx->nested.pml_full = false;
                exit_qualification &= INTR_INFO_UNBLOCK_NMI;
 -      } else if (fault->error_code & PFERR_RSVD_MASK)
 -              vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
 -      else
 -              vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
 +      } else {
 +              if (fault->error_code & PFERR_RSVD_MASK)
 +                      vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
 +              else
 +                      vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
 +
 +              /*
 +               * Although the caller (kvm_inject_emulated_page_fault) would
 +               * have already synced the faulting address in the shadow EPT
 +               * tables for the current EPTP12, we also need to sync it for
 +               * any other cached EPTP02s based on the same EP4TA, since the
 +               * TLB associates mappings to the EP4TA rather than the full EPTP.
 +               */
 +              nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
 +                                         fault->address);
 +      }
  
        nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
        vmcs12->guest_physical_address = fault->address;
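
  [Editor's note] The comment in the hunk above is the key point: the TLB tags guest-physical translations with the EP4TA (the page-frame bits of the EPTP), so after an emulated page fault the address must be flushed from every cached shadow root whose EPTP shares those bits, not only the current one. A standalone sketch of the mask-and-compare step, using the same bits-51:12 mask as EPTP_PA_MASK; the root cache and surrounding loop are invented for illustration.

	/* Sketch of EP4TA matching over a small cache of roots; illustrative only. */
	#include <stdbool.h>
	#include <stdint.h>
	#include <stdio.h>

	#define EPTP_PA_MASK	(((1ULL << 52) - 1) & ~((1ULL << 12) - 1)) /* bits 51:12 */
	#define NUM_PREV_ROOTS	3
	#define INVALID_ROOT	(~0ULL)

	struct cached_root {
		uint64_t hpa;	/* host-physical root page, INVALID_ROOT if unused */
		uint64_t eptp;	/* EPTP the shadow root was built for */
	};

	static bool root_matches(const struct cached_root *r, uint64_t eptp)
	{
		return r->hpa != INVALID_ROOT &&
		       (r->eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK);
	}

	static void invalidate_addr(struct cached_root roots[NUM_PREV_ROOTS],
				    uint64_t eptp, uint64_t addr)
	{
		for (int i = 0; i < NUM_PREV_ROOTS; i++)
			if (root_matches(&roots[i], eptp))
				printf("flush 0x%llx in root 0x%llx\n",
				       (unsigned long long)addr,
				       (unsigned long long)roots[i].hpa);
	}
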
@@@ -2207,7 -2170,8 +2207,8 @@@ static void prepare_vmcs02_early_rare(s
        }
  }
  
- static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
+ static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
+                                struct vmcs12 *vmcs12)
  {
        u32 exec_control;
        u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
        /*
         * PIN CONTROLS
         */
-       exec_control = vmx_pin_based_exec_ctrl(vmx);
+       exec_control = __pin_controls_get(vmcs01);
        exec_control |= (vmcs12->pin_based_vm_exec_control &
                         ~PIN_BASED_VMX_PREEMPTION_TIMER);
  
        /* Posted interrupts setting is only taken from vmcs12.  */
-       if (nested_cpu_has_posted_intr(vmcs12)) {
+       vmx->nested.pi_pending = false;
+       if (nested_cpu_has_posted_intr(vmcs12))
                vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
-               vmx->nested.pi_pending = false;
-       } else {
+       else
                exec_control &= ~PIN_BASED_POSTED_INTR;
-       }
        pin_controls_set(vmx, exec_control);
  
        /*
         * EXEC CONTROLS
         */
-       exec_control = vmx_exec_control(vmx); /* L0's desires */
+       exec_control = __exec_controls_get(vmcs01); /* L0's desires */
        exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
        exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
        exec_control &= ~CPU_BASED_TPR_SHADOW;
         * SECONDARY EXEC CONTROLS
         */
        if (cpu_has_secondary_exec_ctrls()) {
-               exec_control = vmx->secondary_exec_control;
+               exec_control = __secondary_exec_controls_get(vmcs01);
  
                /* Take the following fields only from vmcs12 */
                exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
+                                 SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
                                  SECONDARY_EXEC_ENABLE_INVPCID |
                                  SECONDARY_EXEC_ENABLE_RDTSCP |
                                  SECONDARY_EXEC_XSAVES |
                                  SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
                                  SECONDARY_EXEC_APIC_REGISTER_VIRT |
                                  SECONDARY_EXEC_ENABLE_VMFUNC |
-                                 SECONDARY_EXEC_TSC_SCALING);
+                                 SECONDARY_EXEC_TSC_SCALING |
+                                 SECONDARY_EXEC_DESC);
                if (nested_cpu_has(vmcs12,
                                   CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
                        exec_control |= vmcs12->secondary_vm_exec_control;
         * on the related bits (if supported by the CPU) in the hope that
         * we can avoid VMWrites during vmx_set_efer().
         */
-       exec_control = (vmcs12->vm_entry_controls | vmx_vmentry_ctrl()) &
-                       ~VM_ENTRY_IA32E_MODE & ~VM_ENTRY_LOAD_IA32_EFER;
+       exec_control = __vm_entry_controls_get(vmcs01);
+       exec_control |= vmcs12->vm_entry_controls;
+       exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
        if (cpu_has_load_ia32_efer()) {
                if (guest_efer & EFER_LMA)
                        exec_control |= VM_ENTRY_IA32E_MODE;
         * we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
         * bits may be modified by vmx_set_efer() in prepare_vmcs02().
         */
-       exec_control = vmx_vmexit_ctrl();
+       exec_control = __vm_exit_controls_get(vmcs01);
        if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
                exec_control |= VM_EXIT_LOAD_IA32_EFER;
+       else
+               exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
        vm_exit_controls_set(vmx, exec_control);
  
        /*
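
  [Editor's note] The recurring pattern in the prepare_vmcs02_early() hunk is: start from the value L0 already programmed into vmcs01 (__pin_controls_get() and friends read the cached shadow copy, so no VMREAD is needed), OR in what L1 requested in vmcs12, then explicitly clear or re-derive the bits that must come from exactly one side. A small self-contained sketch of that merge, with invented control-bit names:

	/* Sketch of merging L0 (vmcs01) and L1 (vmcs12) control bits; names invented. */
	#include <stdint.h>

	#define CTRL_PREEMPT_TIMER	(1u << 0)	/* never taken from L1 here */
	#define CTRL_LOAD_EFER		(1u << 2)	/* decided by L0 at the end */

	static uint32_t merge_controls(uint32_t vmcs01_ctrl, uint32_t vmcs12_ctrl,
				       int want_load_efer)
	{
		uint32_t ctrl = vmcs01_ctrl;			/* L0's baseline */

		ctrl |= vmcs12_ctrl & ~CTRL_PREEMPT_TIMER;	/* add L1's requests */
		ctrl &= ~CTRL_LOAD_EFER;			/* force-clear, then... */
		if (want_load_efer)
			ctrl |= CTRL_LOAD_EFER;			/* ...set only if needed */
		return ctrl;
	}

  Taking the baseline from the vmcs01 shadow rather than recomputing L0's desires is also what lets the *_controls_set() helpers skip the VMWRITE when the merged value ends up unchanged.
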
@@@ -3384,7 -3353,7 +3390,7 @@@ enum nvmx_vmentry_status nested_vmx_ent
  
        vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
  
-       prepare_vmcs02_early(vmx, vmcs12);
+       prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
  
        if (from_vmentry) {
                if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
@@@ -4304,7 -4273,7 +4310,7 @@@ static void load_vmcs12_host_state(stru
                seg.l = 1;
        else
                seg.db = 1;
-       vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
        seg = (struct kvm_segment) {
                .base = 0,
                .limit = 0xFFFFFFFF,
                .g = 1
        };
        seg.selector = vmcs12->host_ds_selector;
-       vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
        seg.selector = vmcs12->host_es_selector;
-       vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
        seg.selector = vmcs12->host_ss_selector;
-       vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
        seg.selector = vmcs12->host_fs_selector;
        seg.base = vmcs12->host_fs_base;
-       vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
        seg.selector = vmcs12->host_gs_selector;
        seg.base = vmcs12->host_gs_base;
-       vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
        seg = (struct kvm_segment) {
                .base = vmcs12->host_tr_base,
                .limit = 0x67,
                .type = 11,
                .present = 1
        };
-       vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
+       memset(&seg, 0, sizeof(seg));
+       seg.unusable = 1;
+       __vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
  
        kvm_set_dr(vcpu, 7, 0x400);
        vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
  
-       if (cpu_has_vmx_msr_bitmap())
-               vmx_update_msr_bitmap(vcpu);
        if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
                                vmcs12->vm_exit_msr_load_count))
                nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
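
  [Editor's note] load_vmcs12_host_state() rebuilds the L1 "host" segment registers by filling a struct kvm_segment per register and handing it to the internal __vmx_set_segment() helper; the new tail also marks LDTR unusable, matching the architectural VM-exit state. A userspace-flavoured sketch of filling the same uapi structure for a flat data segment and an unusable segment; the values mirror the reload above but are only illustrative.

	/* Sketch: describing segments with the uapi struct kvm_segment. */
	#include <linux/kvm.h>
	#include <string.h>

	static struct kvm_segment flat_data_segment(__u16 selector)
	{
		struct kvm_segment seg;

		memset(&seg, 0, sizeof(seg));
		seg.selector = selector;
		seg.base     = 0;
		seg.limit    = 0xFFFFFFFF;
		seg.type     = 3;	/* read/write data, accessed */
		seg.s        = 1;	/* code/data, not system */
		seg.present  = 1;
		seg.db       = 1;	/* 32-bit default operand size */
		seg.g        = 1;	/* 4 KiB granularity */
		return seg;
	}

	static struct kvm_segment unusable_segment(void)
	{
		struct kvm_segment seg;

		memset(&seg, 0, sizeof(seg));
		seg.unusable = 1;	/* e.g. LDTR after VM-exit */
		return seg;
	}
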
@@@ -4419,9 -4389,6 +4426,6 @@@ static void nested_vmx_restore_host_sta
  
        kvm_mmu_reset_context(vcpu);
  
-       if (cpu_has_vmx_msr_bitmap())
-               vmx_update_msr_bitmap(vcpu);
        /*
         * This nasty bit of open coding is a compromise between blindly
         * loading L1's MSRs using the exit load lists (incorrect emulation
@@@ -5362,6 -5329,14 +5366,6 @@@ static int handle_vmptrst(struct kvm_vc
        return nested_vmx_succeed(vcpu);
  }
  
 -#define EPTP_PA_MASK   GENMASK_ULL(51, 12)
 -
 -static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
 -{
 -      return VALID_PAGE(root_hpa) &&
 -              ((root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK));
 -}
 -
  /* Emulate the INVEPT instruction */
  static int handle_invept(struct kvm_vcpu *vcpu)
  {
@@@ -5855,8 -5830,7 +5859,8 @@@ static bool nested_vmx_l0_wants_exit(st
                if (is_nmi(intr_info))
                        return true;
                else if (is_page_fault(intr_info))
 -                      return vcpu->arch.apf.host_apf_flags || !enable_ept;
 +                      return vcpu->arch.apf.host_apf_flags ||
 +                             vmx_need_pf_intercept(vcpu);
                else if (is_debug(intr_info) &&
                         vcpu->guest_debug &
                         (KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
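
  [Editor's note] The page-fault change in the last hunk replaces the bare !enable_ept test with vmx_need_pf_intercept(), so L0 claims a reflected #PF whenever it is still intercepting page faults for its own reasons (a pending async page fault for the host, or configurations where EPT alone is not sufficient), not only when EPT is disabled. A hedged sketch of that kind of decision helper, with invented predicate and field names:

	/* Sketch of an "L0 wants this #PF" decision; fields are illustrative. */
	#include <stdbool.h>

	struct vcpu_flags {
		bool host_async_pf_pending;	/* async #PF token owed to L0 */
		bool ept_enabled;
		bool smaller_maxphyaddr;	/* emulating a reduced MAXPHYADDR */
	};

	static bool need_pf_intercept(const struct vcpu_flags *f)
	{
		/* L0 traps #PF when it cannot rely on EPT alone. */
		return !f->ept_enabled || f->smaller_maxphyaddr;
	}

	static bool l0_wants_page_fault(const struct vcpu_flags *f)
	{
		return f->host_async_pf_pending || need_pf_intercept(f);
	}
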