Merge tag 'drm-fixes-2021-05-14' of git://anongit.freedesktop.org/drm/drm
author Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 May 2021 17:38:16 +0000 (10:38 -0700)
committer Linus Torvalds <torvalds@linux-foundation.org>
Fri, 14 May 2021 17:38:16 +0000 (10:38 -0700)
Pull drm fixes from Dave Airlie:
 "Not much here, mostly amdgpu fixes, with a couple of radeon, and a
  cosmetic vc4.

  Two MAINTAINERS file updates also.

  amdgpu:
   - Fixes for flexible array conversions
   - Fix sysfs attribute init
   - Harvesting fixes
   - VCN CG/PG fixes for Picasso

  radeon:
   - Fixes for flexible array conversions
   - Fix for flickering on Oland with multiple 4K displays

  vc4:
   - drop unused function"

* tag 'drm-fixes-2021-05-14' of git://anongit.freedesktop.org/drm/drm:
  drm/amdgpu: update vcn1.0 Non-DPG suspend sequence
  drm/amdgpu: set vcn mgcg flag for picasso
  drm/radeon/dpm: Disable sclk switching on Oland when two 4K 60Hz monitors are connected
  drm/amdgpu: update the method for harvest IP for specific SKU
  drm/amdgpu: add judgement when add ip blocks (v2)
  drm/amd/display: Initialize attribute for hdcp_srm sysfs file
  drm/amd/pm: Fix out-of-bounds bug
  drm/radeon/si_dpm: Fix SMU power state load
  drm/radeon/ni_dpm: Fix booting bug
  MAINTAINERS: Update address for Emma Anholt
  MAINTAINERS: Update my e-mail
  drm/vc4: remove unused function
  drm/ttm: Do not add non-system domain BO into swap list

100 files changed:
Documentation/ABI/obsolete/sysfs-kernel-fadump_registered
Documentation/ABI/obsolete/sysfs-kernel-fadump_release_mem
Documentation/ABI/testing/sysfs-module
Documentation/block/data-integrity.rst
Documentation/cdrom/cdrom-standard.rst
Documentation/driver-api/serial/index.rst
Documentation/hwmon/tmp103.rst
Documentation/networking/device_drivers/ethernet/intel/i40e.rst
Documentation/networking/device_drivers/ethernet/intel/iavf.rst
Documentation/process/kernel-enforcement-statement.rst
Documentation/security/tpm/xen-tpmfront.rst
Documentation/timers/no_hz.rst
Documentation/translations/zh_CN/SecurityBugs [deleted file]
Documentation/usb/mtouchusb.rst
Documentation/usb/usb-serial.rst
Documentation/virt/kvm/api.rst
arch/x86/include/asm/kvm_host.h
arch/x86/include/asm/kvm_para.h
arch/x86/include/uapi/asm/kvm.h
arch/x86/kernel/kvm.c
arch/x86/kernel/kvmclock.c
arch/x86/kvm/cpuid.c
arch/x86/kvm/emulate.c
arch/x86/kvm/kvm_emulate.h
arch/x86/kvm/lapic.c
arch/x86/kvm/mmu/mmu.c
arch/x86/kvm/mmu/tdp_mmu.c
arch/x86/kvm/svm/nested.c
arch/x86/kvm/svm/sev.c
arch/x86/kvm/svm/svm.c
arch/x86/kvm/svm/svm.h
arch/x86/kvm/vmx/capabilities.h
arch/x86/kvm/vmx/nested.c
arch/x86/kvm/vmx/vmx.c
arch/x86/kvm/vmx/vmx.h
arch/x86/kvm/x86.c
drivers/acpi/device_pm.c
drivers/acpi/internal.h
drivers/acpi/power.c
drivers/acpi/scan.c
drivers/acpi/sleep.h
drivers/base/power/runtime.c
drivers/char/tpm/tpm2-cmd.c
drivers/char/tpm/tpm_tis_core.c
drivers/cpufreq/intel_pstate.c
drivers/hwmon/adm9240.c
drivers/hwmon/corsair-psu.c
drivers/hwmon/ltc2992.c
drivers/hwmon/occ/common.c
drivers/hwmon/occ/common.h
drivers/hwmon/pmbus/fsp-3y.c
drivers/tty/vt/vt_ioctl.c
drivers/video/console/vgacon.c
fs/btrfs/ctree.h
fs/btrfs/extent-tree.c
fs/btrfs/file.c
fs/btrfs/free-space-cache.c
fs/btrfs/inode.c
fs/btrfs/ioctl.c
fs/btrfs/ordered-data.c
fs/btrfs/qgroup.c
fs/btrfs/send.c
fs/btrfs/tree-log.c
fs/btrfs/zoned.c
include/linux/console_struct.h
include/linux/pm.h
kernel/ptrace.c
security/keys/trusted-keys/trusted_tpm1.c
security/keys/trusted-keys/trusted_tpm2.c
tools/arch/powerpc/include/uapi/asm/errno.h
tools/arch/x86/include/asm/cpufeatures.h
tools/arch/x86/include/asm/msr-index.h
tools/arch/x86/include/uapi/asm/vmx.h
tools/arch/x86/lib/memcpy_64.S
tools/arch/x86/lib/memset_64.S
tools/include/asm/alternative-asm.h [deleted file]
tools/include/asm/alternative.h [new file with mode: 0644]
tools/include/uapi/asm-generic/unistd.h
tools/include/uapi/drm/drm.h
tools/include/uapi/drm/i915_drm.h
tools/include/uapi/linux/kvm.h
tools/include/uapi/linux/perf_event.h
tools/include/uapi/linux/prctl.h
tools/kvm/kvm_stat/kvm_stat.txt
tools/perf/Makefile.config
tools/perf/arch/arm64/util/kvm-stat.c
tools/perf/arch/mips/entry/syscalls/syscall_n64.tbl
tools/perf/arch/powerpc/entry/syscalls/syscall.tbl
tools/perf/arch/s390/entry/syscalls/syscall.tbl
tools/perf/arch/x86/entry/syscalls/syscall_64.tbl
tools/perf/pmu-events/jevents.c
tools/perf/tests/attr/base-record
tools/perf/tests/attr/base-stat
tools/perf/tests/attr/system-wide-dummy
tools/perf/util/Build
tools/perf/util/record.c
tools/perf/util/session.c
tools/testing/selftests/kvm/lib/x86_64/handlers.S
tools/testing/selftests/kvm/x86_64/evmcs_test.c
virt/kvm/kvm_main.c

index 0360be3..dae880b 100644 (file)
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/registered.
 
 What:          /sys/kernel/fadump_registered
 Date:          Feb 2012
index 6ce0b12..ca2396e 100644 (file)
@@ -1,4 +1,4 @@
-This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.¬
+This ABI is renamed and moved to a new location /sys/kernel/fadump/release_mem.
 
 What:          /sys/kernel/fadump_release_mem
 Date:          Feb 2012
index a485434..88bddf1 100644 (file)
@@ -37,13 +37,13 @@ Description:        Maximum time allowed for periodic transfers per microframe (μs)
 
 What:          /sys/module/*/{coresize,initsize}
 Date:          Jan 2012
-KernelVersion:»·3.3
+KernelVersion: 3.3
 Contact:       Kay Sievers <kay.sievers@vrfy.org>
 Description:   Module size in bytes.
 
 What:          /sys/module/*/taint
 Date:          Jan 2012
-KernelVersion:»·3.3
+KernelVersion: 3.3
 Contact:       Kay Sievers <kay.sievers@vrfy.org>
 Description:   Module taint flags:
                        ==  =====================
index 4f2452a..07a97aa 100644 (file)
@@ -1,4 +1,4 @@
-==============
+==============
 Data Integrity
 ==============
 
index 70500b1..5845960 100644 (file)
@@ -146,18 +146,18 @@ with the kernel as a block device by registering the following general
 *struct file_operations*::
 
        struct file_operations cdrom_fops = {
-               NULL,                   /∗ lseek ∗/
-               block _read ,           /∗ read—general block-dev read ∗/
-               block _write,           /∗ write—general block-dev write ∗/
-               NULL,                   /∗ readdir ∗/
-               NULL,                   /∗ select ∗/
-               cdrom_ioctl,            /∗ ioctl ∗/
-               NULL,                   /∗ mmap ∗/
-               cdrom_open,             /∗ open ∗/
-               cdrom_release,          /∗ release ∗/
-               NULL,                   /∗ fsync ∗/
-               NULL,                   /∗ fasync ∗/
-               NULL                    /∗ revalidate ∗/
+               NULL,                   /* lseek */
+               block _read ,           /* read--general block-dev read */
+               block _write,           /* write--general block-dev write */
+               NULL,                   /* readdir */
+               NULL,                   /* select */
+               cdrom_ioctl,            /* ioctl */
+               NULL,                   /* mmap */
+               cdrom_open,             /* open */
+               cdrom_release,          /* release */
+               NULL,                   /* fsync */
+               NULL,                   /* fasync */
+               NULL                    /* revalidate */
        };
 
 Every active CD-ROM device shares this *struct*. The routines
@@ -250,12 +250,12 @@ The drive-specific, minor-like information that is registered with
 `cdrom.c`, currently contains the following fields::
 
   struct cdrom_device_info {
-       const struct cdrom_device_ops * ops;    /* device operations for this major */
+       const struct cdrom_device_ops * ops;    /* device operations for this major */
        struct list_head list;                  /* linked list of all device_info */
        struct gendisk * disk;                  /* matching block layer disk */
        void *  handle;                         /* driver-dependent data */
 
-       int mask;                               /* mask of capability: disables them */
+       int mask;                               /* mask of capability: disables them */
        int speed;                              /* maximum speed for reading data */
        int capacity;                           /* number of discs in a jukebox */
 
@@ -569,7 +569,7 @@ the *CDC_CLOSE_TRAY* bit in *mask*.
 
 In the file `cdrom.c` you will encounter many constructions of the type::
 
-       if (cdo->capability & ∼cdi->mask & CDC _⟨capability⟩) ...
+       if (cdo->capability & ~cdi->mask & CDC _<capability>) ...
 
 There is no *ioctl* to set the mask... The reason is that
 I think it is better to control the **behavior** rather than the
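
The construction above is a plain three-way AND: the driver must advertise the
capability (cdo->capability) and the per-drive mask (cdi->mask) must not
disable it. A minimal user-space sketch of that test, with simplified stand-in
struct definitions rather than the real kernel ones::

	#include <stdio.h>

	#define CDC_CLOSE_TRAY 0x1              /* example capability bit */

	struct cdrom_device_ops  { int capability; };   /* stand-in */
	struct cdrom_device_info { int mask; };         /* stand-in */

	static int cdrom_can(const struct cdrom_device_ops *cdo,
			     const struct cdrom_device_info *cdi, int cap)
	{
		return cdo->capability & ~cdi->mask & cap;
	}

	int main(void)
	{
		struct cdrom_device_ops  cdo = { .capability = CDC_CLOSE_TRAY };
		struct cdrom_device_info cdi = { .mask = CDC_CLOSE_TRAY };

		/* Advertised but masked off for this drive: prints 0. */
		printf("%d\n", cdrom_can(&cdo, &cdi, CDC_CLOSE_TRAY));
		return 0;
	}
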
index 21351b8..8f7d7af 100644 (file)
@@ -19,7 +19,6 @@ Serial drivers
 
     moxa-smartio
     n_gsm
-    rocket
     serial-iso7816
     serial-rs485
 
index e195a7d..b3ef814 100644 (file)
@@ -21,10 +21,10 @@ Description
 The TMP103 is a digital output temperature sensor in a four-ball
 wafer chip-scale package (WCSP). The TMP103 is capable of reading
 temperatures to a resolution of 1°C. The TMP103 is specified for
-operation over a temperature range of 40°C to +125°C.
+operation over a temperature range of -40°C to +125°C.
 
 Resolution: 8 Bits
-Accuracy: ±1°C Typ (10°C to +100°C)
+Accuracy: ±1°C Typ (-10°C to +100°C)
 
 The driver provides the common sysfs-interface for temperatures (see
 Documentation/hwmon/sysfs-interface.rst under Temperatures).
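
As a sketch of what that common interface looks like from user space, the
following reads one temperature attribute; the hwmon index (hwmon0) and the
attribute name (temp1_input) are assumptions for illustration, and hwmon
reports values in millidegrees Celsius::

	#include <stdio.h>

	int main(void)
	{
		long mdeg;
		FILE *f = fopen("/sys/class/hwmon/hwmon0/temp1_input", "r");

		if (!f) {
			perror("temp1_input");
			return 1;
		}
		if (fscanf(f, "%ld", &mdeg) != 1) {
			fclose(f);
			return 1;
		}
		fclose(f);
		printf("%.1f degC\n", mdeg / 1000.0);   /* e.g. "24.0 degC" */
		return 0;
	}
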
index 8a9b185..2d3f6bd 100644 (file)
@@ -173,7 +173,7 @@ Director rule is added from ethtool (Sideband filter), ATR is turned off by the
 driver. To re-enable ATR, the sideband can be disabled with the ethtool -K
 option. For example::
 
-  ethtool K [adapter] ntuple [off|on]
+  ethtool -K [adapter] ntuple [off|on]
 
 If sideband is re-enabled after ATR is re-enabled, ATR remains enabled until a
 TCP-IP flow is added. When all TCP-IP sideband rules are deleted, ATR is
@@ -688,7 +688,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
 Totals must be equal or less than port speed.
 
 For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as `ifstat` or `sar -n DEV [interval] [number of samples]`
 
 2. Enable HW TC offload on interface::
 
index 52e037b..25330b7 100644 (file)
@@ -179,7 +179,7 @@ shaper bw_rlimit: for each tc, sets minimum and maximum bandwidth rates.
 Totals must be equal or less than port speed.
 
 For example: min_rate 1Gbit 3Gbit: Verify bandwidth limit using network
-monitoring tools such as ifstat or sar –n DEV [interval] [number of samples]
+monitoring tools such as ``ifstat`` or ``sar -n DEV [interval] [number of samples]``
 
 NOTE:
   Setting up channels via ethtool (ethtool -L) is not supported when the
index e5a1be4..dc2d813 100644 (file)
@@ -1,4 +1,4 @@
-.. _process_statement_kernel:
+.. _process_statement_kernel:
 
 Linux Kernel Enforcement Statement
 ----------------------------------
index 00d5b1d..31c6752 100644 (file)
@@ -1,4 +1,4 @@
-=============================
+=============================
 Virtual TPM interface for Xen
 =============================
 
index c4c70e1..6cadad7 100644 (file)
@@ -1,4 +1,4 @@
-======================================
+======================================
 NO_HZ: Reducing Scheduling-Clock Ticks
 ======================================
 
diff --git a/Documentation/translations/zh_CN/SecurityBugs b/Documentation/translations/zh_CN/SecurityBugs
deleted file mode 100644 (file)
index 2d0fffd..0000000
+++ /dev/null
@@ -1,50 +0,0 @@
-Chinese translated version of Documentation/admin-guide/security-bugs.rst
-
-If you have any comment or update to the content, please contact the
-original document maintainer directly.  However, if you have a problem
-communicating in English you can also ask the Chinese maintainer for
-help.  Contact the Chinese maintainer if this translation is outdated
-or if there is a problem with the translation.
-
-Chinese maintainer: Harry Wei <harryxiyou@gmail.com>
----------------------------------------------------------------------
-Documentation/admin-guide/security-bugs.rst 的中文翻译
-
-如果想评论或更新本文的内容,请直接联系原文档的维护者。如果你使用英文
-交流有困难的话,也可以向中文版维护者求助。如果本翻译更新不及时或者翻
-译存在问题,请联系中文版维护者。
-
-中文版维护者: 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版翻译者: 贾威威 Harry Wei <harryxiyou@gmail.com>
-中文版校译者: 贾威威 Harry Wei <harryxiyou@gmail.com>
-
-
-以下为正文
----------------------------------------------------------------------
-Linux内核开发者认为安全非常重要。因此,我们想要知道当一个有关于
-安全的漏洞被发现的时候,并且它可能会被尽快的修复或者公开。请把这个安全
-漏洞报告给Linux内核安全团队。
-
-1) 联系
-
-linux内核安全团队可以通过email<security@kernel.org>来联系。这是
-一组独立的安全工作人员,可以帮助改善漏洞报告并且公布和取消一个修复。安
-全团队有可能会从部分的维护者那里引进额外的帮助来了解并且修复安全漏洞。
-当遇到任何漏洞,所能提供的信息越多就越能诊断和修复。如果你不清楚什么
-是有帮助的信息,那就请重温一下admin-guide/reporting-bugs.rst文件中的概述过程。任
-何攻击性的代码都是非常有用的,未经报告者的同意不会被取消,除非它已经
-被公布于众。
-
-2) 公开
-
-Linux内核安全团队的宗旨就是和漏洞提交者一起处理漏洞的解决方案直
-到公开。我们喜欢尽快地完全公开漏洞。当一个漏洞或者修复还没有被完全地理
-解,解决方案没有通过测试或者供应商协调,可以合理地延迟公开。然而,我们
-期望这些延迟尽可能的短些,是可数的几天,而不是几个星期或者几个月。公开
-日期是通过安全团队和漏洞提供者以及供应商洽谈后的结果。公开时间表是从很
-短(特殊的,它已经被公众所知道)到几个星期。作为一个基本的默认政策,我
-们所期望通知公众的日期是7天的安排。
-
-3) 保密协议
-
-Linux内核安全团队不是一个正式的团体,因此不能加入任何的保密协议。
index d1111b7..5ae1f74 100644 (file)
@@ -1,4 +1,4 @@
-================
+================
 mtouchusb driver
 ================
 
index 8fa7dbd..69586ae 100644 (file)
@@ -1,4 +1,4 @@
-==========
+==========
 USB serial
 ==========
 
index 22d0775..7fcb2fd 100644 (file)
@@ -4803,7 +4803,7 @@ KVM_PV_VM_VERIFY
 4.126 KVM_X86_SET_MSR_FILTER
 ----------------------------
 
-:Capability: KVM_X86_SET_MSR_FILTER
+:Capability: KVM_CAP_X86_MSR_FILTER
 :Architectures: x86
 :Type: vm ioctl
 :Parameters: struct kvm_msr_filter
@@ -6715,7 +6715,7 @@ accesses that would usually trigger a #GP by KVM into the guest will
 instead get bounced to user space through the KVM_EXIT_X86_RDMSR and
 KVM_EXIT_X86_WRMSR exit notifications.
 
-8.27 KVM_X86_SET_MSR_FILTER
+8.27 KVM_CAP_X86_MSR_FILTER
 ---------------------------
 
 :Architectures: x86
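
For context, a hedged user-space sketch of installing such a filter through
the KVM_X86_SET_MSR_FILTER vm ioctl named above; vm_fd is assumed to be an
already-created KVM VM file descriptor, and error handling is elided::

	#include <string.h>
	#include <sys/ioctl.h>
	#include <linux/kvm.h>

	/* Deny guest reads and writes of one MSR, allow everything else. */
	static int deny_one_msr(int vm_fd, __u32 msr)
	{
		static __u8 bitmap[1];          /* 1 bit per MSR; 0 = deny */
		struct kvm_msr_filter filter;

		memset(&filter, 0, sizeof(filter));
		filter.flags = KVM_MSR_FILTER_DEFAULT_ALLOW;

		filter.ranges[0].flags  = KVM_MSR_FILTER_READ |
					  KVM_MSR_FILTER_WRITE;
		filter.ranges[0].base   = msr;
		filter.ranges[0].nmsrs  = 1;
		filter.ranges[0].bitmap = bitmap;

		/*
		 * Denied accesses either inject #GP or, with
		 * KVM_CAP_X86_USER_SPACE_MSR enabled, bounce to user space
		 * as KVM_EXIT_X86_RDMSR/KVM_EXIT_X86_WRMSR.
		 */
		return ioctl(vm_fd, KVM_X86_SET_MSR_FILTER, &filter);
	}
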
index cbbcee0..55efbac 100644 (file)
 #define VALID_PAGE(x) ((x) != INVALID_PAGE)
 
 #define UNMAPPED_GVA (~(gpa_t)0)
+#define INVALID_GPA (~(gpa_t)0)
 
 /* KVM Hugepage definitions for x86 */
 #define KVM_MAX_HUGEPAGE_LEVEL PG_LEVEL_1G
@@ -199,6 +200,7 @@ enum x86_intercept_stage;
 
 #define KVM_NR_DB_REGS 4
 
+#define DR6_BUS_LOCK   (1 << 11)
 #define DR6_BD         (1 << 13)
 #define DR6_BS         (1 << 14)
 #define DR6_BT         (1 << 15)
@@ -212,7 +214,7 @@ enum x86_intercept_stage;
  * DR6_ACTIVE_LOW is also used as the init/reset value for DR6.
  */
 #define DR6_ACTIVE_LOW 0xffff0ff0
-#define DR6_VOLATILE   0x0001e00f
+#define DR6_VOLATILE   0x0001e80f
 #define DR6_FIXED_1    (DR6_ACTIVE_LOW & ~DR6_VOLATILE)
 
 #define DR7_BP_EN_MASK 0x000000ff
@@ -407,7 +409,7 @@ struct kvm_mmu {
        u32 pkru_mask;
 
        u64 *pae_root;
-       u64 *lm_root;
+       u64 *pml4_root;
 
        /*
         * check zero bits on shadow page table entries, these
@@ -1417,6 +1419,7 @@ struct kvm_arch_async_pf {
        bool direct_map;
 };
 
+extern u32 __read_mostly kvm_nr_uret_msrs;
 extern u64 __read_mostly host_efer;
 extern bool __read_mostly allow_smaller_maxphyaddr;
 extern struct kvm_x86_ops kvm_x86_ops;
@@ -1775,9 +1778,15 @@ int kvm_pv_send_ipi(struct kvm *kvm, unsigned long ipi_bitmap_low,
                    unsigned long ipi_bitmap_high, u32 min,
                    unsigned long icr, int op_64_bit);
 
-void kvm_define_user_return_msr(unsigned index, u32 msr);
+int kvm_add_user_return_msr(u32 msr);
+int kvm_find_user_return_msr(u32 msr);
 int kvm_set_user_return_msr(unsigned index, u64 val, u64 mask);
 
+static inline bool kvm_is_supported_user_return_msr(u32 msr)
+{
+       return kvm_find_user_return_msr(msr) >= 0;
+}
+
 u64 kvm_scale_tsc(struct kvm_vcpu *vcpu, u64 tsc);
 u64 kvm_read_l1_tsc(struct kvm_vcpu *vcpu, u64 host_tsc);
 
index 3381198..6929987 100644 (file)
@@ -7,8 +7,6 @@
 #include <linux/interrupt.h>
 #include <uapi/asm/kvm_para.h>
 
-extern void kvmclock_init(void);
-
 #ifdef CONFIG_KVM_GUEST
 bool kvm_check_and_clear_guest_paused(void);
 #else
@@ -86,13 +84,14 @@ static inline long kvm_hypercall4(unsigned int nr, unsigned long p1,
 }
 
 #ifdef CONFIG_KVM_GUEST
+void kvmclock_init(void);
+void kvmclock_disable(void);
 bool kvm_para_available(void);
 unsigned int kvm_arch_para_features(void);
 unsigned int kvm_arch_para_hints(void);
 void kvm_async_pf_task_wait_schedule(u32 token);
 void kvm_async_pf_task_wake(u32 token);
 u32 kvm_read_and_reset_apf_flags(void);
-void kvm_disable_steal_time(void);
 bool __kvm_handle_async_pf(struct pt_regs *regs, u32 token);
 
 DECLARE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -137,11 +136,6 @@ static inline u32 kvm_read_and_reset_apf_flags(void)
        return 0;
 }
 
-static inline void kvm_disable_steal_time(void)
-{
-       return;
-}
-
 static __always_inline bool kvm_handle_async_pf(struct pt_regs *regs, u32 token)
 {
        return false;
index 5a3022c..0662f64 100644 (file)
@@ -437,6 +437,8 @@ struct kvm_vmx_nested_state_hdr {
                __u16 flags;
        } smm;
 
+       __u16 pad;
+
        __u32 flags;
        __u64 preemption_timer_deadline;
 };
index d307c22..a26643d 100644 (file)
@@ -26,6 +26,7 @@
 #include <linux/kprobes.h>
 #include <linux/nmi.h>
 #include <linux/swait.h>
+#include <linux/syscore_ops.h>
 #include <asm/timer.h>
 #include <asm/cpu.h>
 #include <asm/traps.h>
@@ -37,6 +38,7 @@
 #include <asm/tlb.h>
 #include <asm/cpuidle_haltpoll.h>
 #include <asm/ptrace.h>
+#include <asm/reboot.h>
 #include <asm/svm.h>
 
 DEFINE_STATIC_KEY_FALSE(kvm_async_pf_enabled);
@@ -345,7 +347,7 @@ static void kvm_guest_cpu_init(void)
 
                wrmsrl(MSR_KVM_ASYNC_PF_EN, pa);
                __this_cpu_write(apf_reason.enabled, 1);
-               pr_info("KVM setup async PF for cpu %d\n", smp_processor_id());
+               pr_info("setup async PF for cpu %d\n", smp_processor_id());
        }
 
        if (kvm_para_has_feature(KVM_FEATURE_PV_EOI)) {
@@ -371,34 +373,17 @@ static void kvm_pv_disable_apf(void)
        wrmsrl(MSR_KVM_ASYNC_PF_EN, 0);
        __this_cpu_write(apf_reason.enabled, 0);
 
-       pr_info("Unregister pv shared memory for cpu %d\n", smp_processor_id());
+       pr_info("disable async PF for cpu %d\n", smp_processor_id());
 }
 
-static void kvm_pv_guest_cpu_reboot(void *unused)
+static void kvm_disable_steal_time(void)
 {
-       /*
-        * We disable PV EOI before we load a new kernel by kexec,
-        * since MSR_KVM_PV_EOI_EN stores a pointer into old kernel's memory.
-        * New kernel can re-enable when it boots.
-        */
-       if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-       kvm_pv_disable_apf();
-       kvm_disable_steal_time();
-}
+       if (!has_steal_clock)
+               return;
 
-static int kvm_pv_reboot_notify(struct notifier_block *nb,
-                               unsigned long code, void *unused)
-{
-       if (code == SYS_RESTART)
-               on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
-       return NOTIFY_DONE;
+       wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
 }
 
-static struct notifier_block kvm_pv_reboot_nb = {
-       .notifier_call = kvm_pv_reboot_notify,
-};
-
 static u64 kvm_steal_clock(int cpu)
 {
        u64 steal;
@@ -416,14 +401,6 @@ static u64 kvm_steal_clock(int cpu)
        return steal;
 }
 
-void kvm_disable_steal_time(void)
-{
-       if (!has_steal_clock)
-               return;
-
-       wrmsr(MSR_KVM_STEAL_TIME, 0, 0);
-}
-
 static inline void __set_percpu_decrypted(void *ptr, unsigned long size)
 {
        early_set_memory_decrypted((unsigned long) ptr, size);
@@ -451,6 +428,27 @@ static void __init sev_map_percpu_data(void)
        }
 }
 
+static void kvm_guest_cpu_offline(bool shutdown)
+{
+       kvm_disable_steal_time();
+       if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
+               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
+       kvm_pv_disable_apf();
+       if (!shutdown)
+               apf_task_wake_all();
+       kvmclock_disable();
+}
+
+static int kvm_cpu_online(unsigned int cpu)
+{
+       unsigned long flags;
+
+       local_irq_save(flags);
+       kvm_guest_cpu_init();
+       local_irq_restore(flags);
+       return 0;
+}
+
 #ifdef CONFIG_SMP
 
 static DEFINE_PER_CPU(cpumask_var_t, __pv_cpu_mask);
@@ -635,31 +633,64 @@ static void __init kvm_smp_prepare_boot_cpu(void)
        kvm_spinlock_init();
 }
 
-static void kvm_guest_cpu_offline(void)
+static int kvm_cpu_down_prepare(unsigned int cpu)
 {
-       kvm_disable_steal_time();
-       if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
-               wrmsrl(MSR_KVM_PV_EOI_EN, 0);
-       kvm_pv_disable_apf();
-       apf_task_wake_all();
+       unsigned long flags;
+
+       local_irq_save(flags);
+       kvm_guest_cpu_offline(false);
+       local_irq_restore(flags);
+       return 0;
 }
 
-static int kvm_cpu_online(unsigned int cpu)
+#endif
+
+static int kvm_suspend(void)
 {
-       local_irq_disable();
-       kvm_guest_cpu_init();
-       local_irq_enable();
+       kvm_guest_cpu_offline(false);
+
        return 0;
 }
 
-static int kvm_cpu_down_prepare(unsigned int cpu)
+static void kvm_resume(void)
 {
-       local_irq_disable();
-       kvm_guest_cpu_offline();
-       local_irq_enable();
-       return 0;
+       kvm_cpu_online(raw_smp_processor_id());
+}
+
+static struct syscore_ops kvm_syscore_ops = {
+       .suspend        = kvm_suspend,
+       .resume         = kvm_resume,
+};
+
+static void kvm_pv_guest_cpu_reboot(void *unused)
+{
+       kvm_guest_cpu_offline(true);
+}
+
+static int kvm_pv_reboot_notify(struct notifier_block *nb,
+                               unsigned long code, void *unused)
+{
+       if (code == SYS_RESTART)
+               on_each_cpu(kvm_pv_guest_cpu_reboot, NULL, 1);
+       return NOTIFY_DONE;
 }
 
+static struct notifier_block kvm_pv_reboot_nb = {
+       .notifier_call = kvm_pv_reboot_notify,
+};
+
+/*
+ * After a PV feature is registered, the host will keep writing to the
+ * registered memory location. If the guest happens to shut down, this memory
+ * won't be valid. In cases like kexec, in which you install a new kernel, this
+ * means the host keeps writing to a random memory location.
+ */
+#ifdef CONFIG_KEXEC_CORE
+static void kvm_crash_shutdown(struct pt_regs *regs)
+{
+       kvm_guest_cpu_offline(true);
+       native_machine_crash_shutdown(regs);
+}
 #endif
 
 static void __init kvm_guest_init(void)
@@ -704,6 +735,12 @@ static void __init kvm_guest_init(void)
        kvm_guest_cpu_init();
 #endif
 
+#ifdef CONFIG_KEXEC_CORE
+       machine_ops.crash_shutdown = kvm_crash_shutdown;
+#endif
+
+       register_syscore_ops(&kvm_syscore_ops);
+
        /*
         * Hard lockup detection is enabled by default. Disable it, as guests
         * can get false positives too easily, for example if the host is
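
Taken together, the rearranged hunks above converge on a single teardown path:
CPU hotplug, suspend, reboot, and kexec/crash all funnel through
kvm_guest_cpu_offline(), which unregisters each PV feature before its backing
memory can disappear. The same code as in the hunk above, annotated for
reference::

	static void kvm_guest_cpu_offline(bool shutdown)
	{
		kvm_disable_steal_time();           /* MSR_KVM_STEAL_TIME = 0 */
		if (kvm_para_has_feature(KVM_FEATURE_PV_EOI))
			wrmsrl(MSR_KVM_PV_EOI_EN, 0); /* stop PV EOI writes */
		kvm_pv_disable_apf();               /* MSR_KVM_ASYNC_PF_EN = 0 */
		if (!shutdown)
			apf_task_wake_all();        /* not when crashing/rebooting */
		kvmclock_disable();                 /* msr_kvm_system_time = 0 */
	}
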
index d37ed4e..ad273e5 100644 (file)
@@ -20,7 +20,6 @@
 #include <asm/hypervisor.h>
 #include <asm/mem_encrypt.h>
 #include <asm/x86_init.h>
-#include <asm/reboot.h>
 #include <asm/kvmclock.h>
 
 static int kvmclock __initdata = 1;
@@ -203,28 +202,9 @@ static void kvm_setup_secondary_clock(void)
 }
 #endif
 
-/*
- * After the clock is registered, the host will keep writing to the
- * registered memory location. If the guest happens to shutdown, this memory
- * won't be valid. In cases like kexec, in which you install a new kernel, this
- * means a random memory location will be kept being written. So before any
- * kind of shutdown from our side, we unregister the clock by writing anything
- * that does not have the 'enable' bit set in the msr
- */
-#ifdef CONFIG_KEXEC_CORE
-static void kvm_crash_shutdown(struct pt_regs *regs)
-{
-       native_write_msr(msr_kvm_system_time, 0, 0);
-       kvm_disable_steal_time();
-       native_machine_crash_shutdown(regs);
-}
-#endif
-
-static void kvm_shutdown(void)
+void kvmclock_disable(void)
 {
        native_write_msr(msr_kvm_system_time, 0, 0);
-       kvm_disable_steal_time();
-       native_machine_shutdown();
 }
 
 static void __init kvmclock_init_mem(void)
@@ -351,10 +331,6 @@ void __init kvmclock_init(void)
 #endif
        x86_platform.save_sched_clock_state = kvm_save_sched_clock_state;
        x86_platform.restore_sched_clock_state = kvm_restore_sched_clock_state;
-       machine_ops.shutdown  = kvm_shutdown;
-#ifdef CONFIG_KEXEC_CORE
-       machine_ops.crash_shutdown  = kvm_crash_shutdown;
-#endif
        kvm_get_preset_lpj();
 
        /*
index 19606a3..9a48f13 100644 (file)
@@ -458,7 +458,7 @@ void kvm_set_cpu_caps(void)
                F(AVX512_VPOPCNTDQ) | F(UMIP) | F(AVX512_VBMI2) | F(GFNI) |
                F(VAES) | F(VPCLMULQDQ) | F(AVX512_VNNI) | F(AVX512_BITALG) |
                F(CLDEMOTE) | F(MOVDIRI) | F(MOVDIR64B) | 0 /*WAITPKG*/ |
-               F(SGX_LC)
+               F(SGX_LC) | F(BUS_LOCK_DETECT)
        );
        /* Set LA57 based on hardware capability. */
        if (cpuid_ecx(7) & F(LA57))
@@ -567,6 +567,21 @@ void kvm_set_cpu_caps(void)
                F(ACE2) | F(ACE2_EN) | F(PHE) | F(PHE_EN) |
                F(PMM) | F(PMM_EN)
        );
+
+       /*
+        * Hide RDTSCP and RDPID if either feature is reported as supported but
+        * probing MSR_TSC_AUX failed.  This is purely a sanity check and
+        * should never happen, but the guest will likely crash if RDTSCP or
+        * RDPID is misreported, and KVM has botched MSR_TSC_AUX emulation in
+        * the past.  For example, the sanity check may fire if this instance of
+        * KVM is running as L1 on top of an older, broken KVM.
+        */
+       if (WARN_ON((kvm_cpu_cap_has(X86_FEATURE_RDTSCP) ||
+                    kvm_cpu_cap_has(X86_FEATURE_RDPID)) &&
+                    !kvm_is_supported_user_return_msr(MSR_TSC_AUX))) {
+               kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+               kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+       }
 }
 EXPORT_SYMBOL_GPL(kvm_set_cpu_caps);
 
@@ -637,7 +652,8 @@ static int __do_cpuid_func_emulated(struct kvm_cpuid_array *array, u32 func)
        case 7:
                entry->flags |= KVM_CPUID_FLAG_SIGNIFCANT_INDEX;
                entry->eax = 0;
-               entry->ecx = F(RDPID);
+               if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+                       entry->ecx = F(RDPID);
                ++array->nent;
        default:
                break;
index 77e1c89..8a0ccdb 100644 (file)
@@ -4502,7 +4502,7 @@ static const struct opcode group8[] = {
  * from the register case of group9.
  */
 static const struct gprefix pfx_0f_c7_7 = {
-       N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdtscp),
+       N, N, N, II(DstMem | ModRM | Op3264 | EmulateOnUD, em_rdpid, rdpid),
 };
 
 
index 0d35911..f016838 100644 (file)
@@ -468,6 +468,7 @@ enum x86_intercept {
        x86_intercept_clgi,
        x86_intercept_skinit,
        x86_intercept_rdtscp,
+       x86_intercept_rdpid,
        x86_intercept_icebp,
        x86_intercept_wbinvd,
        x86_intercept_monitor,
index 152591f..c0ebef5 100644 (file)
@@ -1913,8 +1913,8 @@ void kvm_lapic_expired_hv_timer(struct kvm_vcpu *vcpu)
        if (!apic->lapic_timer.hv_timer_in_use)
                goto out;
        WARN_ON(rcuwait_active(&vcpu->wait));
-       cancel_hv_timer(apic);
        apic_timer_expired(apic, false);
+       cancel_hv_timer(apic);
 
        if (apic_lvtt_period(apic) && apic->lapic_timer.period) {
                advance_periodic_target_expiration(apic);
index 4b3ee24..0144c40 100644 (file)
@@ -3310,12 +3310,12 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        if (mmu->shadow_root_level == PT64_ROOT_4LEVEL) {
                pm_mask |= PT_ACCESSED_MASK | PT_WRITABLE_MASK | PT_USER_MASK;
 
-               if (WARN_ON_ONCE(!mmu->lm_root)) {
+               if (WARN_ON_ONCE(!mmu->pml4_root)) {
                        r = -EIO;
                        goto out_unlock;
                }
 
-               mmu->lm_root[0] = __pa(mmu->pae_root) | pm_mask;
+               mmu->pml4_root[0] = __pa(mmu->pae_root) | pm_mask;
        }
 
        for (i = 0; i < 4; ++i) {
@@ -3335,7 +3335,7 @@ static int mmu_alloc_shadow_roots(struct kvm_vcpu *vcpu)
        }
 
        if (mmu->shadow_root_level == PT64_ROOT_4LEVEL)
-               mmu->root_hpa = __pa(mmu->lm_root);
+               mmu->root_hpa = __pa(mmu->pml4_root);
        else
                mmu->root_hpa = __pa(mmu->pae_root);
 
@@ -3350,7 +3350,7 @@ out_unlock:
 static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
 {
        struct kvm_mmu *mmu = vcpu->arch.mmu;
-       u64 *lm_root, *pae_root;
+       u64 *pml4_root, *pae_root;
 
        /*
         * When shadowing 32-bit or PAE NPT with 64-bit NPT, the PML4 and PDP
@@ -3369,14 +3369,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
        if (WARN_ON_ONCE(mmu->shadow_root_level != PT64_ROOT_4LEVEL))
                return -EIO;
 
-       if (mmu->pae_root && mmu->lm_root)
+       if (mmu->pae_root && mmu->pml4_root)
                return 0;
 
        /*
         * The special roots should always be allocated in concert.  Yell and
         * bail if KVM ends up in a state where only one of the roots is valid.
         */
-       if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->lm_root))
+       if (WARN_ON_ONCE(!tdp_enabled || mmu->pae_root || mmu->pml4_root))
                return -EIO;
 
        /*
@@ -3387,14 +3387,14 @@ static int mmu_alloc_special_roots(struct kvm_vcpu *vcpu)
        if (!pae_root)
                return -ENOMEM;
 
-       lm_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
-       if (!lm_root) {
+       pml4_root = (void *)get_zeroed_page(GFP_KERNEL_ACCOUNT);
+       if (!pml4_root) {
                free_page((unsigned long)pae_root);
                return -ENOMEM;
        }
 
        mmu->pae_root = pae_root;
-       mmu->lm_root = lm_root;
+       mmu->pml4_root = pml4_root;
 
        return 0;
 }
@@ -5261,7 +5261,7 @@ static void free_mmu_pages(struct kvm_mmu *mmu)
        if (!tdp_enabled && mmu->pae_root)
                set_memory_encrypted((unsigned long)mmu->pae_root, 1);
        free_page((unsigned long)mmu->pae_root);
-       free_page((unsigned long)mmu->lm_root);
+       free_page((unsigned long)mmu->pml4_root);
 }
 
 static int __kvm_mmu_create(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu)
index 88f69a6..95eeb5a 100644 (file)
@@ -388,7 +388,7 @@ static void handle_removed_tdp_mmu_page(struct kvm *kvm, tdp_ptep_t pt,
 }
 
 /**
- * handle_changed_spte - handle bookkeeping associated with an SPTE change
+ * __handle_changed_spte - handle bookkeeping associated with an SPTE change
  * @kvm: kvm instance
  * @as_id: the address space of the paging structure the SPTE was a part of
  * @gfn: the base GFN that was mapped by the SPTE
@@ -444,6 +444,13 @@ static void __handle_changed_spte(struct kvm *kvm, int as_id, gfn_t gfn,
 
        trace_kvm_tdp_mmu_spte_changed(as_id, gfn, level, old_spte, new_spte);
 
+       if (is_large_pte(old_spte) != is_large_pte(new_spte)) {
+               if (is_large_pte(old_spte))
+                       atomic64_sub(1, (atomic64_t*)&kvm->stat.lpages);
+               else
+                       atomic64_add(1, (atomic64_t*)&kvm->stat.lpages);
+       }
+
        /*
         * The only times a SPTE should be changed from a non-present to
         * non-present state is when an MMIO entry is installed/modified/
@@ -1009,6 +1016,14 @@ int kvm_tdp_mmu_map(struct kvm_vcpu *vcpu, gpa_t gpa, u32 error_code,
                }
 
                if (!is_shadow_present_pte(iter.old_spte)) {
+                       /*
+                        * If the SPTE has been frozen by another thread, just
+                        * give up and retry, avoiding unnecessary page table
+                        * allocation and freeing.
+                        */
+                       if (is_removed_spte(iter.old_spte))
+                               break;
+
                        sp = alloc_tdp_mmu_page(vcpu, iter.gfn, iter.level);
                        child_pt = sp->spt;
 
index 540d43b..5e8d844 100644 (file)
@@ -764,7 +764,6 @@ int nested_svm_vmexit(struct vcpu_svm *svm)
        nested_svm_copy_common_state(svm->nested.vmcb02.ptr, svm->vmcb01.ptr);
 
        svm_switch_vmcb(svm, &svm->vmcb01);
-       WARN_ON_ONCE(svm->vmcb->control.exit_code != SVM_EXIT_VMRUN);
 
        /*
         * On vmexit the  GIF is set to false and
@@ -872,6 +871,15 @@ void svm_free_nested(struct vcpu_svm *svm)
        __free_page(virt_to_page(svm->nested.vmcb02.ptr));
        svm->nested.vmcb02.ptr = NULL;
 
+       /*
+        * When last_vmcb12_gpa matches the current vmcb12 gpa,
+        * some vmcb12 fields are not loaded if they are marked clean
+        * in the vmcb12, since in this case they are up to date already.
+        *
+        * When the vmcb02 is freed, this optimization becomes invalid.
+        */
+       svm->nested.last_vmcb12_gpa = INVALID_GPA;
+
        svm->nested.initialized = false;
 }
 
@@ -884,9 +892,11 @@ void svm_leave_nested(struct vcpu_svm *svm)
 
        if (is_guest_mode(vcpu)) {
                svm->nested.nested_run_pending = 0;
+               svm->nested.vmcb12_gpa = INVALID_GPA;
+
                leave_guest_mode(vcpu);
 
-               svm_switch_vmcb(svm, &svm->nested.vmcb02);
+               svm_switch_vmcb(svm, &svm->vmcb01);
 
                nested_svm_uninit_mmu_context(vcpu);
                vmcb_mark_all_dirty(svm->vmcb);
@@ -1298,12 +1308,17 @@ static int svm_set_nested_state(struct kvm_vcpu *vcpu,
         * L2 registers if needed are moved from the current VMCB to VMCB02.
         */
 
+       if (is_guest_mode(vcpu))
+               svm_leave_nested(svm);
+       else
+               svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
+
+       svm_set_gif(svm, !!(kvm_state->flags & KVM_STATE_NESTED_GIF_SET));
+
        svm->nested.nested_run_pending =
                !!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
 
        svm->nested.vmcb12_gpa = kvm_state->hdr.svm.vmcb_pa;
-       if (svm->current_vmcb == &svm->vmcb01)
-               svm->nested.vmcb02.ptr->save = svm->vmcb01.ptr->save;
 
        svm->vmcb01.ptr->save.es = save->es;
        svm->vmcb01.ptr->save.cs = save->cs;
index 1356ee0..5bc887e 100644 (file)
@@ -763,7 +763,7 @@ static int __sev_dbg_decrypt(struct kvm *kvm, unsigned long src_paddr,
 }
 
 static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
-                                 unsigned long __user dst_uaddr,
+                                 void __user *dst_uaddr,
                                  unsigned long dst_paddr,
                                  int size, int *err)
 {
@@ -787,8 +787,7 @@ static int __sev_dbg_decrypt_user(struct kvm *kvm, unsigned long paddr,
 
        if (tpage) {
                offset = paddr & 15;
-               if (copy_to_user((void __user *)(uintptr_t)dst_uaddr,
-                                page_address(tpage) + offset, size))
+               if (copy_to_user(dst_uaddr, page_address(tpage) + offset, size))
                        ret = -EFAULT;
        }
 
@@ -800,9 +799,9 @@ e_free:
 }
 
 static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
-                                 unsigned long __user vaddr,
+                                 void __user *vaddr,
                                  unsigned long dst_paddr,
-                                 unsigned long __user dst_vaddr,
+                                 void __user *dst_vaddr,
                                  int size, int *error)
 {
        struct page *src_tpage = NULL;
@@ -810,13 +809,12 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
        int ret, len = size;
 
        /* If source buffer is not aligned then use an intermediate buffer */
-       if (!IS_ALIGNED(vaddr, 16)) {
+       if (!IS_ALIGNED((unsigned long)vaddr, 16)) {
                src_tpage = alloc_page(GFP_KERNEL);
                if (!src_tpage)
                        return -ENOMEM;
 
-               if (copy_from_user(page_address(src_tpage),
-                               (void __user *)(uintptr_t)vaddr, size)) {
+               if (copy_from_user(page_address(src_tpage), vaddr, size)) {
                        __free_page(src_tpage);
                        return -EFAULT;
                }
@@ -830,7 +828,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
         *   - copy the source buffer in an intermediate buffer
         *   - use the intermediate buffer as source buffer
         */
-       if (!IS_ALIGNED(dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
+       if (!IS_ALIGNED((unsigned long)dst_vaddr, 16) || !IS_ALIGNED(size, 16)) {
                int dst_offset;
 
                dst_tpage = alloc_page(GFP_KERNEL);
@@ -855,7 +853,7 @@ static int __sev_dbg_encrypt_user(struct kvm *kvm, unsigned long paddr,
                               page_address(src_tpage), size);
                else {
                        if (copy_from_user(page_address(dst_tpage) + dst_offset,
-                                          (void __user *)(uintptr_t)vaddr, size)) {
+                                          vaddr, size)) {
                                ret = -EFAULT;
                                goto e_free;
                        }
@@ -935,15 +933,15 @@ static int sev_dbg_crypt(struct kvm *kvm, struct kvm_sev_cmd *argp, bool dec)
                if (dec)
                        ret = __sev_dbg_decrypt_user(kvm,
                                                     __sme_page_pa(src_p[0]) + s_off,
-                                                    dst_vaddr,
+                                                    (void __user *)dst_vaddr,
                                                     __sme_page_pa(dst_p[0]) + d_off,
                                                     len, &argp->error);
                else
                        ret = __sev_dbg_encrypt_user(kvm,
                                                     __sme_page_pa(src_p[0]) + s_off,
-                                                    vaddr,
+                                                    (void __user *)vaddr,
                                                     __sme_page_pa(dst_p[0]) + d_off,
-                                                    dst_vaddr,
+                                                    (void __user *)dst_vaddr,
                                                     len, &argp->error);
 
                sev_unpin_memory(kvm, src_p, n);
@@ -1764,7 +1762,8 @@ e_mirror_unlock:
 e_source_unlock:
        mutex_unlock(&source_kvm->lock);
 e_source_put:
-       fput(source_kvm_file);
+       if (source_kvm_file)
+               fput(source_kvm_file);
        return ret;
 }
 
@@ -2198,7 +2197,7 @@ vmgexit_err:
        return -EINVAL;
 }
 
-static void pre_sev_es_run(struct vcpu_svm *svm)
+void sev_es_unmap_ghcb(struct vcpu_svm *svm)
 {
        if (!svm->ghcb)
                return;
@@ -2234,9 +2233,6 @@ void pre_sev_run(struct vcpu_svm *svm, int cpu)
        struct svm_cpu_data *sd = per_cpu(svm_data, cpu);
        int asid = sev_get_asid(svm->vcpu.kvm);
 
-       /* Perform any SEV-ES pre-run actions */
-       pre_sev_es_run(svm);
-
        /* Assign the asid allocated with this SEV guest */
        svm->asid = asid;
 
index b649f92..dfa351e 100644 (file)
@@ -212,7 +212,7 @@ DEFINE_PER_CPU(struct svm_cpu_data *, svm_data);
  * RDTSCP and RDPID are not used in the kernel, specifically to allow KVM to
  * defer the restoration of TSC_AUX until the CPU returns to userspace.
  */
-#define TSC_AUX_URET_SLOT      0
+static int tsc_aux_uret_slot __read_mostly = -1;
 
 static const u32 msrpm_ranges[] = {0, 0xc0000000, 0xc0010000};
 
@@ -447,6 +447,11 @@ static int has_svm(void)
                return 0;
        }
 
+       if (pgtable_l5_enabled()) {
+               pr_info("KVM doesn't yet support 5-level paging on AMD SVM\n");
+               return 0;
+       }
+
        return 1;
 }
 
@@ -959,8 +964,7 @@ static __init int svm_hardware_setup(void)
                kvm_tsc_scaling_ratio_frac_bits = 32;
        }
 
-       if (boot_cpu_has(X86_FEATURE_RDTSCP))
-               kvm_define_user_return_msr(TSC_AUX_URET_SLOT, MSR_TSC_AUX);
+       tsc_aux_uret_slot = kvm_add_user_return_msr(MSR_TSC_AUX);
 
        /* Check for pause filtering support */
        if (!boot_cpu_has(X86_FEATURE_PAUSEFILTER)) {
@@ -1100,7 +1104,9 @@ static u64 svm_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
        return svm->vmcb->control.tsc_offset;
 }
 
-static void svm_check_invpcid(struct vcpu_svm *svm)
+/* Evaluate instruction intercepts that depend on guest CPUID features. */
+static void svm_recalc_instruction_intercepts(struct kvm_vcpu *vcpu,
+                                             struct vcpu_svm *svm)
 {
        /*
         * Intercept INVPCID if shadow paging is enabled to sync/free shadow
@@ -1113,6 +1119,13 @@ static void svm_check_invpcid(struct vcpu_svm *svm)
                else
                        svm_clr_intercept(svm, INTERCEPT_INVPCID);
        }
+
+       if (kvm_cpu_cap_has(X86_FEATURE_RDTSCP)) {
+               if (guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
+                       svm_clr_intercept(svm, INTERCEPT_RDTSCP);
+               else
+                       svm_set_intercept(svm, INTERCEPT_RDTSCP);
+       }
 }
 
 static void init_vmcb(struct kvm_vcpu *vcpu)
@@ -1235,8 +1248,8 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
        svm->current_vmcb->asid_generation = 0;
        svm->asid = 0;
 
-       svm->nested.vmcb12_gpa = 0;
-       svm->nested.last_vmcb12_gpa = 0;
+       svm->nested.vmcb12_gpa = INVALID_GPA;
+       svm->nested.last_vmcb12_gpa = INVALID_GPA;
        vcpu->arch.hflags = 0;
 
        if (!kvm_pause_in_guest(vcpu->kvm)) {
@@ -1248,7 +1261,7 @@ static void init_vmcb(struct kvm_vcpu *vcpu)
                svm_clr_intercept(svm, INTERCEPT_PAUSE);
        }
 
-       svm_check_invpcid(svm);
+       svm_recalc_instruction_intercepts(vcpu, svm);
 
        /*
         * If the host supports V_SPEC_CTRL then disable the interception
@@ -1424,6 +1437,9 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
        struct vcpu_svm *svm = to_svm(vcpu);
        struct svm_cpu_data *sd = per_cpu(svm_data, vcpu->cpu);
 
+       if (sev_es_guest(vcpu->kvm))
+               sev_es_unmap_ghcb(svm);
+
        if (svm->guest_state_loaded)
                return;
 
@@ -1445,8 +1461,8 @@ static void svm_prepare_guest_switch(struct kvm_vcpu *vcpu)
                }
        }
 
-       if (static_cpu_has(X86_FEATURE_RDTSCP))
-               kvm_set_user_return_msr(TSC_AUX_URET_SLOT, svm->tsc_aux, -1ull);
+       if (likely(tsc_aux_uret_slot >= 0))
+               kvm_set_user_return_msr(tsc_aux_uret_slot, svm->tsc_aux, -1ull);
 
        svm->guest_state_loaded = true;
 }
@@ -2655,11 +2671,6 @@ static int svm_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                        msr_info->data |= (u64)svm->sysenter_esp_hi << 32;
                break;
        case MSR_TSC_AUX:
-               if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-                       return 1;
-               if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
                msr_info->data = svm->tsc_aux;
                break;
        /*
@@ -2876,30 +2887,13 @@ static int svm_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr)
                svm->sysenter_esp_hi = guest_cpuid_is_intel(vcpu) ? (data >> 32) : 0;
                break;
        case MSR_TSC_AUX:
-               if (!boot_cpu_has(X86_FEATURE_RDTSCP))
-                       return 1;
-
-               if (!msr->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
-
-               /*
-                * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
-                * incomplete and conflicting architectural behavior.  Current
-                * AMD CPUs completely ignore bits 63:32, i.e. they aren't
-                * reserved and always read as zeros.  Emulate AMD CPU behavior
-                * to avoid explosions if the vCPU is migrated from an AMD host
-                * to an Intel host.
-                */
-               data = (u32)data;
-
                /*
                 * TSC_AUX is usually changed only during boot and never read
                 * directly.  Intercept TSC_AUX instead of exposing it to the
                 * guest via direct_access_msrs, and switch it via user return.
                 */
                preempt_disable();
-               r = kvm_set_user_return_msr(TSC_AUX_URET_SLOT, data, -1ull);
+               r = kvm_set_user_return_msr(tsc_aux_uret_slot, data, -1ull);
                preempt_enable();
                if (r)
                        return 1;
@@ -3084,6 +3078,7 @@ static int (*const svm_exit_handlers[])(struct kvm_vcpu *vcpu) = {
        [SVM_EXIT_STGI]                         = stgi_interception,
        [SVM_EXIT_CLGI]                         = clgi_interception,
        [SVM_EXIT_SKINIT]                       = skinit_interception,
+       [SVM_EXIT_RDTSCP]                       = kvm_handle_invalid_op,
        [SVM_EXIT_WBINVD]                       = kvm_emulate_wbinvd,
        [SVM_EXIT_MONITOR]                      = kvm_emulate_monitor,
        [SVM_EXIT_MWAIT]                        = kvm_emulate_mwait,
@@ -3972,8 +3967,7 @@ static void svm_vcpu_after_set_cpuid(struct kvm_vcpu *vcpu)
        svm->nrips_enabled = kvm_cpu_cap_has(X86_FEATURE_NRIPS) &&
                             guest_cpuid_has(vcpu, X86_FEATURE_NRIPS);
 
-       /* Check again if INVPCID interception if required */
-       svm_check_invpcid(svm);
+       svm_recalc_instruction_intercepts(vcpu, svm);
 
        /* For sev guests, the memory encryption bit is not reserved in CR3.  */
        if (sev_guest(vcpu->kvm)) {
index 84b3133..e44567c 100644 (file)
@@ -581,6 +581,7 @@ void sev_es_init_vmcb(struct vcpu_svm *svm);
 void sev_es_create_vcpu(struct vcpu_svm *svm);
 void sev_vcpu_deliver_sipi_vector(struct kvm_vcpu *vcpu, u8 vector);
 void sev_es_prepare_guest_switch(struct vcpu_svm *svm, unsigned int cpu);
+void sev_es_unmap_ghcb(struct vcpu_svm *svm);
 
 /* vmenter.S */
 
index d1d7798..8dee8a5 100644 (file)
@@ -398,6 +398,9 @@ static inline u64 vmx_supported_debugctl(void)
 {
        u64 debugctl = 0;
 
+       if (boot_cpu_has(X86_FEATURE_BUS_LOCK_DETECT))
+               debugctl |= DEBUGCTLMSR_BUS_LOCK_DETECT;
+
        if (vmx_get_perf_capabilities() & PMU_CAP_LBR_FMT)
                debugctl |= DEBUGCTLMSR_LBR_MASK;
 
index bced766..6058a65 100644 (file)
@@ -3098,15 +3098,8 @@ static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
                        nested_vmx_handle_enlightened_vmptrld(vcpu, false);
 
                if (evmptrld_status == EVMPTRLD_VMFAIL ||
-                   evmptrld_status == EVMPTRLD_ERROR) {
-                       pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
-                                            __func__);
-                       vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
-                       vcpu->run->internal.suberror =
-                               KVM_INTERNAL_ERROR_EMULATION;
-                       vcpu->run->internal.ndata = 0;
+                   evmptrld_status == EVMPTRLD_ERROR)
                        return false;
-               }
        }
 
        return true;
@@ -3194,8 +3187,16 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
 
 static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
 {
-       if (!nested_get_evmcs_page(vcpu))
+       if (!nested_get_evmcs_page(vcpu)) {
+               pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
+                                    __func__);
+               vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
+               vcpu->run->internal.suberror =
+                       KVM_INTERNAL_ERROR_EMULATION;
+               vcpu->run->internal.ndata = 0;
+
                return false;
+       }
 
        if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
                return false;
@@ -4435,7 +4436,15 @@ void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
        /* Similarly, triple faults in L2 should never escape. */
        WARN_ON_ONCE(kvm_check_request(KVM_REQ_TRIPLE_FAULT, vcpu));
 
-       kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
+       if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
+               /*
+                * KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
+                * Enlightened VMCS after migration and we still need to
+                * do that when something is forcing L2->L1 exit prior to
+                * the first L2 run.
+                */
+               (void)nested_get_evmcs_page(vcpu);
+       }
 
        /* Service the TLB flush request for L2 before switching to L1. */
        if (kvm_check_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu))
index d000cdd..4bceb5c 100644 (file)
@@ -455,21 +455,6 @@ static inline void vmx_segment_cache_clear(struct vcpu_vmx *vmx)
 
 static unsigned long host_idt_base;
 
-/*
- * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
- * will emulate SYSCALL in legacy mode if the vendor string in guest
- * CPUID.0:{EBX,ECX,EDX} is "AuthenticAMD" or "AMDisbetter!" To
- * support this emulation, IA32_STAR must always be included in
- * vmx_uret_msrs_list[], even in i386 builds.
- */
-static const u32 vmx_uret_msrs_list[] = {
-#ifdef CONFIG_X86_64
-       MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
-#endif
-       MSR_EFER, MSR_TSC_AUX, MSR_STAR,
-       MSR_IA32_TSX_CTRL,
-};
-
 #if IS_ENABLED(CONFIG_HYPERV)
 static bool __read_mostly enlightened_vmcs = true;
 module_param(enlightened_vmcs, bool, 0444);
@@ -697,21 +682,11 @@ static bool is_valid_passthrough_msr(u32 msr)
        return r;
 }
 
-static inline int __vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
-{
-       int i;
-
-       for (i = 0; i < vmx->nr_uret_msrs; ++i)
-               if (vmx_uret_msrs_list[vmx->guest_uret_msrs[i].slot] == msr)
-                       return i;
-       return -1;
-}
-
 struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 {
        int i;
 
-       i = __vmx_find_uret_msr(vmx, msr);
+       i = kvm_find_user_return_msr(msr);
        if (i >= 0)
                return &vmx->guest_uret_msrs[i];
        return NULL;
@@ -720,13 +695,14 @@ struct vmx_uret_msr *vmx_find_uret_msr(struct vcpu_vmx *vmx, u32 msr)
 static int vmx_set_guest_uret_msr(struct vcpu_vmx *vmx,
                                  struct vmx_uret_msr *msr, u64 data)
 {
+       unsigned int slot = msr - vmx->guest_uret_msrs;
        int ret = 0;
 
        u64 old_msr_data = msr->data;
        msr->data = data;
-       if (msr - vmx->guest_uret_msrs < vmx->nr_active_uret_msrs) {
+       if (msr->load_into_hardware) {
                preempt_disable();
-               ret = kvm_set_user_return_msr(msr->slot, msr->data, msr->mask);
+               ret = kvm_set_user_return_msr(slot, msr->data, msr->mask);
                preempt_enable();
                if (ret)
                        msr->data = old_msr_data;
@@ -1078,7 +1054,7 @@ static bool update_transition_efer(struct vcpu_vmx *vmx)
                return false;
        }
 
-       i = __vmx_find_uret_msr(vmx, MSR_EFER);
+       i = kvm_find_user_return_msr(MSR_EFER);
        if (i < 0)
                return false;
 
@@ -1240,11 +1216,14 @@ void vmx_prepare_switch_to_guest(struct kvm_vcpu *vcpu)
         */
        if (!vmx->guest_uret_msrs_loaded) {
                vmx->guest_uret_msrs_loaded = true;
-               for (i = 0; i < vmx->nr_active_uret_msrs; ++i)
-                       kvm_set_user_return_msr(vmx->guest_uret_msrs[i].slot,
+               for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+                       if (!vmx->guest_uret_msrs[i].load_into_hardware)
+                               continue;
+
+                       kvm_set_user_return_msr(i,
                                                vmx->guest_uret_msrs[i].data,
                                                vmx->guest_uret_msrs[i].mask);
-
+               }
        }
 
        if (vmx->nested.need_vmcs12_to_shadow_sync)
@@ -1751,19 +1730,16 @@ static void vmx_queue_exception(struct kvm_vcpu *vcpu)
        vmx_clear_hlt(vcpu);
 }
 
-static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
+static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr,
+                              bool load_into_hardware)
 {
-       struct vmx_uret_msr tmp;
-       int from, to;
+       struct vmx_uret_msr *uret_msr;
 
-       from = __vmx_find_uret_msr(vmx, msr);
-       if (from < 0)
+       uret_msr = vmx_find_uret_msr(vmx, msr);
+       if (!uret_msr)
                return;
-       to = vmx->nr_active_uret_msrs++;
 
-       tmp = vmx->guest_uret_msrs[to];
-       vmx->guest_uret_msrs[to] = vmx->guest_uret_msrs[from];
-       vmx->guest_uret_msrs[from] = tmp;
+       uret_msr->load_into_hardware = load_into_hardware;
 }
 
 /*
@@ -1773,29 +1749,42 @@ static void vmx_setup_uret_msr(struct vcpu_vmx *vmx, unsigned int msr)
  */
 static void setup_msrs(struct vcpu_vmx *vmx)
 {
-       vmx->guest_uret_msrs_loaded = false;
-       vmx->nr_active_uret_msrs = 0;
 #ifdef CONFIG_X86_64
+       bool load_syscall_msrs;
+
        /*
         * The SYSCALL MSRs are only needed on long mode guests, and only
         * when EFER.SCE is set.
         */
-       if (is_long_mode(&vmx->vcpu) && (vmx->vcpu.arch.efer & EFER_SCE)) {
-               vmx_setup_uret_msr(vmx, MSR_STAR);
-               vmx_setup_uret_msr(vmx, MSR_LSTAR);
-               vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK);
-       }
+       load_syscall_msrs = is_long_mode(&vmx->vcpu) &&
+                           (vmx->vcpu.arch.efer & EFER_SCE);
+
+       vmx_setup_uret_msr(vmx, MSR_STAR, load_syscall_msrs);
+       vmx_setup_uret_msr(vmx, MSR_LSTAR, load_syscall_msrs);
+       vmx_setup_uret_msr(vmx, MSR_SYSCALL_MASK, load_syscall_msrs);
 #endif
-       if (update_transition_efer(vmx))
-               vmx_setup_uret_msr(vmx, MSR_EFER);
+       vmx_setup_uret_msr(vmx, MSR_EFER, update_transition_efer(vmx));
 
-       if (guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP))
-               vmx_setup_uret_msr(vmx, MSR_TSC_AUX);
+       vmx_setup_uret_msr(vmx, MSR_TSC_AUX,
+                          guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDTSCP) ||
+                          guest_cpuid_has(&vmx->vcpu, X86_FEATURE_RDPID));
 
-       vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+       /*
+        * hle=0, rtm=0, tsx_ctrl=1 can be found with some combinations of new
+        * kernel and old userspace.  If those guests run on a tsx=off host, do
+        * allow guests to use TSX_CTRL, but don't change the value in hardware
+        * so that TSX remains always disabled.
+        */
+       vmx_setup_uret_msr(vmx, MSR_IA32_TSX_CTRL, boot_cpu_has(X86_FEATURE_RTM));
 
        if (cpu_has_vmx_msr_bitmap())
                vmx_update_msr_bitmap(&vmx->vcpu);
+
+       /*
+        * The set of MSRs to load may have changed, reload MSRs before the
+        * next VM-Enter.
+        */
+       vmx->guest_uret_msrs_loaded = false;
 }
 
 static u64 vmx_write_l1_tsc_offset(struct kvm_vcpu *vcpu, u64 offset)
@@ -1993,11 +1982,6 @@ static int vmx_get_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                else
                        msr_info->data = vmx->pt_desc.guest.addr_a[index / 2];
                break;
-       case MSR_TSC_AUX:
-               if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
-               goto find_uret_msr;
        case MSR_IA32_DEBUGCTLMSR:
                msr_info->data = vmcs_read64(GUEST_IA32_DEBUGCTL);
                break;
@@ -2031,6 +2015,9 @@ static u64 vcpu_supported_debugctl(struct kvm_vcpu *vcpu)
        if (!intel_pmu_lbr_is_enabled(vcpu))
                debugctl &= ~DEBUGCTLMSR_LBR_MASK;
 
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+               debugctl &= ~DEBUGCTLMSR_BUS_LOCK_DETECT;
+
        return debugctl;
 }
 
@@ -2313,14 +2300,6 @@ static int vmx_set_msr(struct kvm_vcpu *vcpu, struct msr_data *msr_info)
                else
                        vmx->pt_desc.guest.addr_a[index / 2] = data;
                break;
-       case MSR_TSC_AUX:
-               if (!msr_info->host_initiated &&
-                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP))
-                       return 1;
-               /* Check reserved bit, higher 32 bits should be zero */
-               if ((data >> 32) != 0)
-                       return 1;
-               goto find_uret_msr;
        case MSR_IA32_PERF_CAPABILITIES:
                if (data && !vcpu_to_pmu(vcpu)->version)
                        return 1;
@@ -4369,7 +4348,23 @@ static void vmx_compute_secondary_exec_control(struct vcpu_vmx *vmx)
                                                  xsaves_enabled, false);
        }
 
-       vmx_adjust_sec_exec_feature(vmx, &exec_control, rdtscp, RDTSCP);
+       /*
+        * RDPID is also gated by ENABLE_RDTSCP, turn on the control if either
+        * feature is exposed to the guest.  This creates a virtualization hole
+        * if both are supported in hardware but only one is exposed to the
+        * guest, but letting the guest execute RDTSCP or RDPID when either one
+        * is advertised is preferable to emulating the advertised instruction
+        * in KVM on #UD, and obviously better than incorrectly injecting #UD.
+        */
+       if (cpu_has_vmx_rdtscp()) {
+               bool rdpid_or_rdtscp_enabled =
+                       guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) ||
+                       guest_cpuid_has(vcpu, X86_FEATURE_RDPID);
+
+               vmx_adjust_secondary_exec_control(vmx, &exec_control,
+                                                 SECONDARY_EXEC_ENABLE_RDTSCP,
+                                                 rdpid_or_rdtscp_enabled, false);
+       }
        vmx_adjust_sec_exec_feature(vmx, &exec_control, invpcid, INVPCID);
 
        vmx_adjust_sec_exec_exiting(vmx, &exec_control, rdrand, RDRAND);
@@ -6855,6 +6850,7 @@ static void vmx_free_vcpu(struct kvm_vcpu *vcpu)
 
 static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
 {
+       struct vmx_uret_msr *tsx_ctrl;
        struct vcpu_vmx *vmx;
        int i, cpu, err;
 
@@ -6877,43 +6873,19 @@ static int vmx_create_vcpu(struct kvm_vcpu *vcpu)
                        goto free_vpid;
        }
 
-       BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
-
-       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i) {
-               u32 index = vmx_uret_msrs_list[i];
-               u32 data_low, data_high;
-               int j = vmx->nr_uret_msrs;
-
-               if (rdmsr_safe(index, &data_low, &data_high) < 0)
-                       continue;
-               if (wrmsr_safe(index, data_low, data_high) < 0)
-                       continue;
-
-               vmx->guest_uret_msrs[j].slot = i;
-               vmx->guest_uret_msrs[j].data = 0;
-               switch (index) {
-               case MSR_IA32_TSX_CTRL:
-                       /*
-                        * TSX_CTRL_CPUID_CLEAR is handled in the CPUID
-                        * interception.  Keep the host value unchanged to avoid
-                        * changing CPUID bits under the host kernel's feet.
-                        *
-                        * hle=0, rtm=0, tsx_ctrl=1 can be found with some
-                        * combinations of new kernel and old userspace.  If
-                        * those guests run on a tsx=off host, do allow guests
-                        * to use TSX_CTRL, but do not change the value on the
-                        * host so that TSX remains always disabled.
-                        */
-                       if (boot_cpu_has(X86_FEATURE_RTM))
-                               vmx->guest_uret_msrs[j].mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
-                       else
-                               vmx->guest_uret_msrs[j].mask = 0;
-                       break;
-               default:
-                       vmx->guest_uret_msrs[j].mask = -1ull;
-                       break;
-               }
-               ++vmx->nr_uret_msrs;
+       for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+               vmx->guest_uret_msrs[i].data = 0;
+               vmx->guest_uret_msrs[i].mask = -1ull;
+       }
+       if (boot_cpu_has(X86_FEATURE_RTM)) {
+               /*
+                * TSX_CTRL_CPUID_CLEAR is handled in the CPUID interception.
+                * Keep the host value unchanged to avoid changing CPUID bits
+                * under the host kernel's feet.
+                */
+               tsx_ctrl = vmx_find_uret_msr(vmx, MSR_IA32_TSX_CTRL);
+               if (tsx_ctrl)
+                       tsx_ctrl->mask = ~(u64)TSX_CTRL_CPUID_CLEAR;
        }
 
        err = alloc_loaded_vmcs(&vmx->vmcs01);
@@ -7344,9 +7316,11 @@ static __init void vmx_set_cpu_caps(void)
        if (!cpu_has_vmx_xsaves())
                kvm_cpu_cap_clear(X86_FEATURE_XSAVES);
 
-       /* CPUID 0x80000001 */
-       if (!cpu_has_vmx_rdtscp())
+       /* CPUID 0x80000001 and 0x7 (RDPID) */
+       if (!cpu_has_vmx_rdtscp()) {
                kvm_cpu_cap_clear(X86_FEATURE_RDTSCP);
+               kvm_cpu_cap_clear(X86_FEATURE_RDPID);
+       }
 
        if (cpu_has_vmx_waitpkg())
                kvm_cpu_cap_check_and_set(X86_FEATURE_WAITPKG);
@@ -7402,8 +7376,9 @@ static int vmx_check_intercept(struct kvm_vcpu *vcpu,
        /*
         * RDPID causes #UD if disabled through secondary execution controls.
         * Because it is marked as EmulateOnUD, we need to intercept it here.
+        * Note, RDPID is hidden behind ENABLE_RDTSCP.
         */
-       case x86_intercept_rdtscp:
+       case x86_intercept_rdpid:
                if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_RDTSCP)) {
                        exception->vector = UD_VECTOR;
                        exception->error_code_valid = false;
@@ -7769,17 +7744,42 @@ static struct kvm_x86_ops vmx_x86_ops __initdata = {
        .vcpu_deliver_sipi_vector = kvm_vcpu_deliver_sipi_vector,
 };
 
+static __init void vmx_setup_user_return_msrs(void)
+{
+       /*
+        * Though SYSCALL is only supported in 64-bit mode on Intel CPUs, kvm
+        * will emulate SYSCALL in legacy mode if the vendor string in guest
+        * CPUID.0:{EBX,EDX,ECX} is "AuthenticAMD" or "AMDisbetter!" To
+        * support this emulation, MSR_STAR is included in the list for i386,
+        * but is never loaded into hardware.  MSR_CSTAR is also never loaded
+        * into hardware and is here purely for emulation purposes.
+        */
+       const u32 vmx_uret_msrs_list[] = {
+       #ifdef CONFIG_X86_64
+               MSR_SYSCALL_MASK, MSR_LSTAR, MSR_CSTAR,
+       #endif
+               MSR_EFER, MSR_TSC_AUX, MSR_STAR,
+               MSR_IA32_TSX_CTRL,
+       };
+       int i;
+
+       BUILD_BUG_ON(ARRAY_SIZE(vmx_uret_msrs_list) != MAX_NR_USER_RETURN_MSRS);
+
+       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
+               kvm_add_user_return_msr(vmx_uret_msrs_list[i]);
+}
+
 static __init int hardware_setup(void)
 {
        unsigned long host_bndcfgs;
        struct desc_ptr dt;
-       int r, i, ept_lpage_level;
+       int r, ept_lpage_level;
 
        store_idt(&dt);
        host_idt_base = dt.address;
 
-       for (i = 0; i < ARRAY_SIZE(vmx_uret_msrs_list); ++i)
-               kvm_define_user_return_msr(i, vmx_uret_msrs_list[i]);
+       vmx_setup_user_return_msrs();
 
        if (setup_vmcs_config(&vmcs_config, &vmx_capability) < 0)
                return -EIO;
index 008cb87..16e4e45 100644 (file)
@@ -36,7 +36,7 @@ struct vmx_msrs {
 };
 
 struct vmx_uret_msr {
-       unsigned int slot; /* The MSR's slot in kvm_user_return_msrs. */
+       bool load_into_hardware;
        u64 data;
        u64 mask;
 };
@@ -245,8 +245,16 @@ struct vcpu_vmx {
        u32                   idt_vectoring_info;
        ulong                 rflags;
 
+       /*
+        * User return MSRs are always emulated when enabled in the guest, but
+        * only loaded into hardware when necessary, e.g. SYSCALL #UDs outside
+        * of 64-bit mode or if EFER.SCE=0, thus the SYSCALL MSRs don't need to
+        * be loaded into hardware if those conditions aren't met.  Each MSR's
+        * load_into_hardware flag tracks whether it must be loaded into
+        * hardware when running the guest.
+        */
        struct vmx_uret_msr   guest_uret_msrs[MAX_NR_USER_RETURN_MSRS];
-       int                   nr_uret_msrs;
-       int                   nr_active_uret_msrs;
        bool                  guest_uret_msrs_loaded;
 #ifdef CONFIG_X86_64
index 6eda283..9b6bca6 100644 (file)
@@ -184,11 +184,6 @@ module_param(pi_inject_timer, bint, S_IRUGO | S_IWUSR);
  */
 #define KVM_MAX_NR_USER_RETURN_MSRS 16
 
-struct kvm_user_return_msrs_global {
-       int nr;
-       u32 msrs[KVM_MAX_NR_USER_RETURN_MSRS];
-};
-
 struct kvm_user_return_msrs {
        struct user_return_notifier urn;
        bool registered;
@@ -198,7 +193,9 @@ struct kvm_user_return_msrs {
        } values[KVM_MAX_NR_USER_RETURN_MSRS];
 };
 
-static struct kvm_user_return_msrs_global __read_mostly user_return_msrs_global;
+u32 __read_mostly kvm_nr_uret_msrs;
+EXPORT_SYMBOL_GPL(kvm_nr_uret_msrs);
+static u32 __read_mostly kvm_uret_msrs_list[KVM_MAX_NR_USER_RETURN_MSRS];
 static struct kvm_user_return_msrs __percpu *user_return_msrs;
 
 #define KVM_SUPPORTED_XCR0     (XFEATURE_MASK_FP | XFEATURE_MASK_SSE \
@@ -330,23 +327,53 @@ static void kvm_on_user_return(struct user_return_notifier *urn)
                user_return_notifier_unregister(urn);
        }
        local_irq_restore(flags);
-       for (slot = 0; slot < user_return_msrs_global.nr; ++slot) {
+       for (slot = 0; slot < kvm_nr_uret_msrs; ++slot) {
                values = &msrs->values[slot];
                if (values->host != values->curr) {
-                       wrmsrl(user_return_msrs_global.msrs[slot], values->host);
+                       wrmsrl(kvm_uret_msrs_list[slot], values->host);
                        values->curr = values->host;
                }
        }
 }
 
-void kvm_define_user_return_msr(unsigned slot, u32 msr)
+static int kvm_probe_user_return_msr(u32 msr)
+{
+       u64 val;
+       int ret;
+
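+       /*
+        * Disable preemption so the value read is written back on the same
+        * CPU; for an MSR that holds a per-CPU value, migrating between the
+        * RDMSR and the WRMSR would clobber another CPU's copy.
+        */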
+       preempt_disable();
+       ret = rdmsrl_safe(msr, &val);
+       if (ret)
+               goto out;
+       ret = wrmsrl_safe(msr, val);
+out:
+       preempt_enable();
+       return ret;
+}
+
+int kvm_add_user_return_msr(u32 msr)
 {
-       BUG_ON(slot >= KVM_MAX_NR_USER_RETURN_MSRS);
-       user_return_msrs_global.msrs[slot] = msr;
-       if (slot >= user_return_msrs_global.nr)
-               user_return_msrs_global.nr = slot + 1;
+       BUG_ON(kvm_nr_uret_msrs >= KVM_MAX_NR_USER_RETURN_MSRS);
+
+       if (kvm_probe_user_return_msr(msr))
+               return -1;
+
+       kvm_uret_msrs_list[kvm_nr_uret_msrs] = msr;
+       return kvm_nr_uret_msrs++;
 }
-EXPORT_SYMBOL_GPL(kvm_define_user_return_msr);
+EXPORT_SYMBOL_GPL(kvm_add_user_return_msr);
+
+int kvm_find_user_return_msr(u32 msr)
+{
+       int i;
+
+       for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+               if (kvm_uret_msrs_list[i] == msr)
+                       return i;
+       }
+       return -1;
+}
+EXPORT_SYMBOL_GPL(kvm_find_user_return_msr);
 
 static void kvm_user_return_msr_cpu_online(void)
 {
@@ -355,8 +382,8 @@ static void kvm_user_return_msr_cpu_online(void)
        u64 value;
        int i;
 
-       for (i = 0; i < user_return_msrs_global.nr; ++i) {
-               rdmsrl_safe(user_return_msrs_global.msrs[i], &value);
+       for (i = 0; i < kvm_nr_uret_msrs; ++i) {
+               rdmsrl_safe(kvm_uret_msrs_list[i], &value);
                msrs->values[i].host = value;
                msrs->values[i].curr = value;
        }
@@ -371,7 +398,7 @@ int kvm_set_user_return_msr(unsigned slot, u64 value, u64 mask)
        value = (value & mask) | (msrs->values[slot].host & ~mask);
        if (value == msrs->values[slot].curr)
                return 0;
-       err = wrmsrl_safe(user_return_msrs_global.msrs[slot], value);
+       err = wrmsrl_safe(kvm_uret_msrs_list[slot], value);
        if (err)
                return 1;
 
@@ -1149,6 +1176,9 @@ static u64 kvm_dr6_fixed(struct kvm_vcpu *vcpu)
 
        if (!guest_cpuid_has(vcpu, X86_FEATURE_RTM))
                fixed |= DR6_RTM;
+
+       if (!guest_cpuid_has(vcpu, X86_FEATURE_BUS_LOCK_DETECT))
+               fixed |= DR6_BUS_LOCK;
        return fixed;
 }
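
Side note on the DR6_BUS_LOCK bit added here: like DR6_RTM it has inverted polarity. Hardware reports a detected bus lock by clearing the bit, which otherwise reads as 1, so it belongs in the fixed-to-1 set exactly when the guest does not advertise X86_FEATURE_BUS_LOCK_DETECT.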
 
@@ -1615,6 +1645,30 @@ static int __kvm_set_msr(struct kvm_vcpu *vcpu, u32 index, u64 data,
                 * invokes 64-bit SYSENTER.
                 */
                data = get_canonical(data, vcpu_virt_addr_bits(vcpu));
+               break;
+       case MSR_TSC_AUX:
+               if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+                       return 1;
+
+               if (!host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+                       return 1;
+
+               /*
+                * Per Intel's SDM, bits 63:32 are reserved, but AMD's APM has
+                * incomplete and conflicting architectural behavior.  Current
+                * AMD CPUs completely ignore bits 63:32, i.e. they aren't
+                * reserved and always read as zeros.  Enforce Intel's reserved
+                * bits check if and only if the guest CPU is Intel, and clear
+                * the bits in all other cases.  This ensures cross-vendor
+                * migration will provide consistent behavior for the guest.
+                */
+               if (guest_cpuid_is_intel(vcpu) && (data >> 32) != 0)
+                       return 1;
+
+               data = (u32)data;
+               break;
        }
 
        msr.data = data;
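
To make the MSR_TSC_AUX rules above concrete, here is a hypothetical standalone helper (not kernel code) mirroring the sanitization:

    #include <stdint.h>

    /* Returns 1 to signal a #GP to the guest, 0 on success. */
    static int sanitize_tsc_aux(int guest_is_intel, uint64_t *data)
    {
            if (guest_is_intel && (*data >> 32) != 0)
                    return 1;        /* Intel: bits 63:32 are reserved */

            *data = (uint32_t)*data; /* others: upper bits ignored, read as zero */
            return 0;
    }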
@@ -1651,6 +1705,18 @@ int __kvm_get_msr(struct kvm_vcpu *vcpu, u32 index, u64 *data,
        if (!host_initiated && !kvm_msr_allowed(vcpu, index, KVM_MSR_FILTER_READ))
                return KVM_MSR_RET_FILTERED;
 
+       switch (index) {
+       case MSR_TSC_AUX:
+               if (!kvm_is_supported_user_return_msr(MSR_TSC_AUX))
+                       return 1;
+
+               if (!host_initiated &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDTSCP) &&
+                   !guest_cpuid_has(vcpu, X86_FEATURE_RDPID))
+                       return 1;
+               break;
+       }
+
        msr.index = index;
        msr.host_initiated = host_initiated;
 
@@ -5468,14 +5534,18 @@ static void kvm_free_msr_filter(struct kvm_x86_msr_filter *msr_filter)
 static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
                              struct kvm_msr_filter_range *user_range)
 {
-       struct msr_bitmap_range range;
        unsigned long *bitmap = NULL;
        size_t bitmap_size;
-       int r;
 
        if (!user_range->nmsrs)
                return 0;
 
+       if (user_range->flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE))
+               return -EINVAL;
+
+       if (!user_range->flags)
+               return -EINVAL;
+
        bitmap_size = BITS_TO_LONGS(user_range->nmsrs) * sizeof(long);
        if (!bitmap_size || bitmap_size > KVM_MSR_FILTER_MAX_BITMAP_SIZE)
                return -EINVAL;
@@ -5484,31 +5554,15 @@ static int kvm_add_msr_filter(struct kvm_x86_msr_filter *msr_filter,
        if (IS_ERR(bitmap))
                return PTR_ERR(bitmap);
 
-       range = (struct msr_bitmap_range) {
+       msr_filter->ranges[msr_filter->count] = (struct msr_bitmap_range) {
                .flags = user_range->flags,
                .base = user_range->base,
                .nmsrs = user_range->nmsrs,
                .bitmap = bitmap,
        };
 
-       if (range.flags & ~(KVM_MSR_FILTER_READ | KVM_MSR_FILTER_WRITE)) {
-               r = -EINVAL;
-               goto err;
-       }
-
-       if (!range.flags) {
-               r = -EINVAL;
-               goto err;
-       }
-
-       /* Everything ok, add this range identifier. */
-       msr_filter->ranges[msr_filter->count] = range;
        msr_filter->count++;
-
        return 0;
-err:
-       kfree(bitmap);
-       return r;
 }
 
 static int kvm_vm_ioctl_set_msr_filter(struct kvm *kvm, void __user *argp)
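
Note the shape of this fix: the flags are validated before the bitmap is copied in, so the function owns no allocation on any failure path and the old err label with its kfree(bitmap) can be dropped outright.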
@@ -5937,7 +5991,8 @@ static void kvm_init_msr_list(void)
                                continue;
                        break;
                case MSR_TSC_AUX:
-                       if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP))
+                       if (!kvm_cpu_cap_has(X86_FEATURE_RDTSCP) &&
+                           !kvm_cpu_cap_has(X86_FEATURE_RDPID))
                                continue;
                        break;
                case MSR_IA32_UMWAIT_CONTROL:
@@ -8039,6 +8094,18 @@ static void pvclock_gtod_update_fn(struct work_struct *work)
 
 static DECLARE_WORK(pvclock_gtod_work, pvclock_gtod_update_fn);
 
+/*
+ * Indirection to move queue_work() out of the tk_core.seq write held
+ * region to prevent possible deadlocks against time accessors which
+ * are invoked with work related locks held.
+ */
+static void pvclock_irq_work_fn(struct irq_work *w)
+{
+       queue_work(system_long_wq, &pvclock_gtod_work);
+}
+
+static DEFINE_IRQ_WORK(pvclock_irq_work, pvclock_irq_work_fn);
+
 /*
  * Notification about pvclock gtod data update.
  */
@@ -8050,13 +8117,14 @@ static int pvclock_gtod_notify(struct notifier_block *nb, unsigned long unused,
 
        update_pvclock_gtod(tk);
 
-       /* disable master clock if host does not trust, or does not
-        * use, TSC based clocksource.
+       /*
+        * Disable master clock if host does not trust, or does not use,
+        * TSC based clocksource. Delegate queue_work() to irq_work as
+        * this is invoked with tk_core.seq write held.
         */
        if (!gtod_is_based_on_tsc(gtod->clock.vclock_mode) &&
            atomic_read(&kvm_guest_has_master_clock) != 0)
-               queue_work(system_long_wq, &pvclock_gtod_work);
-
+               irq_work_queue(&pvclock_irq_work);
        return 0;
 }
 
@@ -8118,6 +8186,7 @@ int kvm_arch_init(void *opaque)
                printk(KERN_ERR "kvm: failed to allocate percpu kvm_user_return_msrs\n");
                goto out_free_x86_emulator_cache;
        }
+       kvm_nr_uret_msrs = 0;
 
        r = kvm_mmu_module_init();
        if (r)
@@ -8168,6 +8237,8 @@ void kvm_arch_exit(void)
        cpuhp_remove_state_nocalls(CPUHP_AP_X86_KVM_CLK_ONLINE);
 #ifdef CONFIG_X86_64
        pvclock_gtod_unregister_notifier(&pvclock_gtod_notifier);
+       irq_work_sync(&pvclock_irq_work);
+       cancel_work_sync(&pvclock_gtod_work);
 #endif
        kvm_x86_ops.hardware_enable = NULL;
        kvm_mmu_module_exit();
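
The teardown order added above matters: irq_work_sync() must complete before cancel_work_sync(), since an in-flight irq_work could otherwise re-queue pvclock_gtod_work after it has been cancelled.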
index 16c0fe8..d260bc1 100644 (file)
@@ -1313,6 +1313,7 @@ int acpi_dev_pm_attach(struct device *dev, bool power_on)
                {"PNP0C0B", }, /* Generic ACPI fan */
                {"INT3404", }, /* Fan */
                {"INTC1044", }, /* Fan for Tiger Lake generation */
+               {"INTC1048", }, /* Fan for Alder Lake generation */
                {}
        };
        struct acpi_device *adev = ACPI_COMPANION(dev);
index b852cff..f973bbe 100644 (file)
@@ -142,6 +142,7 @@ int acpi_device_sleep_wake(struct acpi_device *dev,
 int acpi_power_get_inferred_state(struct acpi_device *device, int *state);
 int acpi_power_on_resources(struct acpi_device *device, int state);
 int acpi_power_transition(struct acpi_device *device, int state);
+void acpi_turn_off_unused_power_resources(void);
 
 /* --------------------------------------------------------------------------
                               Device Power Management
index 32974b5..56102ea 100644 (file)
@@ -995,6 +995,7 @@ void acpi_resume_power_resources(void)
 
        mutex_unlock(&power_resource_list_lock);
 }
+#endif
 
 void acpi_turn_off_unused_power_resources(void)
 {
@@ -1015,4 +1016,3 @@ void acpi_turn_off_unused_power_resources(void)
 
        mutex_unlock(&power_resource_list_lock);
 }
-#endif
index a22778e..453eff8 100644 (file)
@@ -700,6 +700,7 @@ int acpi_device_add(struct acpi_device *device,
 
                result = acpi_device_set_name(device, acpi_device_bus_id);
                if (result) {
+                       kfree_const(acpi_device_bus_id->bus_id);
                        kfree(acpi_device_bus_id);
                        goto err_unlock;
                }
@@ -2359,6 +2360,8 @@ int __init acpi_scan_init(void)
                }
        }
 
+       acpi_turn_off_unused_power_resources();
+
        acpi_scan_initialized = true;
 
  out:
index 1856f76..7fe41ee 100644 (file)
@@ -8,7 +8,6 @@ extern struct list_head acpi_wakeup_device_list;
 extern struct mutex acpi_device_lock;
 
 extern void acpi_resume_power_resources(void);
-extern void acpi_turn_off_unused_power_resources(void);
 
 static inline acpi_status acpi_set_waking_vector(u32 wakeup_address)
 {
index 1fc1a99..b570848 100644 (file)
@@ -1637,6 +1637,7 @@ void pm_runtime_init(struct device *dev)
        dev->power.request_pending = false;
        dev->power.request = RPM_REQ_NONE;
        dev->power.deferred_resume = false;
+       dev->power.needs_force_resume = 0;
        INIT_WORK(&dev->power.work, pm_runtime_work);
 
        dev->power.timer_expires = 0;
@@ -1804,10 +1805,12 @@ int pm_runtime_force_suspend(struct device *dev)
         * its parent, but set its status to RPM_SUSPENDED anyway in case this
         * function will be called again for it in the meantime.
         */
-       if (pm_runtime_need_not_resume(dev))
+       if (pm_runtime_need_not_resume(dev)) {
                pm_runtime_set_suspended(dev);
-       else
+       } else {
                __update_runtime_status(dev, RPM_SUSPENDED);
+               dev->power.needs_force_resume = 1;
+       }
 
        return 0;
 
@@ -1834,7 +1837,7 @@ int pm_runtime_force_resume(struct device *dev)
        int (*callback)(struct device *);
        int ret = 0;
 
-       if (!pm_runtime_status_suspended(dev) || pm_runtime_need_not_resume(dev))
+       if (!pm_runtime_status_suspended(dev) || !dev->power.needs_force_resume)
                goto out;
 
        /*
@@ -1853,6 +1856,7 @@ int pm_runtime_force_resume(struct device *dev)
 
        pm_runtime_mark_last_busy(dev);
 out:
+       dev->power.needs_force_resume = 0;
        pm_runtime_enable(dev);
        return ret;
 }
index eff1f12..c84d239 100644 (file)
@@ -656,6 +656,7 @@ int tpm2_get_cc_attrs_tbl(struct tpm_chip *chip)
 
        if (nr_commands !=
            be32_to_cpup((__be32 *)&buf.data[TPM_HEADER_SIZE + 5])) {
+               rc = -EFAULT;
                tpm_buf_destroy(&buf);
                goto out;
        }
index a2e0395..55b9d39 100644 (file)
@@ -709,16 +709,14 @@ static int tpm_tis_gen_interrupt(struct tpm_chip *chip)
        cap_t cap;
        int ret;
 
-       /* TPM 2.0 */
-       if (chip->flags & TPM_CHIP_FLAG_TPM2)
-               return tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
-
-       /* TPM 1.2 */
        ret = request_locality(chip, 0);
        if (ret < 0)
                return ret;
 
-       ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
+       if (chip->flags & TPM_CHIP_FLAG_TPM2)
+               ret = tpm2_get_tpm_pt(chip, 0x100, &cap2, desc);
+       else
+               ret = tpm1_getcap(chip, TPM_CAP_PROP_TIS_TIMEOUT, &cap, desc, 0);
 
        release_locality(chip, 0);
 
@@ -1127,12 +1125,20 @@ int tpm_tis_resume(struct device *dev)
        if (ret)
                return ret;
 
-       /* TPM 1.2 requires self-test on resume. This function actually returns
+       /*
+        * TPM 1.2 requires self-test on resume. This function actually returns
         * an error code but for unknown reason it isn't handled.
         */
-       if (!(chip->flags & TPM_CHIP_FLAG_TPM2))
+       if (!(chip->flags & TPM_CHIP_FLAG_TPM2)) {
+               ret = request_locality(chip, 0);
+               if (ret < 0)
+                       return ret;
+
                tpm1_do_selftest(chip);
 
+               release_locality(chip, 0);
+       }
+
        return 0;
 }
 EXPORT_SYMBOL_GPL(tpm_tis_resume);
index f040106..0e69dff 100644 (file)
@@ -3033,6 +3033,14 @@ static const struct x86_cpu_id hwp_support_ids[] __initconst = {
        {}
 };
 
+static bool intel_pstate_hwp_is_enabled(void)
+{
+       u64 value;
+
+       rdmsrl(MSR_PM_ENABLE, value);
+       return !!(value & 0x1);
+}
+
 static int __init intel_pstate_init(void)
 {
        const struct x86_cpu_id *id;
@@ -3051,8 +3059,12 @@ static int __init intel_pstate_init(void)
                 * Avoid enabling HWP for processors without EPP support,
                 * because that means incomplete HWP implementation which is a
                 * corner case and supporting it is generally problematic.
+                *
+                * If HWP is enabled already, though, there is no choice but to
+                * deal with it.
                 */
-               if (!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) {
+               if ((!no_hwp && boot_cpu_has(X86_FEATURE_HWP_EPP)) ||
+                   intel_pstate_hwp_is_enabled()) {
                        hwp_active++;
                        hwp_mode_bdw = id->driver_data;
                        intel_pstate.attr = hwp_cpufreq_attrs;
index 5677263..483cd75 100644 (file)
@@ -485,7 +485,7 @@ static int adm9240_in_write(struct device *dev, u32 attr, int channel, long val)
                reg = ADM9240_REG_IN_MIN(channel);
                break;
        case hwmon_in_max:
-               reg = ADM9240_REG_IN(channel);
+               reg = ADM9240_REG_IN_MAX(channel);
                break;
        default:
                return -EOPNOTSUPP;
index 3a5807e..02298b8 100644 (file)
@@ -355,7 +355,7 @@ static umode_t corsairpsu_hwmon_power_is_visible(const struct corsairpsu_data *p
                return 0444;
        default:
                return 0;
-       };
+       }
 }
 
 static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv, u32 attr,
@@ -376,7 +376,7 @@ static umode_t corsairpsu_hwmon_in_is_visible(const struct corsairpsu_data *priv
                break;
        default:
                break;
-       };
+       }
 
        return res;
 }
index 4382105..2a4bed0 100644 (file)
@@ -900,11 +900,15 @@ static int ltc2992_parse_dt(struct ltc2992_state *st)
 
        fwnode_for_each_available_child_node(fwnode, child) {
                ret = fwnode_property_read_u32(child, "reg", &addr);
-               if (ret < 0)
+               if (ret < 0) {
+                       fwnode_handle_put(child);
                        return ret;
+               }
 
-               if (addr > 1)
+               if (addr > 1) {
+                       fwnode_handle_put(child);
                        return -EINVAL;
+               }
 
                ret = fwnode_property_read_u32(child, "shunt-resistor-micro-ohms", &val);
                if (!ret)
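
The rule enforced by this fix is general to the fwnode child iterators: fwnode_for_each_available_child_node() holds a reference on child for the current iteration, and every early exit from the loop must drop it. A minimal sketch, with do_something() as a placeholder:

    fwnode_for_each_available_child_node(fwnode, child) {
            ret = do_something(child);
            if (ret < 0) {
                    /* drop the reference the iterator took for this pass */
                    fwnode_handle_put(child);
                    return ret;
            }
    }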
index f1ac153..967532a 100644 (file)
@@ -217,9 +217,9 @@ int occ_update_response(struct occ *occ)
                return rc;
 
        /* limit the maximum rate of polling the OCC */
-       if (time_after(jiffies, occ->last_update + OCC_UPDATE_FREQUENCY)) {
+       if (time_after(jiffies, occ->next_update)) {
                rc = occ_poll(occ);
-               occ->last_update = jiffies;
+               occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
        } else {
                rc = occ->last_error;
        }
@@ -1165,6 +1165,7 @@ int occ_setup(struct occ *occ, const char *name)
                return rc;
        }
 
+       occ->next_update = jiffies + OCC_UPDATE_FREQUENCY;
        occ_parse_poll_response(occ);
 
        rc = occ_setup_sensor_attrs(occ);
index 67e6968..e6df719 100644 (file)
@@ -99,7 +99,7 @@ struct occ {
        u8 poll_cmd_data;               /* to perform OCC poll command */
        int (*send_cmd)(struct occ *occ, u8 *cmd);
 
-       unsigned long last_update;
+       unsigned long next_update;
        struct mutex lock;              /* lock OCC access */
 
        struct device *hwmon;
index b177987..e248424 100644 (file)
@@ -57,7 +57,7 @@ static int page_log_to_page_real(int page_log, enum chips chip)
                case YH5151E_PAGE_12V_LOG:
                        return YH5151E_PAGE_12V_REAL;
                case YH5151E_PAGE_5V_LOG:
-                       return YH5151E_PAGE_5V_LOG;
+                       return YH5151E_PAGE_5V_REAL;
                case YH5151E_PAGE_3V3_LOG:
                        return YH5151E_PAGE_3V3_REAL;
                }
@@ -103,8 +103,18 @@ static int set_page(struct i2c_client *client, int page_log)
 
 static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
 {
+       const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+       struct fsp3y_data *data = to_fsp3y_data(info);
        int rv;
 
+       /*
+        * The YH-5151E reports VOUT in linear11 rather than linear16; the
+        * conversion is done when reading. Here we have to feed pmbus_core
+        * the correct exponent for it (-6).
+        */
+       if (data->chip == yh5151e && reg == PMBUS_VOUT_MODE)
+               return 0x1A;
+
        rv = set_page(client, page);
        if (rv < 0)
                return rv;
@@ -114,6 +124,8 @@ static int fsp3y_read_byte_data(struct i2c_client *client, int page, int reg)
 
 static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase, int reg)
 {
+       const struct pmbus_driver_info *info = pmbus_get_driver_info(client);
+       struct fsp3y_data *data = to_fsp3y_data(info);
        int rv;
 
        /*
@@ -144,7 +156,18 @@ static int fsp3y_read_word_data(struct i2c_client *client, int page, int phase,
        if (rv < 0)
                return rv;
 
-       return i2c_smbus_read_word_data(client, reg);
+       rv = i2c_smbus_read_word_data(client, reg);
+       if (rv < 0)
+               return rv;
+
+       /*
+        * The YH-5151E is non-compliant and reports output voltages in
+        * linear11 instead of linear16.
+        */
+       if (data->chip == yh5151e && reg == PMBUS_READ_VOUT)
+               rv = sign_extend32(rv, 10) & 0xffff;
+
+       return rv;
 }
 
 static struct pmbus_driver_info fsp3y_info[] = {
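
A worked example of the linear11 handling above (standalone C sketch; the raw reading is made up): VOUT_MODE 0x1A means linear mode with a 5-bit two's-complement exponent of -6, and each READ_VOUT word carries the device's exponent in bits 15:11 and an 11-bit two's-complement mantissa in bits 10:0.

    #include <stdint.h>
    #include <stdio.h>

    /* Sign-extend bits [bits:0] of v, mirroring the kernel's sign_extend32(). */
    static int32_t sign_extend(uint32_t v, int bits)
    {
            int shift = 31 - bits;
            return (int32_t)(v << shift) >> shift;
    }

    int main(void)
    {
            uint16_t raw = 0xd2a0;  /* exponent -6 (0b11010), mantissa 672 */
            int vout = sign_extend(raw, 10) & 0xffff; /* what the driver returns */

            /* pmbus_core then applies the exponent injected via VOUT_MODE: -6 */
            printf("VOUT = %d * 2^-6 = %.3f V\n", vout, vout / 64.0);
            return 0;
    }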
index 89aeaf3..0e0cd9e 100644 (file)
@@ -671,21 +671,58 @@ static int vt_resizex(struct vc_data *vc, struct vt_consize __user *cs)
        if (copy_from_user(&v, cs, sizeof(struct vt_consize)))
                return -EFAULT;
 
-       if (v.v_vlin)
-               pr_info_once("\"struct vt_consize\"->v_vlin is ignored. Please report if you need this.\n");
-       if (v.v_clin)
-               pr_info_once("\"struct vt_consize\"->v_clin is ignored. Please report if you need this.\n");
+       /* FIXME: Should check the copies properly */
+       if (!v.v_vlin)
+               v.v_vlin = vc->vc_scan_lines;
+
+       if (v.v_clin) {
+               int rows = v.v_vlin / v.v_clin;
+               if (v.v_rows != rows) {
+                       if (v.v_rows) /* Parameters don't add up */
+                               return -EINVAL;
+                       v.v_rows = rows;
+               }
+       }
+
+       if (v.v_vcol && v.v_ccol) {
+               int cols = v.v_vcol / v.v_ccol;
+               if (v.v_cols != cols) {
+                       if (v.v_cols)
+                               return -EINVAL;
+                       v.v_cols = cols;
+               }
+       }
+
+       if (v.v_clin > 32)
+               return -EINVAL;
 
-       console_lock();
        for (i = 0; i < MAX_NR_CONSOLES; i++) {
-               vc = vc_cons[i].d;
+               struct vc_data *vcp;
 
-               if (vc) {
-                       vc->vc_resize_user = 1;
-                       vc_resize(vc, v.v_cols, v.v_rows);
+               if (!vc_cons[i].d)
+                       continue;
+               console_lock();
+               vcp = vc_cons[i].d;
+               if (vcp) {
+                       int ret;
+                       int save_scan_lines = vcp->vc_scan_lines;
+                       int save_cell_height = vcp->vc_cell_height;
+
+                       if (v.v_vlin)
+                               vcp->vc_scan_lines = v.v_vlin;
+                       if (v.v_clin)
+                               vcp->vc_cell_height = v.v_clin;
+                       vcp->vc_resize_user = 1;
+                       ret = vc_resize(vcp, v.v_cols, v.v_rows);
+                       if (ret) {
+                               vcp->vc_scan_lines = save_scan_lines;
+                               vcp->vc_cell_height = save_cell_height;
+                               console_unlock();
+                               return ret;
+                       }
                }
+               console_unlock();
        }
-       console_unlock();
 
        return 0;
 }
index 39258f9..ef9c57c 100644 (file)
@@ -380,7 +380,7 @@ static void vgacon_init(struct vc_data *c, int init)
                vc_resize(c, vga_video_num_columns, vga_video_num_lines);
 
        c->vc_scan_lines = vga_scan_lines;
-       c->vc_font.height = vga_video_font_height;
+       c->vc_font.height = c->vc_cell_height = vga_video_font_height;
        c->vc_complement_mask = 0x7700;
        if (vga_512_chars)
                c->vc_hi_font_mask = 0x0800;
@@ -515,32 +515,32 @@ static void vgacon_cursor(struct vc_data *c, int mode)
                switch (CUR_SIZE(c->vc_cursor_type)) {
                case CUR_UNDERLINE:
                        vgacon_set_cursor_size(c->state.x,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                10 ? 2 : 3),
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                10 ? 1 : 2));
                        break;
                case CUR_TWO_THIRDS:
                        vgacon_set_cursor_size(c->state.x,
-                                              c->vc_font.height / 3,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height / 3,
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                10 ? 1 : 2));
                        break;
                case CUR_LOWER_THIRD:
                        vgacon_set_cursor_size(c->state.x,
-                                              (c->vc_font.height * 2) / 3,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              (c->vc_cell_height * 2) / 3,
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                10 ? 1 : 2));
                        break;
                case CUR_LOWER_HALF:
                        vgacon_set_cursor_size(c->state.x,
-                                              c->vc_font.height / 2,
-                                              c->vc_font.height -
-                                              (c->vc_font.height <
+                                              c->vc_cell_height / 2,
+                                              c->vc_cell_height -
+                                              (c->vc_cell_height <
                                                10 ? 1 : 2));
                        break;
                case CUR_NONE:
@@ -551,7 +551,7 @@ static void vgacon_cursor(struct vc_data *c, int mode)
                        break;
                default:
                        vgacon_set_cursor_size(c->state.x, 1,
-                                              c->vc_font.height);
+                                              c->vc_cell_height);
                        break;
                }
                break;
@@ -562,13 +562,13 @@ static int vgacon_doresize(struct vc_data *c,
                unsigned int width, unsigned int height)
 {
        unsigned long flags;
-       unsigned int scanlines = height * c->vc_font.height;
+       unsigned int scanlines = height * c->vc_cell_height;
        u8 scanlines_lo = 0, r7 = 0, vsync_end = 0, mode, max_scan;
 
        raw_spin_lock_irqsave(&vga_lock, flags);
 
        vgacon_xres = width * VGA_FONTWIDTH;
-       vgacon_yres = height * c->vc_font.height;
+       vgacon_yres = height * c->vc_cell_height;
        if (vga_video_type >= VIDEO_TYPE_VGAC) {
                outb_p(VGA_CRTC_MAX_SCAN, vga_video_port_reg);
                max_scan = inb_p(vga_video_port_val);
@@ -623,9 +623,9 @@ static int vgacon_doresize(struct vc_data *c,
 static int vgacon_switch(struct vc_data *c)
 {
        int x = c->vc_cols * VGA_FONTWIDTH;
-       int y = c->vc_rows * c->vc_font.height;
+       int y = c->vc_rows * c->vc_cell_height;
        int rows = screen_info.orig_video_lines * vga_default_font_height/
-               c->vc_font.height;
+               c->vc_cell_height;
        /*
         * We need to save screen size here as it's the only way
         * we can spot the screen has been resized and we need to
@@ -1038,7 +1038,7 @@ static int vgacon_adjust_height(struct vc_data *vc, unsigned fontheight)
                                cursor_size_lastto = 0;
                                c->vc_sw->con_cursor(c, CM_DRAW);
                        }
-                       c->vc_font.height = fontheight;
+                       c->vc_font.height = c->vc_cell_height = fontheight;
                        vc_resize(c, 0, rows);  /* Adjust console size */
                }
        }
@@ -1086,12 +1086,20 @@ static int vgacon_resize(struct vc_data *c, unsigned int width,
        if ((width << 1) * height > vga_vram_size)
                return -EINVAL;
 
+       if (user) {
+               /*
+                * Ho ho!  Someone (svgatextmode, eh?) may have reprogrammed
+                * the video mode!  Set the new defaults then and go away.
+                */
+               screen_info.orig_video_cols = width;
+               screen_info.orig_video_lines = height;
+               vga_default_font_height = c->vc_cell_height;
+               return 0;
+       }
        if (width % 2 || width > screen_info.orig_video_cols ||
            height > (screen_info.orig_video_lines * vga_default_font_height)/
-           c->vc_font.height)
-               /* let svgatextmode tinker with video timings and
-                  return success */
-               return (user) ? 0 : -EINVAL;
+           c->vc_cell_height)
+               return -EINVAL;
 
        if (con_is_visible(c) && !vga_is_gfx) /* who knows */
                vgacon_doresize(c, width, height);
index f83fd3c..9fb7682 100644 (file)
@@ -3127,7 +3127,7 @@ int btrfs_truncate_inode_items(struct btrfs_trans_handle *trans,
                               struct btrfs_inode *inode, u64 new_size,
                               u32 min_type);
 
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root);
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context);
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
                               bool in_reclaim_context);
 int btrfs_set_extent_delalloc(struct btrfs_inode *inode, u64 start, u64 end,
index 7a28314..f1d15b6 100644 (file)
@@ -1340,12 +1340,16 @@ int btrfs_discard_extent(struct btrfs_fs_info *fs_info, u64 bytenr,
                stripe = bbio->stripes;
                for (i = 0; i < bbio->num_stripes; i++, stripe++) {
                        u64 bytes;
+                       struct btrfs_device *device = stripe->dev;
 
-                       if (!stripe->dev->bdev) {
+                       if (!device->bdev) {
                                ASSERT(btrfs_test_opt(fs_info, DEGRADED));
                                continue;
                        }
 
+                       if (!test_bit(BTRFS_DEV_STATE_WRITEABLE, &device->dev_state))
+                               continue;
+
                        ret = do_discard_extent(stripe, &bytes);
                        if (!ret) {
                                discarded_bytes += bytes;
index 864c08d..3b10d98 100644 (file)
@@ -2067,6 +2067,30 @@ static int start_ordered_ops(struct inode *inode, loff_t start, loff_t end)
        return ret;
 }
 
+static inline bool skip_inode_logging(const struct btrfs_log_ctx *ctx)
+{
+       struct btrfs_inode *inode = BTRFS_I(ctx->inode);
+       struct btrfs_fs_info *fs_info = inode->root->fs_info;
+
+       if (btrfs_inode_in_log(inode, fs_info->generation) &&
+           list_empty(&ctx->ordered_extents))
+               return true;
+
+       /*
+        * If we are doing a fast fsync we cannot bail out if the inode's
+        * last_trans is <= the last committed transaction, because we only
+        * update the last_trans of the inode during ordered extent completion,
+        * and for a fast fsync we don't wait for that, we only wait for the
+        * writeback to complete.
+        */
+       if (inode->last_trans <= fs_info->last_trans_committed &&
+           (test_bit(BTRFS_INODE_NEEDS_FULL_SYNC, &inode->runtime_flags) ||
+            list_empty(&ctx->ordered_extents)))
+               return true;
+
+       return false;
+}
+
 /*
  * fsync call for both files and directories.  This logs the inode into
  * the tree log instead of forcing full commits whenever possible.
@@ -2185,17 +2209,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
 
        atomic_inc(&root->log_batch);
 
-       /*
-        * If we are doing a fast fsync we can not bail out if the inode's
-        * last_trans is <= then the last committed transaction, because we only
-        * update the last_trans of the inode during ordered extent completion,
-        * and for a fast fsync we don't wait for that, we only wait for the
-        * writeback to complete.
-        */
        smp_mb();
-       if (btrfs_inode_in_log(BTRFS_I(inode), fs_info->generation) ||
-           (BTRFS_I(inode)->last_trans <= fs_info->last_trans_committed &&
-            (full_sync || list_empty(&ctx.ordered_extents)))) {
+       if (skip_inode_logging(&ctx)) {
                /*
                 * We've had everything committed since the last time we were
                 * modified so clear this flag in case it was set for whatever
index e54466f..4806295 100644 (file)
@@ -3949,7 +3949,7 @@ static int cleanup_free_space_cache_v1(struct btrfs_fs_info *fs_info,
 {
        struct btrfs_block_group *block_group;
        struct rb_node *node;
-       int ret;
+       int ret = 0;
 
        btrfs_info(fs_info, "cleaning free space cache v1");
 
index 4af3360..eb6fddf 100644 (file)
@@ -9678,7 +9678,7 @@ out:
        return ret;
 }
 
-int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
+int btrfs_start_delalloc_snapshot(struct btrfs_root *root, bool in_reclaim_context)
 {
        struct writeback_control wbc = {
                .nr_to_write = LONG_MAX,
@@ -9691,7 +9691,7 @@ int btrfs_start_delalloc_snapshot(struct btrfs_root *root)
        if (test_bit(BTRFS_FS_STATE_ERROR, &fs_info->fs_state))
                return -EROFS;
 
-       return start_delalloc_inodes(root, &wbc, true, false);
+       return start_delalloc_inodes(root, &wbc, true, in_reclaim_context);
 }
 
 int btrfs_start_delalloc_roots(struct btrfs_fs_info *fs_info, long nr,
index ee1dbab..5dc2fd8 100644 (file)
@@ -259,6 +259,8 @@ int btrfs_fileattr_set(struct user_namespace *mnt_userns,
        if (!fa->flags_valid) {
                /* 1 item for the inode */
                trans = btrfs_start_transaction(root, 1);
+               if (IS_ERR(trans))
+                       return PTR_ERR(trans);
                goto update_flags;
        }
 
@@ -907,7 +909,7 @@ static noinline int btrfs_mksnapshot(const struct path *parent,
         */
        btrfs_drew_read_lock(&root->snapshot_lock);
 
-       ret = btrfs_start_delalloc_snapshot(root);
+       ret = btrfs_start_delalloc_snapshot(root, false);
        if (ret)
                goto out;
 
index 07b0b42..6c413bb 100644 (file)
@@ -984,7 +984,7 @@ int btrfs_split_ordered_extent(struct btrfs_ordered_extent *ordered, u64 pre,
 
        if (pre)
                ret = clone_ordered_extent(ordered, 0, pre);
-       if (post)
+       if (ret == 0 && post)
                ret = clone_ordered_extent(ordered, pre + ordered->disk_num_bytes,
                                           post);
 
index 2319c92..3ded812 100644 (file)
@@ -3545,11 +3545,15 @@ static int try_flush_qgroup(struct btrfs_root *root)
        struct btrfs_trans_handle *trans;
        int ret;
 
-       /* Can't hold an open transaction or we run the risk of deadlocking */
-       ASSERT(current->journal_info == NULL ||
-              current->journal_info == BTRFS_SEND_TRANS_STUB);
-       if (WARN_ON(current->journal_info &&
-                   current->journal_info != BTRFS_SEND_TRANS_STUB))
+       /*
+        * Can't hold an open transaction or we run the risk of deadlocking,
+        * and we can't be in the context of a send operation either (where
+        * current->journal_info is set to BTRFS_SEND_TRANS_STUB), as that
+        * would result in a crash when starting a transaction and makes no
+        * sense anyway (send is a read-only operation).
+        */
+       ASSERT(current->journal_info == NULL);
+       if (WARN_ON(current->journal_info))
                return 0;
 
        /*
@@ -3562,7 +3566,7 @@ static int try_flush_qgroup(struct btrfs_root *root)
                return 0;
        }
 
-       ret = btrfs_start_delalloc_snapshot(root);
+       ret = btrfs_start_delalloc_snapshot(root, true);
        if (ret < 0)
                goto out;
        btrfs_wait_ordered_extents(root, U64_MAX, 0, (u64)-1);
index 55741ad..bd69db7 100644 (file)
@@ -7170,7 +7170,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
        int i;
 
        if (root) {
-               ret = btrfs_start_delalloc_snapshot(root);
+               ret = btrfs_start_delalloc_snapshot(root, false);
                if (ret)
                        return ret;
                btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
@@ -7178,7 +7178,7 @@ static int flush_delalloc_roots(struct send_ctx *sctx)
 
        for (i = 0; i < sctx->clone_roots_cnt; i++) {
                root = sctx->clone_roots[i].root;
-               ret = btrfs_start_delalloc_snapshot(root);
+               ret = btrfs_start_delalloc_snapshot(root, false);
                if (ret)
                        return ret;
                btrfs_wait_ordered_extents(root, U64_MAX, 0, U64_MAX);
index f67721d..95a6000 100644 (file)
@@ -6061,7 +6061,8 @@ static int btrfs_log_inode_parent(struct btrfs_trans_handle *trans,
         * (since logging them is pointless, a link count of 0 means they
         * will never be accessible).
         */
-       if (btrfs_inode_in_log(inode, trans->transid) ||
+       if ((btrfs_inode_in_log(inode, trans->transid) &&
+            list_empty(&ctx->ordered_extents)) ||
            inode->vfs_inode.i_nlink == 0) {
                ret = BTRFS_NO_LOG_SYNC;
                goto end_no_trans;
index 70b23a0..304ce64 100644 (file)
@@ -1126,6 +1126,11 @@ int btrfs_load_block_group_zone_info(struct btrfs_block_group *cache, bool new)
                        goto out;
                }
 
+               if (zone.type == BLK_ZONE_TYPE_CONVENTIONAL) {
+                       ret = -EIO;
+                       goto out;
+               }
+
                switch (zone.cond) {
                case BLK_ZONE_COND_OFFLINE:
                case BLK_ZONE_COND_READONLY:
index 1537348..d5b9c8d 100644 (file)
@@ -101,6 +101,7 @@ struct vc_data {
        unsigned int    vc_rows;
        unsigned int    vc_size_row;            /* Bytes per row */
        unsigned int    vc_scan_lines;          /* # of scan lines */
+       unsigned int    vc_cell_height;         /* CRTC character cell height */
        unsigned long   vc_origin;              /* [!] Start of real screen */
        unsigned long   vc_scr_end;             /* [!] End of real screen */
        unsigned long   vc_visible_origin;      /* [!] Top of visible window */
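
vc_cell_height decouples the character cell height programmed into the CRTC (which VT_RESIZEX's v_clin may change) from vc_font.height, the height of the loaded font; the vgacon changes above switch the cursor and resize arithmetic over to the cell height for exactly this reason.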
index c965740..1d8209c 100644 (file)
@@ -601,6 +601,7 @@ struct dev_pm_info {
        unsigned int            idle_notification:1;
        unsigned int            request_pending:1;
        unsigned int            deferred_resume:1;
+       unsigned int            needs_force_resume:1;
        unsigned int            runtime_auto:1;
        bool                    ignore_children:1;
        unsigned int            no_callbacks:1;
index 76f0945..2997ca6 100644 (file)
@@ -170,6 +170,21 @@ void __ptrace_unlink(struct task_struct *child)
        spin_unlock(&child->sighand->siglock);
 }
 
+static bool looks_like_a_spurious_pid(struct task_struct *task)
+{
+       if (task->exit_code != ((PTRACE_EVENT_EXEC << 8) | SIGTRAP))
+               return false;
+
+       if (task_pid_vnr(task) == task->ptrace_message)
+               return false;
+       /*
+        * The tracee changed its pid but the PTRACE_EVENT_EXEC event
+        * was not wait()'ed, most probably debugger targets the old
+        * leader which was destroyed in de_thread().
+        */
+       return true;
+}
+
 /* Ensure that nothing can wake it up, even SIGKILL */
 static bool ptrace_freeze_traced(struct task_struct *task)
 {
@@ -180,7 +195,8 @@ static bool ptrace_freeze_traced(struct task_struct *task)
                return ret;
 
        spin_lock_irq(&task->sighand->siglock);
-       if (task_is_traced(task) && !__fatal_signal_pending(task)) {
+       if (task_is_traced(task) && !looks_like_a_spurious_pid(task) &&
+           !__fatal_signal_pending(task)) {
                task->state = __TASK_TRACED;
                ret = true;
        }
index 4693945..aa108be 100644 (file)
@@ -493,10 +493,12 @@ static int tpm_seal(struct tpm_buf *tb, uint16_t keytype,
 
        ret = tpm_get_random(chip, td->nonceodd, TPM_NONCE_SIZE);
        if (ret < 0)
-               return ret;
+               goto out;
 
-       if (ret != TPM_NONCE_SIZE)
-               return -EIO;
+       if (ret != TPM_NONCE_SIZE) {
+               ret = -EIO;
+               goto out;
+       }
 
        ordinal = htonl(TPM_ORD_SEAL);
        datsize = htonl(datalen);
index 617fabd..0165da3 100644 (file)
@@ -336,9 +336,9 @@ out:
                        rc = -EPERM;
        }
        if (blob_len < 0)
-               return blob_len;
-
-       payload->blob_len = blob_len;
+               rc = blob_len;
+       else
+               payload->blob_len = blob_len;
 
        tpm_put_ops(chip);
        return rc;
index cc79856..4ba87de 100644 (file)
@@ -2,6 +2,7 @@
 #ifndef _ASM_POWERPC_ERRNO_H
 #define _ASM_POWERPC_ERRNO_H
 
+#undef EDEADLOCK
 #include <asm-generic/errno.h>
 
 #undef EDEADLOCK
index cc96e26..ac37830 100644 (file)
@@ -84,7 +84,7 @@
 
 /* CPU types for specific tunings: */
 #define X86_FEATURE_K8                 ( 3*32+ 4) /* "" Opteron, Athlon64 */
-#define X86_FEATURE_K7                 ( 3*32+ 5) /* "" Athlon */
+/* FREE, was #define X86_FEATURE_K7                    ( 3*32+ 5) "" Athlon */
 #define X86_FEATURE_P3                 ( 3*32+ 6) /* "" P3 */
 #define X86_FEATURE_P4                 ( 3*32+ 7) /* "" P4 */
 #define X86_FEATURE_CONSTANT_TSC       ( 3*32+ 8) /* TSC ticks at a constant rate */
 #define X86_FEATURE_EPT_AD             ( 8*32+17) /* Intel Extended Page Table access-dirty bit */
 #define X86_FEATURE_VMCALL             ( 8*32+18) /* "" Hypervisor supports the VMCALL instruction */
 #define X86_FEATURE_VMW_VMMCALL                ( 8*32+19) /* "" VMware prefers VMMCALL hypercall instruction */
+#define X86_FEATURE_PVUNLOCK           ( 8*32+20) /* "" PV unlock function */
+#define X86_FEATURE_VCPUPREEMPT                ( 8*32+21) /* "" PV vcpu_is_preempted function */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (EBX), word 9 */
 #define X86_FEATURE_FSGSBASE           ( 9*32+ 0) /* RDFSBASE, WRFSBASE, RDGSBASE, WRGSBASE instructions*/
 #define X86_FEATURE_FENCE_SWAPGS_KERNEL        (11*32+ 5) /* "" LFENCE in kernel entry SWAPGS path */
 #define X86_FEATURE_SPLIT_LOCK_DETECT  (11*32+ 6) /* #AC for split lock */
 #define X86_FEATURE_PER_THREAD_MBA     (11*32+ 7) /* "" Per-thread Memory Bandwidth Allocation */
+#define X86_FEATURE_SGX1               (11*32+ 8) /* "" Basic SGX */
+#define X86_FEATURE_SGX2               (11*32+ 9) /* "" SGX Enclave Dynamic Memory Management (EDMM) */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:1 (EAX), word 12 */
 #define X86_FEATURE_AVX_VNNI           (12*32+ 4) /* AVX VNNI instructions */
 #define X86_FEATURE_AVIC               (15*32+13) /* Virtual Interrupt Controller */
 #define X86_FEATURE_V_VMSAVE_VMLOAD    (15*32+15) /* Virtual VMSAVE VMLOAD */
 #define X86_FEATURE_VGIF               (15*32+16) /* Virtual GIF */
+#define X86_FEATURE_V_SPEC_CTRL                (15*32+20) /* Virtual SPEC_CTRL */
 #define X86_FEATURE_SVME_ADDR_CHK      (15*32+28) /* "" SVME addr check */
 
 /* Intel-defined CPU features, CPUID level 0x00000007:0 (ECX), word 16 */
 #define X86_FEATURE_AVX512_VPOPCNTDQ   (16*32+14) /* POPCNT for vectors of DW/QW */
 #define X86_FEATURE_LA57               (16*32+16) /* 5-level page tables */
 #define X86_FEATURE_RDPID              (16*32+22) /* RDPID instruction */
+#define X86_FEATURE_BUS_LOCK_DETECT    (16*32+24) /* Bus Lock detect */
 #define X86_FEATURE_CLDEMOTE           (16*32+25) /* CLDEMOTE instruction */
 #define X86_FEATURE_MOVDIRI            (16*32+27) /* MOVDIRI instruction */
 #define X86_FEATURE_MOVDIR64B          (16*32+28) /* MOVDIR64B instruction */
 #define X86_FEATURE_MD_CLEAR           (18*32+10) /* VERW clears CPU buffers */
 #define X86_FEATURE_TSX_FORCE_ABORT    (18*32+13) /* "" TSX_FORCE_ABORT */
 #define X86_FEATURE_SERIALIZE          (18*32+14) /* SERIALIZE instruction */
+#define X86_FEATURE_HYBRID_CPU         (18*32+15) /* "" This part has CPUs of more than one type */
 #define X86_FEATURE_TSXLDTRK           (18*32+16) /* TSX Suspend Load Address Tracking */
 #define X86_FEATURE_PCONFIG            (18*32+18) /* Intel PCONFIG */
 #define X86_FEATURE_ARCH_LBR           (18*32+19) /* Intel ARCH LBR */
index 4502935..742d89a 100644 (file)
 #define MSR_PEBS_DATA_CFG              0x000003f2
 #define MSR_IA32_DS_AREA               0x00000600
 #define MSR_IA32_PERF_CAPABILITIES     0x00000345
+#define PERF_CAP_METRICS_IDX           15
+#define PERF_CAP_PT_IDX                        16
+
 #define MSR_PEBS_LD_LAT_THRESHOLD      0x000003f6
 
 #define MSR_IA32_RTIT_CTL              0x00000570
 #define DEBUGCTLMSR_LBR                        (1UL <<  0) /* last branch recording */
 #define DEBUGCTLMSR_BTF_SHIFT          1
 #define DEBUGCTLMSR_BTF                        (1UL <<  1) /* single-step on branches */
+#define DEBUGCTLMSR_BUS_LOCK_DETECT    (1UL <<  2)
 #define DEBUGCTLMSR_TR                 (1UL <<  6)
 #define DEBUGCTLMSR_BTS                        (1UL <<  7)
 #define DEBUGCTLMSR_BTINT              (1UL <<  8)
index b8e650a..946d761 100644 (file)
@@ -27,6 +27,7 @@
 
 
 #define VMX_EXIT_REASONS_FAILED_VMENTRY         0x80000000
+#define VMX_EXIT_REASONS_SGX_ENCLAVE_MODE      0x08000000
 
 #define EXIT_REASON_EXCEPTION_NMI       0
 #define EXIT_REASON_EXTERNAL_INTERRUPT  1
index 1e299ac..1cc9da6 100644 (file)
@@ -4,7 +4,7 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 .pushsection .noinstr.text, "ax"
index 0bfd26e..9827ae2 100644 (file)
@@ -3,7 +3,7 @@
 
 #include <linux/linkage.h>
 #include <asm/cpufeatures.h>
-#include <asm/alternative-asm.h>
+#include <asm/alternative.h>
 #include <asm/export.h>
 
 /*
diff --git a/tools/include/asm/alternative-asm.h b/tools/include/asm/alternative-asm.h
deleted file mode 100644 (file)
index b54bd86..0000000
+++ /dev/null
@@ -1,10 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
-#define _TOOLS_ASM_ALTERNATIVE_ASM_H
-
-/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
-
-#define altinstruction_entry #
-#define ALTERNATIVE_2 #
-
-#endif
diff --git a/tools/include/asm/alternative.h b/tools/include/asm/alternative.h
new file mode 100644 (file)
index 0000000..b54bd86
--- /dev/null
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _TOOLS_ASM_ALTERNATIVE_ASM_H
+#define _TOOLS_ASM_ALTERNATIVE_ASM_H
+
+/* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */
+
+#define altinstruction_entry #
+#define ALTERNATIVE_2 #
+
+#endif
index ce58cff..6de5a7f 100644 (file)
@@ -863,9 +863,18 @@ __SYSCALL(__NR_process_madvise, sys_process_madvise)
 __SC_COMP(__NR_epoll_pwait2, sys_epoll_pwait2, compat_sys_epoll_pwait2)
 #define __NR_mount_setattr 442
 __SYSCALL(__NR_mount_setattr, sys_mount_setattr)
+#define __NR_quotactl_path 443
+__SYSCALL(__NR_quotactl_path, sys_quotactl_path)
+
+#define __NR_landlock_create_ruleset 444
+__SYSCALL(__NR_landlock_create_ruleset, sys_landlock_create_ruleset)
+#define __NR_landlock_add_rule 445
+__SYSCALL(__NR_landlock_add_rule, sys_landlock_add_rule)
+#define __NR_landlock_restrict_self 446
+__SYSCALL(__NR_landlock_restrict_self, sys_landlock_restrict_self)
 
 #undef __NR_syscalls
-#define __NR_syscalls 443
+#define __NR_syscalls 447
 
 /*
  * 32 bit systems traditionally used different
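The three Landlock syscalls above have no libc wrappers at this point, so userspace reaches them through syscall(2). A minimal hedged sketch that denies all file writes for the calling task, assuming kernel headers new enough to provide <linux/landlock.h> and the __NR_landlock_* numbers:

#include <linux/landlock.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hedged sketch: sandbox the calling task against file writes. */
static int sketch_restrict_writes(void)
{
	struct landlock_ruleset_attr attr = {
		.handled_access_fs = LANDLOCK_ACCESS_FS_WRITE_FILE,
	};
	int ruleset_fd;

	ruleset_fd = syscall(__NR_landlock_create_ruleset, &attr,
			     sizeof(attr), 0);
	if (ruleset_fd < 0)
		return -1;

	/* A real sandbox would add landlock_add_rule() exceptions here. */

	/* no_new_privs is required before landlock_restrict_self(). */
	if (prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0) ||
	    syscall(__NR_landlock_restrict_self, ruleset_fd, 0)) {
		close(ruleset_fd);
		return -1;
	}

	close(ruleset_fd);
	return 0;
}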
index 0827037..67b94bc 100644 (file)
@@ -625,30 +625,147 @@ struct drm_gem_open {
        __u64 size;
 };
 
+/**
+ * DRM_CAP_DUMB_BUFFER
+ *
+ * If set to 1, the driver supports creating dumb buffers via the
+ * &DRM_IOCTL_MODE_CREATE_DUMB ioctl.
+ */
 #define DRM_CAP_DUMB_BUFFER            0x1
+/**
+ * DRM_CAP_VBLANK_HIGH_CRTC
+ *
+ * If set to 1, the kernel supports specifying a CRTC index in the high bits of
+ * &drm_wait_vblank_request.type.
+ *
+ * Starting from kernel version 2.6.39, this capability is always set to 1.
+ */
 #define DRM_CAP_VBLANK_HIGH_CRTC       0x2
+/**
+ * DRM_CAP_DUMB_PREFERRED_DEPTH
+ *
+ * The preferred bit depth for dumb buffers.
+ *
+ * The bit depth is the number of bits used to indicate the color of a single
+ * pixel excluding any padding. This is different from the number of bits per
+ * pixel. For instance, XRGB8888 has a bit depth of 24 but has 32 bits per
+ * pixel.
+ *
+ * Note that this preference only applies to dumb buffers; it's irrelevant
+ * for other types of buffers.
+ */
 #define DRM_CAP_DUMB_PREFERRED_DEPTH   0x3
+/**
+ * DRM_CAP_DUMB_PREFER_SHADOW
+ *
+ * If set to 1, the driver prefers userspace to render to a shadow buffer
+ * instead of directly rendering to a dumb buffer. For best speed, userspace
+ * should do streaming ordered memory copies into the dumb buffer and never
+ * read from it.
+ *
+ * Note that this preference only applies to dumb buffers; it's irrelevant
+ * for other types of buffers.
+ */
 #define DRM_CAP_DUMB_PREFER_SHADOW     0x4
+/**
+ * DRM_CAP_PRIME
+ *
+ * Bitfield of supported PRIME sharing capabilities. See &DRM_PRIME_CAP_IMPORT
+ * and &DRM_PRIME_CAP_EXPORT.
+ *
+ * PRIME buffers are exposed as dma-buf file descriptors. See
+ * Documentation/gpu/drm-mm.rst, section "PRIME Buffer Sharing".
+ */
 #define DRM_CAP_PRIME                  0x5
+/**
+ * DRM_PRIME_CAP_IMPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports importing PRIME
+ * buffers via the &DRM_IOCTL_PRIME_FD_TO_HANDLE ioctl.
+ */
 #define  DRM_PRIME_CAP_IMPORT          0x1
+/**
+ * DRM_PRIME_CAP_EXPORT
+ *
+ * If this bit is set in &DRM_CAP_PRIME, the driver supports exporting PRIME
+ * buffers via the &DRM_IOCTL_PRIME_HANDLE_TO_FD ioctl.
+ */
 #define  DRM_PRIME_CAP_EXPORT          0x2
+/**
+ * DRM_CAP_TIMESTAMP_MONOTONIC
+ *
+ * If set to 0, the kernel will report timestamps with ``CLOCK_REALTIME`` in
+ * struct drm_event_vblank. If set to 1, the kernel will report timestamps with
+ * ``CLOCK_MONOTONIC``. See ``clock_gettime(2)`` for the definition of these
+ * clocks.
+ *
+ * Starting from kernel version 2.6.39, the default value for this capability
+ * is 1. Starting from kernel version 4.15, this capability is always set to 1.
+ */
 #define DRM_CAP_TIMESTAMP_MONOTONIC    0x6
+/**
+ * DRM_CAP_ASYNC_PAGE_FLIP
+ *
+ * If set to 1, the driver supports &DRM_MODE_PAGE_FLIP_ASYNC.
+ */
 #define DRM_CAP_ASYNC_PAGE_FLIP                0x7
-/*
- * The CURSOR_WIDTH and CURSOR_HEIGHT capabilities return a valid widthxheight
- * combination for the hardware cursor. The intention is that a hardware
- * agnostic userspace can query a cursor plane size to use.
+/**
+ * DRM_CAP_CURSOR_WIDTH
+ *
+ * The ``CURSOR_WIDTH`` and ``CURSOR_HEIGHT`` capabilities return a valid
+ * width x height combination for the hardware cursor. The intention is that a
+ * hardware agnostic userspace can query a cursor plane size to use.
  *
  * Note that the cross-driver contract is to merely return a valid size;
 * drivers are free to attach another meaning on top, e.g. i915 returns the
  * maximum plane size.
  */
 #define DRM_CAP_CURSOR_WIDTH           0x8
+/**
+ * DRM_CAP_CURSOR_HEIGHT
+ *
+ * See &DRM_CAP_CURSOR_WIDTH.
+ */
 #define DRM_CAP_CURSOR_HEIGHT          0x9
+/**
+ * DRM_CAP_ADDFB2_MODIFIERS
+ *
+ * If set to 1, the driver supports supplying modifiers in the
+ * &DRM_IOCTL_MODE_ADDFB2 ioctl.
+ */
 #define DRM_CAP_ADDFB2_MODIFIERS       0x10
+/**
+ * DRM_CAP_PAGE_FLIP_TARGET
+ *
+ * If set to 1, the driver supports the &DRM_MODE_PAGE_FLIP_TARGET_ABSOLUTE and
+ * &DRM_MODE_PAGE_FLIP_TARGET_RELATIVE flags in
+ * &drm_mode_crtc_page_flip_target.flags for the &DRM_IOCTL_MODE_PAGE_FLIP
+ * ioctl.
+ */
 #define DRM_CAP_PAGE_FLIP_TARGET       0x11
+/**
+ * DRM_CAP_CRTC_IN_VBLANK_EVENT
+ *
+ * If set to 1, the kernel supports reporting the CRTC ID in
+ * &drm_event_vblank.crtc_id for the &DRM_EVENT_VBLANK and
+ * &DRM_EVENT_FLIP_COMPLETE events.
+ *
+ * Starting from kernel version 4.12, this capability is always set to 1.
+ */
 #define DRM_CAP_CRTC_IN_VBLANK_EVENT   0x12
+/**
+ * DRM_CAP_SYNCOBJ
+ *
+ * If set to 1, the driver supports sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
 #define DRM_CAP_SYNCOBJ                0x13
+/**
+ * DRM_CAP_SYNCOBJ_TIMELINE
+ *
+ * If set to 1, the driver supports timeline operations on sync objects. See
+ * Documentation/gpu/drm-mm.rst, section "DRM Sync Objects".
+ */
 #define DRM_CAP_SYNCOBJ_TIMELINE       0x14
 
 /* DRM_IOCTL_GET_CAP ioctl argument type */
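Every capability documented above is read the same way: fill a struct drm_get_cap (the argument type flagged by the comment above) and issue DRM_IOCTL_GET_CAP; libdrm's drmGetCap() wraps the same ioctl. A hedged userspace sketch; the header path can vary by distribution:

#include <drm/drm.h>
#include <sys/ioctl.h>

/* Hedged sketch: query whether the device supports dumb buffers. */
static int supports_dumb_buffers(int drm_fd)
{
	struct drm_get_cap cap = { .capability = DRM_CAP_DUMB_BUFFER };

	if (ioctl(drm_fd, DRM_IOCTL_GET_CAP, &cap))
		return 0;	/* treat errors as unsupported */

	return cap.value == 1;
}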
index 1987e2e..ddc47bb 100644 (file)
@@ -943,6 +943,7 @@ struct drm_i915_gem_exec_object {
        __u64 offset;
 };
 
+/* DRM_IOCTL_I915_GEM_EXECBUFFER was removed in Linux 5.13 */
 struct drm_i915_gem_execbuffer {
        /**
         * List of buffers to be validated with their relocations to be
index f6afee2..3fd9a7e 100644 (file)
@@ -1078,6 +1078,10 @@ struct kvm_ppc_resize_hpt {
 #define KVM_CAP_DIRTY_LOG_RING 192
 #define KVM_CAP_X86_BUS_LOCK_EXIT 193
 #define KVM_CAP_PPC_DAWR1 194
+#define KVM_CAP_SET_GUEST_DEBUG2 195
+#define KVM_CAP_SGX_ATTRIBUTE 196
+#define KVM_CAP_VM_COPY_ENC_CONTEXT_FROM 197
+#define KVM_CAP_PTP_KVM 198
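As with every KVM_CAP_* number, userspace should probe the new capabilities through KVM_CHECK_EXTENSION instead of assuming kernel support. A hedged sketch:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hedged sketch: probe one of the new capability numbers. */
static int have_ptp_kvm(int kvm_fd)
{
	/* kvm_fd is the /dev/kvm system fd; > 0 means supported. */
	return ioctl(kvm_fd, KVM_CHECK_EXTENSION, KVM_CAP_PTP_KVM) > 0;
}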
 
 #ifdef KVM_CAP_IRQ_ROUTING
 
@@ -1671,6 +1675,8 @@ enum sev_cmd_id {
        KVM_SEV_CERT_EXPORT,
        /* Attestation report */
        KVM_SEV_GET_ATTESTATION_REPORT,
+       /* Guest Migration Extension */
+       KVM_SEV_SEND_CANCEL,
 
        KVM_SEV_NR_MAX,
 };
@@ -1729,6 +1735,45 @@ struct kvm_sev_attestation_report {
        __u32 len;
 };
 
+struct kvm_sev_send_start {
+       __u32 policy;
+       __u64 pdh_cert_uaddr;
+       __u32 pdh_cert_len;
+       __u64 plat_certs_uaddr;
+       __u32 plat_certs_len;
+       __u64 amd_certs_uaddr;
+       __u32 amd_certs_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_send_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
+struct kvm_sev_receive_start {
+       __u32 handle;
+       __u32 policy;
+       __u64 pdh_uaddr;
+       __u32 pdh_len;
+       __u64 session_uaddr;
+       __u32 session_len;
+};
+
+struct kvm_sev_receive_update_data {
+       __u64 hdr_uaddr;
+       __u32 hdr_len;
+       __u64 guest_uaddr;
+       __u32 guest_len;
+       __u64 trans_uaddr;
+       __u32 trans_len;
+};
+
 #define KVM_DEV_ASSIGN_ENABLE_IOMMU    (1 << 0)
 #define KVM_DEV_ASSIGN_PCI_2_3         (1 << 1)
 #define KVM_DEV_ASSIGN_MASK_INTX       (1 << 2)
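Like the existing SEV commands, the migration structs above are passed to the kernel through KVM_MEMORY_ENCRYPT_OP on the VM fd, wrapped in a struct kvm_sev_cmd. A hedged sketch of the first send-side step; error handling is trimmed and the helper name is illustrative:

#include <linux/kvm.h>
#include <sys/ioctl.h>

/* Hedged sketch: begin SEV guest migration with KVM_SEV_SEND_START. */
static int sev_send_start(int vm_fd, int sev_fd,
			  struct kvm_sev_send_start *start)
{
	struct kvm_sev_cmd cmd = {
		.id = KVM_SEV_SEND_START,
		.data = (__u64)(unsigned long)start,
		.sev_fd = sev_fd,	/* open fd of /dev/sev */
	};

	return ioctl(vm_fd, KVM_MEMORY_ENCRYPT_OP, &cmd);
}

A common pattern (also hedged) is to call this once with session_len set to zero so the firmware reports the required buffer sizes before the real invocation.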
index 14332f4..bf81435 100644 (file)
@@ -127,6 +127,7 @@ enum perf_sw_ids {
        PERF_COUNT_SW_EMULATION_FAULTS          = 8,
        PERF_COUNT_SW_DUMMY                     = 9,
        PERF_COUNT_SW_BPF_OUTPUT                = 10,
+       PERF_COUNT_SW_CGROUP_SWITCHES           = 11,
 
        PERF_COUNT_SW_MAX,                      /* non-ABI */
 };
@@ -326,6 +327,7 @@ enum perf_event_read_format {
 #define PERF_ATTR_SIZE_VER4    104     /* add: sample_regs_intr */
 #define PERF_ATTR_SIZE_VER5    112     /* add: aux_watermark */
 #define PERF_ATTR_SIZE_VER6    120     /* add: aux_sample_size */
+#define PERF_ATTR_SIZE_VER7    128     /* add: sig_data */
 
 /*
  * Hardware event_id to monitor via a performance monitoring event:
@@ -404,7 +406,10 @@ struct perf_event_attr {
                                cgroup         :  1, /* include cgroup events */
                                text_poke      :  1, /* include text poke events */
                                build_id       :  1, /* use build id in mmap2 events */
-                               __reserved_1   : 29;
+                               inherit_thread :  1, /* children only inherit if cloned with CLONE_THREAD */
+                               remove_on_exec :  1, /* event is removed from task on exec */
+                               sigtrap        :  1, /* send synchronous SIGTRAP on event */
+                               __reserved_1   : 26;
 
        union {
                __u32           wakeup_events;    /* wakeup every n events */
@@ -456,6 +461,12 @@ struct perf_event_attr {
        __u16   __reserved_2;
        __u32   aux_sample_size;
        __u32   __reserved_3;
+
+       /*
+        * User provided data if sigtrap=1, passed back to user via
+        * siginfo_t::si_perf, e.g. to permit user to identify the event.
+        */
+       __u64   sig_data;
 };
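Together, sigtrap and sig_data let a task receive a synchronous SIGTRAP whenever the event overflows, with sig_data echoed back through siginfo_t::si_perf. A hedged userspace sketch, assuming a 5.13-era header where perf_event_attr has grown to PERF_ATTR_SIZE_VER7 (128 bytes); the kernel is expected to reject sigtrap without remove_on_exec:

#include <linux/perf_event.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Hedged sketch: SIGTRAP on every page fault of the calling thread. */
static int open_sigtrap_counter(void)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_PAGE_FAULTS;
	attr.size = sizeof(attr);	/* PERF_ATTR_SIZE_VER7 */
	attr.sample_period = 1;
	attr.exclude_kernel = 1;	/* keep it unprivileged-friendly */
	attr.sigtrap = 1;
	attr.remove_on_exec = 1;	/* required alongside sigtrap */
	attr.sig_data = 0xdeadbeef;	/* returned in si_perf */

	/* pid = 0, cpu = -1: this thread, any CPU. */
	return syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0);
}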
 
 /*
@@ -1171,10 +1182,15 @@ enum perf_callchain_context {
 /**
  * PERF_RECORD_AUX::flags bits
  */
-#define PERF_AUX_FLAG_TRUNCATED                0x01    /* record was truncated to fit */
-#define PERF_AUX_FLAG_OVERWRITE                0x02    /* snapshot from overwrite mode */
-#define PERF_AUX_FLAG_PARTIAL          0x04    /* record contains gaps */
-#define PERF_AUX_FLAG_COLLISION                0x08    /* sample collided with another */
+#define PERF_AUX_FLAG_TRUNCATED                        0x01    /* record was truncated to fit */
+#define PERF_AUX_FLAG_OVERWRITE                        0x02    /* snapshot from overwrite mode */
+#define PERF_AUX_FLAG_PARTIAL                  0x04    /* record contains gaps */
+#define PERF_AUX_FLAG_COLLISION                        0x08    /* sample collided with another */
+#define PERF_AUX_FLAG_PMU_FORMAT_TYPE_MASK     0xff00  /* PMU specific trace format type */
+
+/* CoreSight PMU AUX buffer formats */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_CORESIGHT       0x0000 /* Default for backward compatibility */
+#define PERF_AUX_FLAG_CORESIGHT_FORMAT_RAW             0x0100 /* Raw format of the source */
 
 #define PERF_FLAG_FD_NO_GROUP          (1UL << 0)
 #define PERF_FLAG_FD_OUTPUT            (1UL << 1)
index 667f1ae..18a9f59 100644 (file)
@@ -255,4 +255,8 @@ struct prctl_mm_map {
 # define SYSCALL_DISPATCH_FILTER_ALLOW 0
 # define SYSCALL_DISPATCH_FILTER_BLOCK 1
 
+/* Set/get enabled arm64 pointer authentication keys */
+#define PR_PAC_SET_ENABLED_KEYS                60
+#define PR_PAC_GET_ENABLED_KEYS                61
+
 #endif /* _LINUX_PRCTL_H */
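On arm64 the two new requests pair with the PR_PAC_* key masks already used by PR_PAC_RESET_KEYS: arg2 selects the affected keys, arg3 the subset to leave enabled. A hedged sketch that turns off the IA instruction key for the calling task:

#include <sys/prctl.h>

/* Hedged sketch: disable pointer authentication for the IA key. */
static long sketch_disable_ia_key(void)
{
	/* Affect only APIAKEY; enable none of the affected keys. */
	if (prctl(PR_PAC_SET_ENABLED_KEYS, PR_PAC_APIAKEY, 0, 0, 0))
		return -1;

	/* Returns the mask of currently enabled keys. */
	return prctl(PR_PAC_GET_ENABLED_KEYS, 0, 0, 0, 0);
}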
index feaf464..3a9f203 100644 (file)
@@ -111,7 +111,7 @@ OPTIONS
 --tracepoints::
         retrieve statistics from tracepoints
 
-*z*::
+-z::
 --skip-zero-records::
         omit records with all zeros in logging mode
 
index 0d66190..406a951 100644 (file)
@@ -540,6 +540,7 @@ ifndef NO_LIBELF
       ifdef LIBBPF_DYNAMIC
         ifeq ($(feature-libbpf), 1)
           EXTLIBS += -lbpf
+          $(call detected,CONFIG_LIBBPF_DYNAMIC)
         else
           dummy := $(error Error: No libbpf devel library found, please install libbpf-devel);
         endif
index 2303256..73d18e0 100644 (file)
@@ -71,7 +71,7 @@ struct kvm_reg_events_ops kvm_reg_events_ops[] = {
                .name   = "vmexit",
                .ops    = &exit_events,
        },
-       { NULL },
+       { NULL, NULL },
 };
 
 const char * const kvm_skip_events[] = {
index 9164969..9974f5f 100644 (file)
 439    n64     faccessat2                      sys_faccessat2
 440    n64     process_madvise                 sys_process_madvise
 441    n64     epoll_pwait2                    sys_epoll_pwait2
+442    n64     mount_setattr                   sys_mount_setattr
+443    n64     quotactl_path                   sys_quotactl_path
+444    n64     landlock_create_ruleset         sys_landlock_create_ruleset
+445    n64     landlock_add_rule               sys_landlock_add_rule
+446    n64     landlock_restrict_self          sys_landlock_restrict_self
index 0b2480c..2e68fbb 100644 (file)
 440    common  process_madvise                 sys_process_madvise
 441    common  epoll_pwait2                    sys_epoll_pwait2                compat_sys_epoll_pwait2
 442    common  mount_setattr                   sys_mount_setattr
+443    common  quotactl_path                   sys_quotactl_path
+444    common  landlock_create_ruleset         sys_landlock_create_ruleset
+445    common  landlock_add_rule               sys_landlock_add_rule
+446    common  landlock_restrict_self          sys_landlock_restrict_self
index 3abef21..7e4a2ab 100644 (file)
 440  common    process_madvise         sys_process_madvise             sys_process_madvise
 441  common    epoll_pwait2            sys_epoll_pwait2                compat_sys_epoll_pwait2
 442  common    mount_setattr           sys_mount_setattr               sys_mount_setattr
+443  common    quotactl_path           sys_quotactl_path               sys_quotactl_path
+444  common    landlock_create_ruleset sys_landlock_create_ruleset     sys_landlock_create_ruleset
+445  common    landlock_add_rule       sys_landlock_add_rule           sys_landlock_add_rule
+446  common    landlock_restrict_self  sys_landlock_restrict_self      sys_landlock_restrict_self
index 7bf01cb..ecd551b 100644 (file)
 440    common  process_madvise         sys_process_madvise
 441    common  epoll_pwait2            sys_epoll_pwait2
 442    common  mount_setattr           sys_mount_setattr
+443    common  quotactl_path           sys_quotactl_path
+444    common  landlock_create_ruleset sys_landlock_create_ruleset
+445    common  landlock_add_rule       sys_landlock_add_rule
+446    common  landlock_restrict_self  sys_landlock_restrict_self
 
 #
 # Due to a historical design error, certain syscalls are numbered differently
index ed4f0bd..7422b0e 100644 (file)
@@ -1123,8 +1123,10 @@ static int process_one_file(const char *fpath, const struct stat *sb,
                        mapfile = strdup(fpath);
                        return 0;
                }
-
-               pr_info("%s: Ignoring file %s\n", prog, fpath);
+               if (is_json_file(bname))
+                       pr_debug("%s: ArchStd json is preprocessed %s\n", prog, fpath);
+               else
+                       pr_info("%s: Ignoring file %s\n", prog, fpath);
                return 0;
        }
 
index 645009c..4a7b8de 100644 (file)
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0|1
-size=120
+size=128
 config=0
 sample_period=*
 sample_type=263
index b0f42c3..4081644 100644 (file)
@@ -5,7 +5,7 @@ group_fd=-1
 flags=0|8
 cpu=*
 type=0
-size=120
+size=128
 config=0
 sample_period=0
 sample_type=65536
index eba723c..86a15dd 100644 (file)
@@ -7,7 +7,7 @@ cpu=*
 pid=-1
 flags=8
 type=1
-size=120
+size=128
 config=9
 sample_period=4000
 sample_type=455
index 8c0d9f3..b64bdc1 100644 (file)
@@ -145,7 +145,14 @@ perf-$(CONFIG_LIBELF) += symbol-elf.o
 perf-$(CONFIG_LIBELF) += probe-file.o
 perf-$(CONFIG_LIBELF) += probe-event.o
 
+ifdef CONFIG_LIBBPF_DYNAMIC
+  hashmap := 1
+endif
 ifndef CONFIG_LIBBPF
+  hashmap := 1
+endif
+
+ifdef hashmap
 perf-y += hashmap.o
 endif
 
index f99852d..43e5b56 100644 (file)
@@ -157,9 +157,15 @@ static int get_max_rate(unsigned int *rate)
 static int record_opts__config_freq(struct record_opts *opts)
 {
        bool user_freq = opts->user_freq != UINT_MAX;
+       bool user_interval = opts->user_interval != ULLONG_MAX;
        unsigned int max_rate;
 
-       if (opts->user_interval != ULLONG_MAX)
+       if (user_interval && user_freq) {
+               pr_err("cannot set frequency and period at the same time\n");
+               return -1;
+       }
+
+       if (user_interval)
                opts->default_interval = opts->user_interval;
        if (user_freq)
                opts->freq = opts->user_freq;
index a12cf4f..106b3d6 100644 (file)
@@ -904,7 +904,7 @@ static void perf_event__cpu_map_swap(union perf_event *event,
        struct perf_record_record_cpu_map *mask;
        unsigned i;
 
-       data->type = bswap_64(data->type);
+       data->type = bswap_16(data->type);
 
        switch (data->type) {
        case PERF_CPU_MAP__CPUS:
@@ -937,7 +937,7 @@ static void perf_event__stat_config_swap(union perf_event *event,
 {
        u64 size;
 
-       size  = event->stat_config.nr * sizeof(event->stat_config.data[0]);
+       size  = bswap_64(event->stat_config.nr) * sizeof(event->stat_config.data[0]);
        size += 1; /* nr item itself */
        mem_bswap_64(&event->stat_config.nr, size);
 }
index aaf7bc7..7629819 100644 (file)
@@ -54,9 +54,9 @@ idt_handlers:
        .align 8
 
        /* Fetch current address and append it to idt_handlers. */
-       current_handler = .
+666 :
 .pushsection .rodata
-.quad current_handler
+       .quad 666b
 .popsection
 
        .if ! \has_error
index ca22ee6..63096ce 100644 (file)
 #include "vmx.h"
 
 #define VCPU_ID                5
+#define NMI_VECTOR     2
+
+static int ud_count;
+
+void enable_x2apic(void)
+{
+       uint32_t spiv_reg = APIC_BASE_MSR + (APIC_SPIV >> 4);
+
+       wrmsr(MSR_IA32_APICBASE, rdmsr(MSR_IA32_APICBASE) |
+             MSR_IA32_APICBASE_ENABLE | MSR_IA32_APICBASE_EXTD);
+       wrmsr(spiv_reg, rdmsr(spiv_reg) | APIC_SPIV_APIC_ENABLED);
+}
+
+static void guest_ud_handler(struct ex_regs *regs)
+{
+       ud_count++;
+       regs->rip += 3; /* VMLAUNCH */
+}
+
+static void guest_nmi_handler(struct ex_regs *regs)
+{
+}
 
 void l2_guest_code(void)
 {
@@ -25,15 +47,23 @@ void l2_guest_code(void)
 
        GUEST_SYNC(8);
 
+       /* Forced exit to L1 upon restore */
+       GUEST_SYNC(9);
+
        /* Done, exit to L1 and never come back.  */
        vmcall();
 }
 
-void l1_guest_code(struct vmx_pages *vmx_pages)
+void guest_code(struct vmx_pages *vmx_pages)
 {
 #define L2_GUEST_STACK_SIZE 64
        unsigned long l2_guest_stack[L2_GUEST_STACK_SIZE];
 
+       enable_x2apic();
+
+       GUEST_SYNC(1);
+       GUEST_SYNC(2);
+
        enable_vp_assist(vmx_pages->vp_assist_gpa, vmx_pages->vp_assist);
 
        GUEST_ASSERT(vmx_pages->vmcs_gpa);
@@ -55,27 +85,40 @@ void l1_guest_code(struct vmx_pages *vmx_pages)
        current_evmcs->revision_id = EVMCS_VERSION;
        GUEST_SYNC(6);
 
+       current_evmcs->pin_based_vm_exec_control |=
+               PIN_BASED_NMI_EXITING;
        GUEST_ASSERT(!vmlaunch());
        GUEST_ASSERT(vmptrstz() == vmx_pages->enlightened_vmcs_gpa);
-       GUEST_SYNC(9);
+
+       /*
+        * NMI forces an L2->L1 exit, resuming L2, and we hope that EVMCS is
+        * up-to-date (RIP points where it should and not at the beginning
+        * of l2_guest_code()). GUEST_SYNC(9) checks that.
+        */
        GUEST_ASSERT(!vmresume());
-       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+
        GUEST_SYNC(10);
+
+       GUEST_ASSERT(vmreadz(VM_EXIT_REASON) == EXIT_REASON_VMCALL);
+       GUEST_SYNC(11);
+
+       /* Try enlightened vmptrld with an incorrect GPA */
+       evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
+       GUEST_ASSERT(vmlaunch());
+       GUEST_ASSERT(ud_count == 1);
+       GUEST_DONE();
 }
 
-void guest_code(struct vmx_pages *vmx_pages)
+void inject_nmi(struct kvm_vm *vm)
 {
-       GUEST_SYNC(1);
-       GUEST_SYNC(2);
+       struct kvm_vcpu_events events;
 
-       if (vmx_pages)
-               l1_guest_code(vmx_pages);
+       vcpu_events_get(vm, VCPU_ID, &events);
 
-       GUEST_DONE();
+       events.nmi.pending = 1;
+       events.flags |= KVM_VCPUEVENT_VALID_NMI_PENDING;
 
-       /* Try enlightened vmptrld with an incorrect GPA */
-       evmcs_vmptrld(0xdeadbeef, vmx_pages->enlightened_vmcs);
-       GUEST_ASSERT(vmlaunch());
+       vcpu_events_set(vm, VCPU_ID, &events);
 }
 
 int main(int argc, char *argv[])
@@ -109,6 +152,13 @@ int main(int argc, char *argv[])
        vcpu_alloc_vmx(vm, &vmx_pages_gva);
        vcpu_args_set(vm, VCPU_ID, 1, vmx_pages_gva);
 
+       vm_init_descriptor_tables(vm);
+       vcpu_init_descriptor_tables(vm, VCPU_ID);
+       vm_handle_exception(vm, UD_VECTOR, guest_ud_handler);
+       vm_handle_exception(vm, NMI_VECTOR, guest_nmi_handler);
+
+       pr_info("Running L1 which uses EVMCS to run L2\n");
+
        for (stage = 1;; stage++) {
                _vcpu_run(vm, VCPU_ID);
                TEST_ASSERT(run->exit_reason == KVM_EXIT_IO,
@@ -124,7 +174,7 @@ int main(int argc, char *argv[])
                case UCALL_SYNC:
                        break;
                case UCALL_DONE:
-                       goto part1_done;
+                       goto done;
                default:
                        TEST_FAIL("Unknown ucall %lu", uc.cmd);
                }
@@ -154,12 +204,14 @@ int main(int argc, char *argv[])
                TEST_ASSERT(!memcmp(&regs1, &regs2, sizeof(regs2)),
                            "Unexpected register values after vcpu_load_state; rdi: %lx rsi: %lx",
                            (ulong) regs2.rdi, (ulong) regs2.rsi);
-       }
 
-part1_done:
-       _vcpu_run(vm, VCPU_ID);
-       TEST_ASSERT(run->exit_reason == KVM_EXIT_SHUTDOWN,
-                   "Unexpected successful VMEnter with invalid eVMCS pointer!");
+               /* Force immediate L2->L1 exit before resuming */
+               if (stage == 8) {
+                       pr_info("Injecting NMI into L1 before L2 had a chance to run after restore\n");
+                       inject_nmi(vm);
+               }
+       }
 
+done:
        kvm_vm_free(vm);
 }
index 2799c66..6b4feb9 100644 (file)
@@ -2893,8 +2893,8 @@ static void grow_halt_poll_ns(struct kvm_vcpu *vcpu)
        if (val < grow_start)
                val = grow_start;
 
-       if (val > halt_poll_ns)
-               val = halt_poll_ns;
+       if (val > vcpu->kvm->max_halt_poll_ns)
+               val = vcpu->kvm->max_halt_poll_ns;
 
        vcpu->halt_poll_ns = val;
 out:
@@ -2973,7 +2973,8 @@ void kvm_vcpu_block(struct kvm_vcpu *vcpu)
                                goto out;
                        }
                        poll_end = cur = ktime_get();
-               } while (single_task_running() && ktime_before(cur, stop));
+               } while (single_task_running() && !need_resched() &&
+                        ktime_before(cur, stop));
        }
 
        prepare_to_rcuwait(&vcpu->wait);